In [1]:
#importing neccessary modules
import pandas as pd 
import sqlite3
import numpy as np 
import requests 
from bs4 import BeautifulSoup as BS
import matplotlib.pyplot as plt
import tabulate
from collections import Counter

%matplotlib inline 
conn = sqlite3.connect('im.db')
cur = conn.cursor()

In [2]:
#seeing available files 
! ls

Phase 1 Project Notebook.ipynb
README.md
bom.movie_gross.csv.gz
im.db
rt.movie_info.tsv.gz
rt.reviews.tsv.gz
tmdb.movies.csv.gz
tn.movie_budgets.csv.gz


In [3]:
#opening and reviewing imdb datasets from movie_basics
imdb = pd.read_sql("""SELECT * FROM movie_basics;""", conn)
imdb.head()

Unnamed: 0,movie_id,primary_title,original_title,start_year,runtime_minutes,genres
0,tt0063540,Sunghursh,Sunghursh,2013,175.0,"Action,Crime,Drama"
1,tt0066787,One Day Before the Rainy Season,Ashad Ka Ek Din,2019,114.0,"Biography,Drama"
2,tt0069049,The Other Side of the Wind,The Other Side of the Wind,2018,122.0,Drama
3,tt0069204,Sabse Bada Sukh,Sabse Bada Sukh,2018,,"Comedy,Drama"
4,tt0100275,The Wandering Soap Opera,La Telenovela Errante,2017,80.0,"Comedy,Drama,Fantasy"


In [4]:
#dropping unneccesary columns and rows 
imdb = imdb.drop(['original_title'], axis = 1)
imdb = imdb.drop(['movie_id'], axis = 1)
imdb = imdb.rename(columns={"primary_title" : "title"})
imdb = imdb.drop_duplicates(subset= "title")
imdb.head()

Unnamed: 0,title,start_year,runtime_minutes,genres
0,Sunghursh,2013,175.0,"Action,Crime,Drama"
1,One Day Before the Rainy Season,2019,114.0,"Biography,Drama"
2,The Other Side of the Wind,2018,122.0,Drama
3,Sabse Bada Sukh,2018,,"Comedy,Drama"
4,The Wandering Soap Opera,2017,80.0,"Comedy,Drama,Fantasy"


In [5]:
#opening and reviewing box office mojo dataset
gross = pd.read_csv("bom.movie_gross.csv.gz")
gross.head()

Unnamed: 0,title,studio,domestic_gross,foreign_gross,year
0,Toy Story 3,BV,415000000.0,652000000,2010
1,Alice in Wonderland (2010),BV,334200000.0,691300000,2010
2,Harry Potter and the Deathly Hallows Part 1,WB,296000000.0,664300000,2010
3,Inception,WB,292600000.0,535700000,2010
4,Shrek Forever After,P/DW,238700000.0,513900000,2010


In [6]:
gross = gross.drop(['year'], axis = 1)
gross.drop_duplicates(subset=['title'])

Unnamed: 0,title,studio,domestic_gross,foreign_gross
0,Toy Story 3,BV,415000000.0,652000000
1,Alice in Wonderland (2010),BV,334200000.0,691300000
2,Harry Potter and the Deathly Hallows Part 1,WB,296000000.0,664300000
3,Inception,WB,292600000.0,535700000
4,Shrek Forever After,P/DW,238700000.0,513900000
...,...,...,...,...
3382,The Quake,Magn.,6200.0,
3383,Edward II (2018 re-release),FM,4800.0,
3384,El Pacto,Sony,2500.0,
3385,The Swan,Synergetic,2400.0,


In [7]:
#printing column names in each dataset to find which to merge on
print(imdb.columns.tolist())
print(gross.columns.tolist())

['title', 'start_year', 'runtime_minutes', 'genres']
['title', 'studio', 'domestic_gross', 'foreign_gross']


In [8]:
#merging the imdb and gross datasets on "title"
df = imdb.merge(gross, how='outer', on='title')
df.head()

Unnamed: 0,title,start_year,runtime_minutes,genres,studio,domestic_gross,foreign_gross
0,Sunghursh,2013.0,175.0,"Action,Crime,Drama",,,
1,One Day Before the Rainy Season,2019.0,114.0,"Biography,Drama",,,
2,The Other Side of the Wind,2018.0,122.0,Drama,,,
3,Sabse Bada Sukh,2018.0,,"Comedy,Drama",,,
4,The Wandering Soap Opera,2017.0,80.0,"Comedy,Drama,Fantasy",,,


In [9]:
#checking the columns datatypes and converting them to the appropriate type
df['foreign_gross'] = pd.to_numeric(df['foreign_gross'], errors = 'coerce')
df.dtypes

title               object
start_year         float64
runtime_minutes    float64
genres              object
studio              object
domestic_gross     float64
foreign_gross      float64
dtype: object

In [10]:
#Finding Q3 for foreign_gross
foreign_Q3 = df['foreign_gross'].quantile(0.75)
print(foreign_Q3)

75050000.0


In [11]:
#limiting the dataset to show values only above the foreign_gross Q3
df = df[df['foreign_gross'] > 75050000.0]

df.head()

Unnamed: 0,title,start_year,runtime_minutes,genres,studio,domestic_gross,foreign_gross
54,The Secret Life of Walter Mitty,2013.0,114.0,"Adventure,Comedy,Drama",Fox,58200000.0,129900000.0
61,Spy,2011.0,110.0,"Action,Crime,Drama",Fox,110800000.0,124800000.0
71,Tangled,2010.0,100.0,"Adventure,Animation,Comedy",BV,200800000.0,391000000.0
73,John Carter,2012.0,132.0,"Action,Adventure,Sci-Fi",BV,73100000.0,211100000.0
95,The A-Team,2010.0,117.0,"Action,Adventure,Thriller",Fox,77200000.0,100000000.0


In [12]:
#sorting values in descending order by foreign_gross
df = df.sort_values(['foreign_gross'], ascending=False)
df.head()

Unnamed: 0,title,start_year,runtime_minutes,genres,studio,domestic_gross,foreign_gross
136200,Harry Potter and the Deathly Hallows Part 2,,,,WB,381000000.0,960500000.0
37794,Avengers: Age of Ultron,2015.0,141.0,"Action,Adventure,Sci-Fi",BV,459000000.0,946400000.0
136292,Marvel's The Avengers,,,,BV,623400000.0,895500000.0
80067,Jurassic World: Fallen Kingdom,2018.0,128.0,"Action,Adventure,Sci-Fi",Uni.,417700000.0,891800000.0
6575,Frozen,2010.0,93.0,"Adventure,Drama,Sport",BV,400700000.0,875700000.0


In [13]:
df = df.dropna(subset = ['genres'])
df.head()

Unnamed: 0,title,start_year,runtime_minutes,genres,studio,domestic_gross,foreign_gross
37794,Avengers: Age of Ultron,2015.0,141.0,"Action,Adventure,Sci-Fi",BV,459000000.0,946400000.0
80067,Jurassic World: Fallen Kingdom,2018.0,128.0,"Action,Adventure,Sci-Fi",Uni.,417700000.0,891800000.0
6575,Frozen,2010.0,93.0,"Adventure,Drama,Sport",BV,400700000.0,875700000.0
111301,Wolf Warrior 2,2017.0,123.0,"Action,Drama,Thriller",HC,2700000.0,867600000.0
28028,Transformers: Age of Extinction,2014.0,165.0,"Action,Adventure,Sci-Fi",Par.,245400000.0,858600000.0


In [14]:
#inserting space after comma 
df = df.replace(to_replace=',', value=', ', regex=True)
df.head()

Unnamed: 0,title,start_year,runtime_minutes,genres,studio,domestic_gross,foreign_gross
37794,Avengers: Age of Ultron,2015.0,141.0,"Action, Adventure, Sci-Fi",BV,459000000.0,946400000.0
80067,Jurassic World: Fallen Kingdom,2018.0,128.0,"Action, Adventure, Sci-Fi",Uni.,417700000.0,891800000.0
6575,Frozen,2010.0,93.0,"Adventure, Drama, Sport",BV,400700000.0,875700000.0
111301,Wolf Warrior 2,2017.0,123.0,"Action, Drama, Thriller",HC,2700000.0,867600000.0
28028,Transformers: Age of Extinction,2014.0,165.0,"Action, Adventure, Sci-Fi",Par.,245400000.0,858600000.0


In [21]:
#converting genre column to countable list
genres_list = df['genres'].tolist()
genres_list = [i.replace(''', '') for i in genres_list]
print(genres_list)

SyntaxError: EOF while scanning triple-quoted string literal (<ipython-input-21-bca2d8d1f049>, line 4)

In [23]:
g = 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Drama', 'Sport', 'Action', 'Drama', 'Thriller', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Thriller', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Family', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Adventure', 'Fantasy', 'Adventure', 'Fantasy', 'Biography', 'Drama', 'Music', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Thriller', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Drama', 'Thriller', 'Action', 'Adventure', 'Thriller', 'Action', 'Adventure', 'Comedy', 'Action', 'Crime', 'Thriller', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Comedy', 'Mystery', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Family', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Drama', 'Crime', 'Drama', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Family', 'Fantasy', 'Adventure', 'Drama', 'Sci-Fi', 'Action', 'Adventure', 'Thriller', 'Action', 'Adventure', 'Thriller', 'Adventure', 'Drama', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Drama', 'Sci-Fi', 'Thriller', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Crime', 'Thriller', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Animation', 'Drama', 'Romance', 'Thriller', 'Adventure', 'Drama', 'Sci-Fi', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Fantasy', 'Adventure', 'Drama', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Comedy', 'Action', 'Horror', 'Sci-Fi', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Sci-Fi', 'Horror', 'Thriller', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Animation', 'Comedy', 'Family', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Comedy', 'Sci-Fi', 'Action', 'Adventure', 'Crime', 'Adventure', 'Animation', 'Comedy', 'Animation', 'Drama', 'Fantasy', 'Action', 'Adventure', 'Biography', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Horror', 'Action', 'Drama', 'History', 'Action', 'Sci-Fi', 'Thriller', 'Comedy', 'Mystery', 'Comedy', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Drama', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Family', 'Fantasy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Animation', 'Comedy', 'Family', 'Action', 'Drama', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Animation', 'Comedy', 'Drama', 'Music', 'Animation', 'Comedy', 'Family', 'Action', 'Biography', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Action', 'Horror', 'Sci-Fi', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Animation', 'Adventure', 'Mystery', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Biography', 'Drama', 'History', 'Biography', 'Crime', 'Drama', 'Comedy', 'Musical', 'Romance', 'Drama', 'Romance', 'Thriller', 'Action', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Drama', 'Romance', 'Drama', 'Western', 'Animation', 'Biography', 'Drama', 'Musical', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Thriller', 'Adventure', 'Comedy', 'Crime', 'Adventure', 'Comedy', 'Family', 'Drama', 'Action', 'Mystery', 'Sci-Fi', 'Adventure', 'Comedy', 'Fantasy', 'Action', 'Adventure', 'Fantasy', 'Action', 'Animation', 'Comedy', 'Action', 'Adventure', 'Drama', 'Action', 'Adventure', 'Horror', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Thriller', 'Action', 'Thriller', 'Action', 'Adventure', 'Thriller', 'Action', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Biography', 'Comedy', 'Drama', 'Crime', 'Mystery', 'Thriller', 'Action', 'Drama', 'Action', 'Adventure', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Sci-Fi', 'Thriller', 'Action', 'Adventure', 'Drama', 'Action', 'Adventure', 'Thriller', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Family', 'Fantasy', 'Drama', 'Thriller', 'Action', 'Adventure', 'Fantasy', 'Drama', 'Horror', 'Mystery', 'Action', 'Adventure', 'Drama', 'Action', 'Adventure', 'Fantasy', 'Action', 'Drama', 'Family', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Thriller', 'Animation', 'Comedy', 'Family', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Drama', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Comedy', 'Horror', 'Mystery', 'Thriller', 'Action', 'Adventure', 'Animation', 'Action', 'Adventure', 'Drama', 'Drama', 'Mystery', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Action', 'Horror', 'Sci-Fi', 'Adventure', 'Comedy', 'Drama', 'Action', 'Biography', 'Drama', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Comedy', 'Family', 'Action', 'Adventure', 'Sci-Fi', 'Adventure', 'Animation', 'Comedy', 'Action', 'Drama', 'Sci-Fi', 'Comedy', 'Drama', 'Romance', 'Adventure', 'Comedy', 'Family', 'Comedy', 'Drama', 'Romance', 'Action', 'Sci-Fi', 'Thriller', 'Action', 'Adventure', 'Family', 'Comedy', 'Drama', 'Romance', 'Adventure', 'Comedy', 'Family', 'Action', 'Adventure', 'Sci-Fi', 'Documentary', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Adventure', 'Drama', 'Action', 'Drama', 'Family', 'Horror', 'Mystery', 'Thriller', 'Action', 'Adventure', 'Fantasy', 'Comedy', 'Comedy', 'Family', 'Fantasy', 'Action', 'Adventure', 'Thriller', 'Action', 'Mystery', 'Thriller', 'Action', 'Animation', 'Comedy', 'Drama', 'Family', 'Horror', 'Mystery', 'Thriller', 'Action', 'Adventure', 'Crime', 'Action', 'Adventure', 'Thriller', 'Action', 'Adventure', 'Western', 'Drama', 'Mystery', 'Sci-Fi', 'Mystery', 'Thriller', 'Horror', 'Sci-Fi', 'Thriller', 'Action', 'Comedy', 'Comedy', 'Fantasy', 'Horror', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Comedy', 'Action', 'Adventure', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Comedy', 'Romance', 'Drama', 'History', 'Thriller', 'Adventure', 'Animation', 'Family', 'Action', 'Drama', 'Fantasy', 'Action', 'Crime', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Comedy', 'Action', 'Biography', 'Drama', 'Documentary', 'Adventure', 'Animation', 'Comedy', 'Action', 'Adventure', 'Family', 'Drama', 'Romance', 'Action', 'Adventure', 'Comedy', 'Comedy', 'Drama', 'Fantasy', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Drama', 'Fantasy', 'Action', 'Thriller', 'Biography', 'Drama', 'Thriller', 'Action', 'Adventure', 'Biography', 'Adventure', 'Comedy', 'Drama', 'Comedy', 'Romance', 'Sport', 'Action', 'Comedy', 'Crime', 'Action', 'Crime', 'Thriller', 'Action', 'Adventure', 'Mystery', 'Action', 'Drama', 'Action', 'Adventure', 'Sci-Fi', 'Action', 'Crime', 'Drama', 'Comedy', 'Mystery', 'Sci-Fi', 'Thriller', 'Adventure', 'Fantasy', 'Action', 'Drama', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Drama', 'Fantasy', 'Adventure', 'Family', 'Fantasy', 'Biography', 'Drama', 'History', 'Adventure', 'Animation', 'Comedy', 'Action', 'Mystery', 'Thriller', 'Adventure', 'Comedy', 'Drama', 'Action', 'Comedy', 'Crime', 'Biography', 'Drama', 'Adventure', 'Family', 'Fantasy', 'Adventure', 'Animation', 'Family', 'Adventure', 'Drama', 'Action', 'Crime', 'Drama', 'Drama', 'Romance', 'Adventure', 'Animation', 'Comedy', 'Drama', 'Music', 'Action', 'Adventure', 'Drama', 'Comedy', 'Action', 'Comedy', 'Crime', 'Comedy', 'Action', 'Adventure', 'Fantasy', 'Comedy', 'Crime', 'Action', 'Adventure', 'Fantasy', 'Comedy', 'Romance', 'Comedy', 'Family', 'Fantasy', 'Action', 'Crime', 'Drama', 'Adventure', 'Family', 'Fantasy', 'Adventure', 'Comedy', 'Drama', 'Action', 'Adventure', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Comedy', 'Romance', 'Comedy', 'Drama', 'Action', 'Adventure', 'Family', 'Biography', 'Drama', 'Adventure', 'Comedy', 'Crime', 'Horror', 'Action', 'Adventure', 'Fantasy', 'Action', 'Adventure', 'Drama', 'Comedy', 'Animation', 'Biography', 'Drama', 'Action', 'Crime', 'Mystery', 'Adventure', 'Drama', 'Family', 'Action', 'Adventure', 'Drama', 'Biography', 'Drama', 'Thriller', 'Adventure', 'Comedy', 'Action', 'Thriller', 'Adventure', 'Animation', 'Comedy', 'Action', 'Crime', 'Drama', 'Action', 'Adventure', 'Drama', 'Comedy', 'Action', 'Comedy', 'Thriller', 'Biography', 'Drama', 'History', 'Comedy', 'Romance, 'Drama', Music', 'Romance', 'Action', 'Drama', 'Thriller', 'Crime', 'Drama', 'Action', 'Adventure', 'Thriller', 'Action', 'Comedy', 'Western', 'Comedy', 'Drama', 'Romance', 'Action', 'Thriller', 'Action', 'Drama', 'Mystery', 'Action', 'Adventure', 'Sci-Fi', 'Comedy', 'Music', 'Horror', 'Mystery', 'Thriller', 'Drama', 'Mystery', 'Sci-Fi', 'Action', 'Comedy', 'War', 'Action', 'Comedy', 'Romance', 'Crime', 'Drama', 'Mystery', 'Adventure', 'Animation', 'Comedy', 'Action', 'Comedy', 'Thriller', 'Crime', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Horror', 'Mystery', 'Thriller', 'Action', 'Adventure', 'Thriller', 'Drama', 'Sport', 'Biography', 'Drama', 'History', 'Action', 'Fantasy', 'Horror', 'Drama', 'History', 'War', 'Action', 'Adventure', 'Fantasy', 'Biography', 'Drama', 'Thriller', 'Action', 'Action', 'Adventure', 'Sci-Fi', 'Comedy', 'Drama', 'Action', 'Adventure', 'Drama', 'Biography', 'Drama', 'History', 'Comedy', 'Documentary', 'Drama', 'Crime', 'Drama', 'Mystery', 'Comedy', 'Romance', 'Adventure', 'Comedy', 'Family', 'Drama', 'History', 'Thriller', 'Biography', 'Drama', 'History', 'Horror', 'Action', 'Drama', 'Sci-Fi', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Comedy', 'Crime', 'Adventure', 'Animation', 'Comedy', 'Action', 'Crime', 'Thriller', 'Comedy', 'Drama', 'Romance', 'Comedy', 'Family', 'Romance', 'Action', 'Comedy', 'Crime', 'Horror', 'Mystery', 'Thriller', 'Comedy', 'Drama', 'Romance', 'Comedy', 'Drama', 'Romance', 'Action', 'Crime', 'Thriller', 'Comedy', 'Biography', 'Drama', 'Romance', 'Comedy', 'Romance', 'Comedy', 'Romance', 'Adventure', 'Animation', 'Comedy', 'Adventure', 'Animation', 'Comedy', 'Action', 'Thriller', 'Adventure', 'Drama', 'Thriller', 'Action', 'Horror', 'Thriller', 'Adventure', 'Comedy', 'Drama', 'Biography', 'Drama', 'Action', 'Drama', 'Thriller', 'Action', 'Adventure', 'Animation', 'Adventure', 'Comedy', 'Action', 'Mystery', 'Thriller', 'Action', 'Biography', 'Comedy', 'Action', 'Comedy', 'Crime', 'Mystery', 'Sci-Fi', 'Thriller', 'Drama', 'Action', 'Drama', 'Sci-Fi', 'Romance', 'Action', 'Adventure', 'Thriller', 'Comedy', 'Romance', 'Action', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Drama', 'Romance', 'Adventure', 'Drama', 'Western', 'Action', 'Fantasy', 'Thriller', 'Comedy', 'Music', 'Action', 'Drama', 'History', 'Action', 'Thriller', 'Drama', 'Horror', 'Mystery', 'Thriller', 'Action', 'Drama', 'Romance', 'Action', 'Fantasy', 'Horror', 'Comedy', 'Romance', 'Drama', 'Fantasy', 'Horror', 'Drama', 'Romance', 'Comedy', 'Drama', 'History', 'Adventure', 'Comedy', 'Family', 'Action', 'Adventure', 'Drama', 'Adventure', 'Animation', 'Comedy', 'Comedy', 'Action', 'Adventure', 'Sci-Fi'

SyntaxError: invalid syntax (<ipython-input-23-7b8116fc6300>, line 1)

In [26]:
#counting the frequency of each genres

genres_counts = Counter('g')
print(genres_counts)

Counter({'g': 1})


In [None]:
data = ({'Adventure': 1338, 'Action': 1329, 'Comedy': 781, 'Drama': 727, 'Sci-Fi': 483, 'Thriller': 476, 'Animation': 385, 'Fantasy': 274, 'Crime': 215, 'Mystery': 205, 'Biography': 168, 'Horror': 148, 'Family': 135, 'Romance': 115, 'History': 71, 'Music': 49, 'Sport': 32, 'Documentary': 26, 'Western': 14, 'Musical': 14, 'War': 6, '  ': 3})
names = list(data.keys())
values = list(data.values())

plt.bar(range(len(data)), values, tick_label=names)
plt.show()

In [None]:
#counting the frequency of each studio

studio_counts = Counter(df['studio'])
print(studio_counts)

In [None]:
data = ({'Fox': 62, 'BV': 55, 'Uni.': 50, 'WB': 44, 'Sony': 43, 'Par.': 38, 'WB (NL)': 21, 'WGUSA': 12, 'LGF': 12, 'LG/S': 11, 'Wein.': 9, 'P/DW': 8, 'CL': 7, 'FoxS': 7, 'SGem': 5, 'TriS': 4, 'Sum.': 3, 'Rela.': 3, 'Focus': 3, 'Yash': 3, 'FUN': 2, 'STX': 2, 'HC': 1, 'FR': 1, 'UTV': 1, 'GrtIndia': 1, 'Magn.': 1, 'WAMCR': 1, 'W/Dim.': 1, 'LGP': 1, 'Vari.': 1, 'MGM': 1, 'EOne': 1, 'SPC': 1, 'MBox': 1, 'WHE': 1, 'RTWC': 1})
names = list(data.keys())
values = list(data.values())

plt.bar(range(len(data)), values, tick_label=names)
plt.show()