## Import Libraries

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

## Reading Data

In [2]:
df = pd.read_csv("imdb_movies.csv")
df

Unnamed: 0,ranking of movie,movie name,Year,certificate,runtime,genre,RATING,DETAIL ABOUT MOVIE,DIRECTOR,ACTOR 1,ACTOR 2,ACTOR 3,ACTOR 4,votes,metascore,GROSS COLLECTION
0,1,Jai Bhim,-2021,TV-MA,164 min,"Crime, Drama",9.4,When a tribal man is arrested for a case of al...,T.J. Gnanavel,Suriya,Lijo Mol Jose,Manikandan,Rajisha Vijayan,163431,,
1,2,The Shawshank Redemption,-1994,R,142 min,Drama,9.3,Two imprisoned men bond over a number of years...,Frank Darabont,Tim Robbins,Morgan Freeman,Bob Gunton,William Sadler,2515762,80.0,$28.34M
2,3,The Godfather,-1972,R,175 min,"Crime, Drama",9.2,"The Godfather follows Vito Corleone, Don of th...",Francis Ford Coppola,Marlon Brando,Al Pacino,James Caan,Diane Keaton,1732749,100.0,$134.97M
3,4,The Dark Knight,-2008,PG-13,152 min,"Action, Crime, Drama",9.0,When the menace known as the Joker wreaks havo...,Christopher Nolan,Christian Bale,Heath Ledger,Aaron Eckhart,Michael Caine,2466041,84.0,$534.86M
4,5,The Godfather: Part II,-1974,R,202 min,"Crime, Drama",9.0,The early life and career of Vito Corleone in ...,Francis Ford Coppola,Al Pacino,Robert De Niro,Robert Duvall,Diane Keaton,1202401,90.0,$57.30M
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
245,246,Mr. Smith Goes to Washington,-1939,Passed,129 min,"Comedy, Drama",8.1,A naive youth leader is appointed to fill a va...,Frank Capra,James Stewart,Jean Arthur,Claude Rains,Edward Arnold,112642,73.0,$9.60M
246,247,Gone with the Wind,-1939,Passed,238 min,"Drama, History, Romance",8.1,The manipulative daughter of a Georgia plantat...,Victor Fleming,George Cukor,Sam Wood,Clark Gable,Vivien Leigh,304725,97.0,$198.68M
247,248,It Happened One Night,-1934,Passed,105 min,"Comedy, Romance",8.1,A renegade reporter trailing a young runaway h...,Frank Capra,Clark Gable,Claudette Colbert,Walter Connolly,Roscoe Karns,100198,87.0,$4.36M
248,249,The Passion of Joan of Arc,-1928,Passed,114 min,"Biography, Drama, History",8.1,"In 1431, Jeanne d'Arc is placed on trial on ch...",Carl Theodor Dreyer,Maria Falconetti,Eugene Silvain,André Berley,Maurice Schutz,52264,,$0.02M


In [3]:
df.columns

Index(['ranking of movie', 'movie name ', 'Year', 'certificate', 'runtime',
       'genre', 'RATING', 'DETAIL ABOUT MOVIE', 'DIRECTOR ', 'ACTOR 1',
       'ACTOR 2', 'ACTOR 3', 'ACTOR 4', 'votes', 'metascore',
       'GROSS COLLECTION'],
      dtype='object')

In [4]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 250 entries, 0 to 249
Data columns (total 16 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   ranking of movie    250 non-null    int64  
 1   movie name          250 non-null    object 
 2   Year                250 non-null    object 
 3   certificate         250 non-null    object 
 4   runtime             250 non-null    object 
 5   genre               250 non-null    object 
 6   RATING              250 non-null    float64
 7   DETAIL ABOUT MOVIE  250 non-null    object 
 8   DIRECTOR            250 non-null    object 
 9   ACTOR 1             250 non-null    object 
 10  ACTOR 2             250 non-null    object 
 11  ACTOR 3             250 non-null    object 
 12  ACTOR 4             250 non-null    object 
 13  votes               250 non-null    int64  
 14  metascore           218 non-null    float64
 15  GROSS COLLECTION    214 non-null    object 
dtypes: float

In [5]:
df.describe()

Unnamed: 0,ranking of movie,RATING,votes,metascore
count,250.0,250.0,250.0,218.0
mean,125.5,8.3084,578529.9,82.449541
std,72.312977,0.234669,495130.4,10.822392
min,1.0,8.1,26538.0,55.0
25%,63.25,8.1,168862.8,75.0
50%,125.5,8.2,431335.5,84.0
75%,187.75,8.4,885425.5,90.0
max,250.0,9.4,2515762.0,100.0


## Drop Unnecessary Columns

In [6]:
df.drop(['ranking of movie', 'certificate', 'DETAIL ABOUT MOVIE', 'ACTOR 3', 'ACTOR 4'], axis=1, inplace=True)

## Rename Columns in a suitable form

In [7]:
df.columns

Index(['movie name ', 'Year', 'runtime', 'genre', 'RATING', 'DIRECTOR ',
       'ACTOR 1', 'ACTOR 2', 'votes', 'metascore', 'GROSS COLLECTION'],
      dtype='object')

In [8]:
df.columns = df.columns.str.lower().str.strip()
df.columns

Index(['movie name', 'year', 'runtime', 'genre', 'rating', 'director',
       'actor 1', 'actor 2', 'votes', 'metascore', 'gross collection'],
      dtype='object')

In [9]:
df.columns = df.columns.str.replace(' ', '_')
df.columns

Index(['movie_name', 'year', 'runtime', 'genre', 'rating', 'director',
       'actor_1', 'actor_2', 'votes', 'metascore', 'gross_collection'],
      dtype='object')

## What is the minimum and maximum ratings ?


In [10]:
# maximum rating
df['rating'].max()

9.4

In [11]:
df['rating'].min()

8.1

## What are the movies with rating > 9 ?

In [12]:
df[df['rating'] > 9][['movie_name', 'actor_1', 'actor_2']]

Unnamed: 0,movie_name,actor_1,actor_2
0,Jai Bhim,Suriya,Lijo Mol Jose
1,The Shawshank Redemption,Tim Robbins,Morgan Freeman
2,The Godfather,Marlon Brando,Al Pacino


## Top 10 movies per metascore and user rating

In [13]:
# Top per metascore
df.sort_values(by='metascore', ascending=False)[['movie_name', 'metascore']].head(10)

Unnamed: 0,movie_name,metascore
112,Vertigo,100.0
109,Lawrence of Arabia,100.0
117,Citizen Kane,100.0
50,Casablanca,100.0
49,Rear Window,100.0
2,The Godfather,100.0
224,Fanny and Alexander,100.0
113,Singin' in the Rain,99.0
52,City Lights,99.0
31,Seven Samurai,98.0


In [14]:
#Top per user rating
df.sort_values(by='rating', ascending=False)[['movie_name', 'rating']].head(10)

Unnamed: 0,movie_name,rating
0,Jai Bhim,9.4
1,The Shawshank Redemption,9.3
2,The Godfather,9.2
3,The Dark Knight,9.0
4,The Godfather: Part II,9.0
5,12 Angry Men,9.0
6,The Lord of the Rings: The Return of the King,8.9
7,Pulp Fiction,8.9
8,Schindler's List,8.9
12,The Lord of the Rings: The Fellowship of the Ring,8.8


## Top 10 genres

In [15]:
# genres
df.groupby('genre')[['movie_name']].count().sort_values(by='movie_name', ascending=False)

Unnamed: 0_level_0,movie_name
genre,Unnamed: 1_level_1
Drama,22
"Crime, Drama",14
"Biography, Drama, History",9
"Animation, Adventure, Comedy",8
"Crime, Drama, Mystery",8
...,...
"Crime, Drama, Film-Noir",1
"Crime, Drama, History",1
"Crime, Drama, Horror",1
"Adventure, Drama, Thriller",1


## Top 10 Directories

In [16]:
# rating
df.groupby('director')[['rating']].mean().sort_values(by='rating', ascending=False).head(10)

Unnamed: 0_level_0,rating
director,Unnamed: 1_level_1
T.J. Gnanavel,9.4
Frank Darabont,8.95
Francis Ford Coppola,8.866667
Jon Watts,8.8
Peter Jackson,8.8
Lana Wachowski,8.7
Irvin Kershner,8.7
Robert Zemeckis,8.65
Jonathan Demme,8.6
Fernando Meirelles,8.6


## Top 10 First Actors

In [17]:
# actor_1
df.groupby('actor_1')[['rating']].mean().sort_values(by='rating', ascending=False).head(10)

Unnamed: 0_level_0,rating
actor_1,Unnamed: 1_level_1
Suriya,9.4
Tim Robbins,9.3
Liam Neeson,8.9
John Travolta,8.9
Elijah Wood,8.8
Tom Holland,8.8
Henry Fonda,8.75
Lilly Wachowski,8.7
Marlon Brando,8.65
Daveigh Chase,8.6


## Top 10 Second Actors

In [18]:
# actor_2
df.groupby('actor_2')[['rating']].mean().sort_values(by='rating', ascending=False).head(10)

Unnamed: 0_level_0,rating
actor_2,Unnamed: 1_level_1
Lijo Mol Jose,9.4
Morgan Freeman,9.3
Al Pacino,9.2
Heath Ledger,9.0
Lee J. Cobb,9.0
Viggo Mortensen,8.9
Ralph Fiennes,8.9
Uma Thurman,8.9
Zendaya,8.8
Joseph Gordon-Levitt,8.8
