# This is my IMDb scores analysis code for my Codecademy portfolio

### Setup
##### Import the relevant modules and read the csv file.

In [127]:
import pandas as pd
import numpy as np
# Load the raw catalogue into a DataFrame; the relative path is resolved
# against the notebook's working directory.
source_csv = 'Netflix TV Shows and Movies.csv'
data_IMDb = pd.read_csv(source_csv)

In [128]:
# Print the schema summary first: info() writes its report to stdout.
# head() must be the final expression of the cell, because a notebook only
# renders the value of the last expression; placed first, the preview was
# computed and silently discarded.
data_IMDb.info()
data_IMDb.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5283 entries, 0 to 5282
Data columns (total 11 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   index              5283 non-null   int64  
 1   id                 5283 non-null   object 
 2   title              5283 non-null   object 
 3   type               5283 non-null   object 
 4   description        5278 non-null   object 
 5   release_year       5283 non-null   int64  
 6   age_certification  2998 non-null   object 
 7   runtime            5283 non-null   int64  
 8   imdb_id            5283 non-null   object 
 9   imdb_score         5283 non-null   float64
 10  imdb_votes         5267 non-null   float64
dtypes: float64(2), int64(3), object(6)
memory usage: 454.1+ KB


#
### Data Clean Up
#### Here we are removing the unnecessary columns and renaming the columns in title format.

In [129]:
# Drop the identifier columns that the analysis never uses.
# errors='ignore' keeps the cell re-runnable: data_IMDb is reassigned here, so on
# a second execution these columns are already gone and a strict drop would raise
# KeyError.
data_IMDb = data_IMDb.drop(columns=['index', 'id', 'imdb_id'], errors='ignore')

# Map the raw snake_case headers to presentation-friendly names. rename() skips
# keys that are not present, so this step is safe to repeat as well.
column_titles = {
    'title': 'Title',
    'type': 'Type',
    'description': 'Description',
    'release_year': 'Release Year',
    'age_certification': 'Age Certification',
    'runtime': 'Runtime (min)',
    'imdb_score': 'IMDb Score',
    'imdb_votes': 'IMDb Votes',
}
data_IMDb = data_IMDb.rename(columns=column_titles)

# Preview the cleaned frame.
data_IMDb.head()

Unnamed: 0,Title,Type,Description,Release Year,Age Certification,Runtime (min),IMDb Score,IMDb Votes
0,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,113,8.3,795222.0
1,Monty Python and the Holy Grail,MOVIE,"King Arthur, accompanied by his squire, recrui...",1975,PG,91,8.2,530877.0
2,Life of Brian,MOVIE,"Brian Cohen is an average young Jewish man, bu...",1979,R,94,8.0,392419.0
3,The Exorcist,MOVIE,12-year-old Regan MacNeil begins to adapt an e...,1973,R,133,8.1,391942.0
4,Monty Python's Flying Circus,SHOW,A British sketch comedy series with the shows ...,1969,TV-14,30,8.8,72895.0


#
# Data Analysis

#### What fraction of the data corresponds to Movies and TV Shows?

In [130]:
# Share of rows per Type; normalize=True returns proportions instead of raw counts.
type_column = data_IMDb['Type']
type_column.value_counts(normalize=True)

MOVIE    0.644899
SHOW     0.355101
Name: Type, dtype: float64

#
#### List of Movies in order of IMDb score, highest to lowest.

In [131]:
# Keep only the rows whose Type is 'MOVIE'; later cells reuse this subset.
is_movie = data_IMDb['Type'].eq('MOVIE')
Movies = data_IMDb[is_movie]

# Rank by IMDb score, best first, and preview the first five rows.
movies_by_score = Movies.sort_values('IMDb Score', ascending=False)
movies_by_score.head()

Unnamed: 0,Title,Type,Description,Release Year,Age Certification,Runtime (min),IMDb Score,IMDb Votes
3172,David Attenborough: A Life on Our Planet,MOVIE,The story of life on our planet by the man who...,2020,PG,83,9.0,31180.0
2685,C/o Kancharapalem,MOVIE,From a schoolboyâ€™s crush to a middle-aged ba...,2018,PG,152,9.0,6562.0
24,No Longer Kids,MOVIE,"By coincidence, Ahmad discovers that his fathe...",1979,,235,9.0,943.0
822,Chhota Bheem & Krishna in Mayanagari,MOVIE,Bheem and his Friends are having a great time ...,2011,G,66,9.0,5.0
246,Inception,MOVIE,"Cobb, a skilled thief who commits corporate es...",2010,PG-13,148,8.8,2268288.0


#
#### List of Movies in order of most to least votes.

In [132]:
# Rank movies by vote count, most-voted first, and preview the first five rows.
movies_by_votes = Movies.sort_values('IMDb Votes', ascending=False)
movies_by_votes.head()

Unnamed: 0,Title,Type,Description,Release Year,Age Certification,Runtime (min),IMDb Score,IMDb Votes
246,Inception,MOVIE,"Cobb, a skilled thief who commits corporate es...",2010,PG-13,148,8.8,2268288.0
101,Forrest Gump,MOVIE,A man with a low IQ has accomplished great thi...,1994,PG-13,142,8.8,1994599.0
662,Django Unchained,MOVIE,"With the help of a German bounty hunter, a fre...",2012,R,165,8.4,1472668.0
104,Saving Private Ryan,MOVIE,"As U.S. troops storm the beaches of Normandy, ...",1998,R,169,8.6,1346020.0
0,Taxi Driver,MOVIE,A mentally unstable Vietnam War veteran works ...,1976,R,113,8.3,795222.0


#
#### List of TV Shows in order of IMDb score, highest to lowest.

In [133]:
# Keep only the rows whose Type is 'SHOW'; later cells reuse this subset.
is_show = data_IMDb['Type'].eq('SHOW')
TV_Shows = data_IMDb[is_show]

# Rank by IMDb score, best first, and preview the first five rows.
shows_by_score = TV_Shows.sort_values('IMDb Score', ascending=False)
shows_by_score.head()

Unnamed: 0,Title,Type,Description,Release Year,Age Certification,Runtime (min),IMDb Score,IMDb Votes
2787,#ABtalks,SHOW,#ABtalks is a YouTube interview show hosted by...,2018,TV-PG,68,9.6,7.0
622,Khawatir,SHOW,A TV show devoted to help young people to be m...,2005,TV-14,20,9.6,3046.0
229,Breaking Bad,SHOW,"When Walter White, a New Mexico chemistry teac...",2008,TV-MA,48,9.5,1727694.0
245,Avatar: The Last Airbender,SHOW,"In a war-torn world of elemental magic, a youn...",2005,TV-Y7,24,9.3,297336.0
3597,Kota Factory,SHOW,"Dedicated to Shrimati SL Loney ji, Shri Irodov...",2019,TV-MA,42,9.3,66985.0


#
#### List of TV Shows in order of most to least votes.

In [134]:
# Rank TV shows by vote count, most-voted first, and preview the first five rows.
shows_by_votes = TV_Shows.sort_values('IMDb Votes', ascending=False)
shows_by_votes.head()

Unnamed: 0,Title,Type,Description,Release Year,Age Certification,Runtime (min),IMDb Score,IMDb Votes
229,Breaking Bad,SHOW,"When Walter White, a New Mexico chemistry teac...",2008,TV-MA,48,9.5,1727694.0
1145,Stranger Things,SHOW,"When a young boy vanishes, a small town uncove...",2016,TV-14,52,8.7,989090.0
230,The Walking Dead,SHOW,Sheriff's deputy Rick Grimes awakens from a co...,2010,TV-MA,46,8.2,945125.0
660,Black Mirror,SHOW,A contemporary British re-working of The Twili...,2011,TV-MA,59,8.8,515577.0
883,House of Cards,SHOW,"Set in present day Washington, D.C., House of ...",2013,TV-MA,52,8.7,494092.0


## Breakdown of Movie and TV Show analysis.

In [135]:
# Mean IMDb score per catalogue type, rounded to two decimals for reporting.
# .mean() is called directly: describe()[1] selected the mean by integer position
# on a label-indexed Series, which pandas 2.x deprecates, and it also computed
# seven other statistics that were never used.
Movies_Mean = round(Movies['IMDb Score'].mean(), 2)
TV_Shows_Mean = round(TV_Shows['IMDb Score'].mean(), 2)

print("The mean movie rating from IMDb is " + str(Movies_Mean))
print("The mean TV Show rating from IMDb is " + str(TV_Shows_Mean))

# The comparison deliberately uses the rounded means, so differences below the
# reported precision count as "similar".
if Movies_Mean > TV_Shows_Mean:
    print("On average Movies score higher on IMDb's rating.")
elif Movies_Mean < TV_Shows_Mean:
    print("On average TV Shows score higher on IMDb's rating.")
else:
    print("On average Movies and Tv Shows score similar in IMDb's rating.")

# Total votes per catalogue type. Series.sum() skips missing vote counts, and
# int() yields an arbitrary-precision Python integer, so the result does not
# depend on the platform's default integer width.
Movies_total_votes = int(Movies['IMDb Votes'].sum())
TV_Shows_total_votes = int(TV_Shows['IMDb Votes'].sum())

if Movies_total_votes > TV_Shows_total_votes:
    print("More people have voted for Movies than TV Shows.")
elif Movies_total_votes < TV_Shows_total_votes:
    print("More people have voted for TV Shows than Movies.")
else:
    print("There are an equal number of votes for Movies and TV Shows.")

The mean movie rating from IMDb is 6.27
The mean TV Show rating from IMDb is 7.02
On average TV Shows score higher on IMDb's rating.
More people have voted for Movies than TV Shows.


#
# Conclusions
##
#### In summary, the analysis of the IMDb data shows that the highest-rated movies are "David Attenborough: A Life on Our Planet", "C/o Kancharapalem", "No Longer Kids" and "Chhota Bheem & Krishna in Mayanagari", which tie at a score of 9.0, and that the highest-rated TV Shows are "#ABtalks" and "Khawatir", both at 9.6. The data has limitations: some of the Movies/TV Shows received very few votes, so their scores can be heavily biased; for example, #ABtalks had only 7 votes.

#### However, the Movie with the most votes was "Inception" and the TV Show with the most votes was "Breaking Bad". Both of these also scored very highly: Inception is 5th among the highest-rated Movies and Breaking Bad is 3rd among the highest-rated TV Shows. Therefore, there is greater confidence in saying that these are the most popular in their respective types.

#### Finally, on average TV Shows score higher on IMDb's rating, while more people have voted for Movies than for TV Shows.