<a href="https://colab.research.google.com/github/LRLeite/Data-Analytics/blob/main/IMDb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import csv
import gzip

import pandas as pd

## **Informações sobre os dados:**
https://www.imdb.com/interfaces/

In [2]:
# URL of the IMDb "title.basics" dump (basic information about each title).
url_basics = 'https://datasets.imdbws.com/title.basics.tsv.gz'

# Load only the columns this notebook works with.
# The dump is a plain tab-separated file with no quoting convention, yet some
# titles contain literal double quotes. With pandas' default quoting, a field
# that opens with an unbalanced '"' can swallow the following tabs/newlines
# and merge several rows into one; QUOTE_NONE reads every character literally.
# NOTE: missing values are kept as the literal string '\N' (see endYear in the
# preview); pass na_values='\\N' if numeric year/runtime columns are needed.
basics = pd.read_csv(
    url_basics,
    compression='gzip',
    sep='\t',
    quoting=csv.QUOTE_NONE,
    usecols=[
        'tconst',
        'titleType',
        'originalTitle',
        'startYear',
        'endYear',
        'runtimeMinutes',
        'genres',
    ],
)
basics.head()

Unnamed: 0,tconst,titleType,originalTitle,startYear,endYear,runtimeMinutes,genres
0,tt0000001,short,Carmencita,1894,\N,1,"Documentary,Short"
1,tt0000002,short,Le clown et ses chiens,1892,\N,5,"Animation,Short"
2,tt0000003,short,Pauvre Pierrot,1892,\N,4,"Animation,Comedy,Romance"
3,tt0000004,short,Un bon bock,1892,\N,12,"Animation,Short"
4,tt0000005,short,Blacksmith Scene,1893,\N,1,"Comedy,Short"


In [3]:
# URL of the IMDb "title.ratings" dump (average rating and vote count).
url_ratings = 'https://datasets.imdbws.com/title.ratings.tsv.gz'

# Read the gzip-compressed, tab-separated ratings table and preview it.
ratings = pd.read_csv(
    url_ratings,
    sep='\t',
    compression='gzip',
)
ratings.head()

Unnamed: 0,tconst,averageRating,numVotes
0,tt0000001,5.7,1923
1,tt0000002,5.8,259
2,tt0000003,6.5,1737
3,tt0000004,5.6,174
4,tt0000005,6.2,2549


In [4]:
# Attach each title's rating data to its identifying columns.
# A left join on `tconst` keeps every title from `basics`; titles without a
# matching row in `ratings` get NaN in averageRating / numVotes.
avaliation = basics[['tconst', 'titleType', 'originalTitle']].merge(
    ratings[['tconst', 'averageRating', 'numVotes']],
    how='left',
    on='tconst',
)
avaliation.head()

Unnamed: 0,tconst,titleType,originalTitle,averageRating,numVotes
0,tt0000001,short,Carmencita,5.7,1923.0
1,tt0000002,short,Le clown et ses chiens,5.8,259.0
2,tt0000003,short,Pauvre Pierrot,6.5,1737.0
3,tt0000004,short,Un bon bock,5.6,174.0
4,tt0000005,short,Blacksmith Scene,6.2,2549.0


In [5]:
# List the distinct title types present in the merged table.
avaliation['titleType'].unique()

array(['short', 'movie', 'tvSeries', 'tvShort', 'tvMovie', 'tvEpisode',
       'tvMiniSeries', 'tvSpecial', 'video', 'videoGame', 'tvPilot'],
      dtype=object)

## **Top 10 filmes com as melhores avaliações (mínimo de 10.000 votos)**

In [6]:
# Ten highest-rated movies among those with at least 10000 votes.
(
    avaliation
    .loc[
        (avaliation['titleType'] == 'movie') & (avaliation['numVotes'] >= 10000),
        ['originalTitle', 'averageRating', 'numVotes'],
    ]
    .sort_values(by='averageRating', ascending=False)
    .head(10)
)

Unnamed: 0,originalTitle,averageRating,numVotes
4500860,The Silence of Swastika,9.7,10196.0
108638,The Shawshank Redemption,9.3,2662677.0
67249,The Godfather,9.2,1845237.0
241771,Hababam Sinifi,9.2,40895.0
6111798,CM101MMXI Fundamentals,9.1,46395.0
4445937,Kimetsu no Yaiba: Tsuzumi Yashiki Hen,9.0,13795.0
49174,12 Angry Men,9.0,786284.0
7313528,Aynabaji,9.0,27124.0
70072,The Godfather Part II,9.0,1263971.0
105590,Schindler's List,9.0,1348419.0


## **Top 10 filmes com as piores avaliações (mínimo de 10.000 votos)**

In [7]:
# Ten lowest-rated movies among those with at least 10000 votes.
(
    avaliation
    .loc[
        (avaliation['titleType'] == 'movie') & (avaliation['numVotes'] >= 10000),
        ['originalTitle', 'averageRating', 'numVotes'],
    ]
    .sort_values(by='averageRating')
    .head(10)
)

Unnamed: 0,originalTitle,averageRating,numVotes
7594242,Reis,1.0,73622.0
8147028,Cumali Ceber: Allah Seni Alsin,1.0,39058.0
8452314,Sadak 2,1.1,95944.0
7616910,Smolensk,1.2,39716.0
403810,Daniel der Zauberer,1.2,14426.0
238961,Foodfight!,1.3,11076.0
6718364,Saving Christmas,1.3,15987.0
259310,Superbabies: Baby Geniuses 2,1.5,31134.0
782874,Dünyayi Kurtaran Adam'in Oglu,1.5,16340.0
6350779,Justin Bieber's Believe,1.5,17885.0


## **Top 10 séries de TV com as melhores avaliações (mínimo de 10.000 votos)**

In [8]:
# Ten highest-rated TV series among those with at least 10000 votes.
# The section heading promises "at least 10000 votes", so the bound is
# inclusive (>=), the same threshold the movie rankings use; the previous
# strict `>` silently excluded series with exactly 10000 votes.
(
    avaliation
    .loc[
        (avaliation['titleType'] == 'tvSeries') & (avaliation['numVotes'] >= 10000),
        ['originalTitle', 'averageRating', 'numVotes'],
    ]
    .sort_values(by='averageRating', ascending=False)
    .head(10)
)

Unnamed: 0,originalTitle,averageRating,numVotes
8356485,Bluey,9.7,10116.0
875134,Breaking Bad,9.5,1864320.0
8346709,The Heroes,9.4,165941.0
9179919,The Chosen,9.4,28377.0
400154,Avatar: The Last Airbender,9.3,313421.0
2295974,Scam 1992: The Harshad Mehta Story,9.3,144012.0
293196,The Wire,9.3,338772.0
6803805,The Filthy Frank Show,9.2,33076.0
3376470,Aspirants,9.2,297088.0
137626,The Sopranos,9.2,396600.0


## **Top 10 séries de TV com as piores avaliações (mínimo de 10.000 votos)**

In [9]:
# Ten lowest-rated TV series among those with at least 10000 votes.
# The section heading promises "at least 10000 votes", so the bound is
# inclusive (>=), the same threshold the movie rankings use; the previous
# strict `>` silently excluded series with exactly 10000 votes.
(
    avaliation
    .loc[
        (avaliation['titleType'] == 'tvSeries') & (avaliation['numVotes'] >= 10000),
        ['originalTitle', 'averageRating', 'numVotes'],
    ]
    .sort_values(by='averageRating')
    .head(10)
)

Unnamed: 0,originalTitle,averageRating,numVotes
5215441,The Pogmentary,1.1,13648.0
2237106,Santa Inc.,1.6,14971.0
9005153,Rasbhari,2.8,15550.0
1450891,Keeping Up with the Kardashians,2.8,30173.0
8830167,Batwoman,3.4,44197.0
8926596,Jinn,3.5,10214.0
2855095,Paurashpur,3.7,18847.0
470446,The Hills,3.7,11310.0
4049771,Jersey Shore,3.8,23788.0
9267026,Resident Evil,4.0,40312.0
