In [1]:
%load_ext autoreload
%autoreload 2
%reload_ext autoreload

import polars as pl
import pandas as pd
pd.set_option('display.float_format', lambda x: f'{x :.1f}')
from cleaner import DataCleaner
import numpy as np
from datetime import datetime
from tools import (
    order_and_rename,
    import_datasets,
    bins_generator
)


In [2]:
movies = pd.read_csv('clean_datasets/movies_rating.csv')
ratings = pd.read_csv(
    'movies_datasets/title_ratings.tsv',
    sep = "\t",
)
joined = pd.merge(
    movies,
    ratings,
    left_on = "titre_id",
    right_on = "tconst"
)

In [3]:
joined = joined.rename(
    columns={
        "averageRating" : "rating_avg",
        "numVotes" : "rating_votes"
    }
).drop("tconst", axis=1)

rating = joined

In [4]:
bins, names = bins_generator(rating["titre_date_sortie"].max())

rating["cuts"] = pd.cut(
    rating["titre_date_sortie"],
    bins=bins,
    labels=names
)

In [10]:
rating

Unnamed: 0,titre_id,titre_str,titre_type,titre_date_sortie,titre_date_fin,titre_duree,titre_genres,person_id,person_name,person_birthdate,person_job,person_role,person_index,rating_avg,rating_votes,cuts
0,tt0000009,Miss Jerry,movie,1894,0,45,Romance,nm0063086,Blanche Bayliss,1878,actress,"[""Miss Geraldine Holbrook (Miss Jerry)""]",1,5.3,207,<1900
1,tt0000009,Miss Jerry,movie,1894,0,45,Romance,nm0183823,William Courtenay,1875,actor,"[""Mr. Hamilton""]",2,5.3,207,<1900
2,tt0000009,Miss Jerry,movie,1894,0,45,Romance,nm1309758,Chauncey Depew,1834,actor,"[""Chauncey Depew - the Director of the New Yor...",3,5.3,207,<1900
3,tt0000574,The Story of the Kelly Gang,movie,1906,0,70,"Action,Adventure,Biography",nm0846887,Elizabeth Tait,1879,actress,"[""Kate Kelly""]",1,6.0,853,1900-1920
4,tt0000574,The Story of the Kelly Gang,movie,1906,0,70,"Action,Adventure,Biography",nm0846894,John Tait,1871,actor,"[""School Master""]",2,6.0,853,1900-1920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1053771,tt9916538,Kuambil Lagi Hatiku,movie,2019,0,123,Drama,nm1266058,Ria Irawan,1969,actress,"[""Dewi""]",4,8.6,7,2010-2020
1053772,tt9916730,6 Gunn,movie,2017,0,116,Drama,nm6096005,Devadhar Archit,0,actor,0,1,7.6,11,2010-2020
1053773,tt9916730,6 Gunn,movie,2017,0,116,Drama,nm0059461,Sunil Barve,0,actor,0,2,7.6,11,2010-2020
1053774,tt9916730,6 Gunn,movie,2017,0,116,Drama,nm13233318,Ganesh Vasant Patil,0,actor,"[""Vishal""]",3,7.6,11,2010-2020


In [5]:
rating["rating_avg"].median()

6.1

In [6]:
rating["rating_avg"].describe()

count   1053776.0
mean          6.0
std           1.3
min           1.0
25%           5.2
50%           6.1
75%           6.9
max          10.0
Name: rating_avg, dtype: float64

In [7]:
condi = (
    rating["rating_avg"] == 10
)

best_movies = rating[condi]

best_movies

Unnamed: 0,titre_id,titre_str,titre_type,titre_date_sortie,titre_date_fin,titre_duree,titre_genres,person_id,person_name,person_birthdate,person_job,person_role,person_index,rating_avg,rating_votes,cuts
565926,tt10449358,Kaputol,movie,2019,0,120,Drama,nm0317737,Cherie Gil,1963,actress,"[""Kiki"",""Rina""]",1,10.0,7,2010-2020
565927,tt10449358,Kaputol,movie,2019,0,120,Drama,nm1368756,Alfred Vargas,1981,actor,"[""Caloy"",""Robert""]",2,10.0,7,2010-2020
565928,tt10449358,Kaputol,movie,2019,0,120,Drama,nm6651713,Ronwaldo Martin,0,actor,"[""Conrad""]",3,10.0,7,2010-2020
565929,tt10449358,Kaputol,movie,2019,0,120,Drama,nm0032629,Angel Aquino,1973,actress,"[""Maybelle""]",4,10.0,7,2010-2020
566407,tt10463270,Shouting Silence,movie,2021,0,95,"Crime,Drama,Thriller",nm12297451,Samir Singh,0,actor,"[""Tushar""]",10,10.0,18,2010-2020
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1036409,tt8974670,Major Capers: The Legend of Team Broadminded,movie,2022,0,88,Documentary,nm10109594,Doc Lowell Burwell,0,actor,0,3,10.0,6,>2021
1039230,tt9080346,Poets Are the Destroyers,movie,2021,0,0,Drama,nm9684744,Aarushi Agni,0,actress,"[""Jazz Singer""]",1,10.0,6,2010-2020
1039231,tt9080346,Poets Are the Destroyers,movie,2021,0,0,Drama,nm10181391,Pablo Javier Alfaro,0,actor,"[""Jazz Musician""]",2,10.0,6,2010-2020
1039232,tt9080346,Poets Are the Destroyers,movie,2021,0,0,Drama,nm10169618,Shepsi Haider,0,actor,"[""Pablo""]",3,10.0,6,2010-2020


In [8]:
for date in names[1:]:
    condi = (
        (rating["cuts"] == date)
    )


    print(f"Average note for {date}, {rating['rating_avg'][condi].median()}")

Average note for 1900-1920, 6.0
Average note for 1920-1940, 6.2
Average note for 1940-1960, 6.3
Average note for 1960-1980, 6.1
Average note for 1980-1990, 6.0
Average note for 1990-2000, 6.0
Average note for 2000-2010, 6.1
Average note for 2010-2020, 6.0
Average note for >2021, 6.4


In [9]:
condi = rating["rating_avg"] > 9

rating["titre_genres"][condi].mode()

0    Drama
Name: titre_genres, dtype: object

Avg rating par genre, avg rating par acteur, avg par director, correlation entre durée et note?