In [112]:
import pandas as pd
import numpy as np
import plotly.express as px 
from plotly.subplots import make_subplots
import plotly.graph_objects as go

In [113]:
# Load the name.basics table (tab-separated, gzip-compressed) directly from its URL.
name_df = pd.read_csv(
    "https://datasets.imdbws.com/name.basics.tsv.gz",
    sep="\t",
)

In [114]:
# Load the title.principals table (tab-separated, gzip-compressed) directly from its URL.
principals_df = pd.read_csv(
    "https://datasets.imdbws.com/title.principals.tsv.gz",
    sep="\t",
)

In [115]:
# Keep only the rows whose category is one of the two acting labels.
df_actors = principals_df.loc[principals_df['category'].isin(['actor', 'actress'])]

In [116]:
# Inner join with no explicit key: pandas joins on every column name the two
# frames have in common.
# NOTE(review): presumably the person identifier is the only shared column - confirm.
list_actors = df_actors.merge(name_df, how='inner')

In [117]:
# Display how many rows each name appears in, most frequent first.
list_actors.loc[:, 'primaryName'].value_counts()

Sameera Sherief        10126
Delhi Kumar             7834
Subhalekha Sudhakar     7146
Neha Gowda              6213
Sudha Chandran          5877
                       ...  
Jon Niccum                 1
Craig Cove                 1
Kayra Pitts                1
Geoffrey Pinkeye           1
Andreas Demmel             1
Name: primaryName, Length: 1948531, dtype: int64

In [118]:
# The same list drives both the parse-time column filter (usecols) and the
# final column order of the resulting frame.
basics_columns = ['tconst', 'startYear', 'titleType', 'isAdult', 'runtimeMinutes']
movie_genre_df = pd.read_csv(
    "https://datasets.imdbws.com/title.basics.tsv.gz",
    sep="\t",
    low_memory=False,
    usecols=basics_columns,
)[basics_columns]

In [119]:
# Attach the title attributes to every acting credit (inner join on the
# column names shared by both frames).
actors_titleType = list_actors.merge(movie_genre_df, how='inner')

In [120]:
# Restrict the credits to titles typed as 'movie'.
is_movie = actors_titleType['titleType'] == 'movie'
actors_movies = actors_titleType.loc[is_movie]

In [121]:

# Cells holding the two-character string backslash-N are turned into a null
# marker so that a later dropna() can discard them.
actors_movies_nan = actors_movies.replace(to_replace='\\N', value=pd.NaT)
# Only the performer name and the start year are needed from here on.
actors_movies_filtre = actors_movies_nan.loc[:, ['primaryName', 'startYear']]

  

In [122]:
# Discard every row in which the name or the year is missing.
actors_movies_clean = actors_movies_filtre.dropna(axis=0, how='any')

In [123]:
# Years must be integers before they can be compared numerically.
actors_movies_clean = actors_movies_clean.astype({'startYear': int})
# Keep the credits dated 1920 or later.
from_1920 = actors_movies_clean['startYear'] >= 1920
actors_movies_year = actors_movies_clean.loc[from_1920]

In [124]:
# Build one table per decade (1920-1929 through 2020-2029) holding the five
# names with the most movie credits in that decade. Each table has the columns
# 'name' and 'count'; the tables are stored in chronological order.
subplot = []
for depart in range(1920, 2030, 10):
    fin = depart + 9  # last year of the decade, inclusive
    # between() is inclusive on both ends by default.
    in_decade = actors_movies_year['startYear'].between(depart, fin)
    top_five = (
        actors_movies_year.loc[in_decade, 'primaryName']
        .value_counts()[:5]
        .rename_axis('name')
        .reset_index(name='count')
    )
    subplot.append(top_five)

In [125]:
# One bar chart per decade, laid out left-to-right then top-to-bottom on a
# 4 x 3 grid (the twelfth cell stays empty).
decade_titles = tuple(f'{start}-{start + 9}' for start in range(1920, 2030, 10))
n_cols = 3

fig = make_subplots(
    rows=4, cols=n_cols,
    subplot_titles=decade_titles,
)

# `subplot` holds one top-5 table per decade, in the same order as the titles.
for position in range(len(decade_titles)):
    top_actors = subplot[position]
    row, col = divmod(position, n_cols)
    # add_trace replaces the deprecated append_trace; grid coordinates are 1-based.
    fig.add_trace(
        go.Bar(
            x=top_actors['name'],
            y=top_actors['count'],
            marker_color=px.colors.qualitative.Plotly,
        ),
        row=row + 1, col=col + 1,
    )

fig.update_layout(template='plotly_dark', showlegend=False, height=1250, width=1000)
fig.show()

In [139]:
# Restrict the credits to titles typed as 'tvSeries'.
is_series = actors_titleType['titleType'] == 'tvSeries'
actors_series = actors_titleType.loc[is_series]
# Cells holding the two-character string backslash-N become a null marker so
# that dropna() can discard them.
actors_series_nan = actors_series.replace(to_replace='\\N', value=pd.NaT)
# Only the performer name and the start year are needed from here on.
actors_series_filtre = actors_series_nan.loc[:, ['primaryName', 'startYear']]
# Drop incomplete rows, then make the year an integer so it can be compared.
actors_series_clean = actors_series_filtre.dropna(axis=0, how='any').astype({'startYear': int})
# Keep the credits dated 1920 or later.
from_1920 = actors_series_clean['startYear'] >= 1920
actors_series_year = actors_series_clean.loc[from_1920]

In [141]:
# Build one table per decade (1920-1929 through 2020-2029) holding the five
# names with the most TV-series credits in that decade. Each table has the
# columns 'name' and 'count'; the tables are stored in chronological order.
subplot = []
for depart in range(1920, 2030, 10):
    fin = depart + 9  # last year of the decade, inclusive
    # between() is inclusive on both ends by default.
    in_decade = actors_series_year['startYear'].between(depart, fin)
    top_five = (
        actors_series_year.loc[in_decade, 'primaryName']
        .value_counts()[:5]
        .rename_axis('name')
        .reset_index(name='count')
    )
    subplot.append(top_five)


In [142]:
# One bar chart per decade, laid out left-to-right then top-to-bottom on a
# 4 x 3 grid (the twelfth cell stays empty).
decade_titles = tuple(f'{start}-{start + 9}' for start in range(1920, 2030, 10))
n_cols = 3

fig = make_subplots(
    rows=4, cols=n_cols,
    subplot_titles=decade_titles,
)

# `subplot` holds one top-5 table per decade, in the same order as the titles.
for position in range(len(decade_titles)):
    top_actors = subplot[position]
    row, col = divmod(position, n_cols)
    # add_trace replaces the deprecated append_trace; grid coordinates are 1-based.
    fig.add_trace(
        go.Bar(
            x=top_actors['name'],
            y=top_actors['count'],
            marker_color=px.colors.qualitative.Plotly,
        ),
        row=row + 1, col=col + 1,
    )

fig.update_layout(template='plotly_dark', showlegend=False, height=1250, width=1000)
fig.show()