In [1]:
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from bubbly.bubbly import bubbleplot

In [2]:
# Load the movie table from the CSV export and preview the first rows.
csv_path = 'filmtv_movies - ENG.csv'
film_df = pd.read_csv(csv_path)
film_df.head()

Unnamed: 0,filmtv_id,title,year,genre,duration,country,directors,actors,avg_vote,critics_vote,public_vote,total_votes,description,notes,humor,rhythm,effort,tension,erotism
0,2,Bugs Bunny's Third Movie: 1001 Rabbit Tales,1982,Animation,76,United States,"David Detiege, Art Davis, Bill Perez",,7.7,8.0,7.0,22,"With two protruding front teeth, a slightly sl...","These are many small independent stories, whic...",3,3,0,0,0
1,3,18 anni tra una settimana,1991,Drama,98,Italy,Luigi Perelli,"Kim Rossi Stuart, Simona Cavallari, Ennio Fant...",6.5,6.0,7.0,4,"Samantha, not yet eighteen, leaves the comfort...","Luigi Perelli, the director of the ""Piovra"", o...",0,2,0,2,0
2,17,Ride a Wild Pony,1976,Romantic,91,United States,Don Chaffey,"Michael Craig, John Meillon, Eva Griffith, Gra...",5.6,6.0,5.0,9,"In the Australia of the pioneers, a boy and a ...","""Ecological"" story with a happy ending, not wi...",1,2,1,0,0
3,18,Diner,1982,Comedy,95,United States,Barry Levinson,"Mickey Rourke, Steve Guttenberg, Ellen Barkin,...",7.0,8.0,6.0,18,Five boys from Baltimore have a habit of meeti...,A cast of will be famous for Levinson's direct...,2,2,0,1,2
4,20,A che servono questi quattrini?,1942,Comedy,85,Italy,Esodo Pratelli,"Eduardo De Filippo, Peppino De Filippo, Clelia...",5.9,5.33,7.0,15,"With a stratagem, the penniless and somewhat p...",Taken from the play by Armando Curcio that the...,3,1,1,0,0


In [3]:
# Number of missing values in each column.
film_df.isna().sum()

filmtv_id           0
title               0
year                0
genre              95
duration            0
country            11
directors          33
actors           2052
avg_vote            0
critics_vote     4600
public_vote       474
total_votes         0
description      1455
notes           21847
humor               0
rhythm              0
effort              0
tension             0
erotism             0
dtype: int64

In [4]:
# Discard rows that lack a genre or a country; both columns are used as
# grouping keys in the cells below.
film_df = film_df.dropna(subset=['genre', 'country'])

In [5]:
# Tally the films for every (year, country, genre) combination.
film_count = (
    film_df
    .groupby(['year', 'country', 'genre'])
    .size()
    .reset_index(name='count')
)

# The "top" country is the one with the largest overall film total.
top_country = film_count.groupby('country')['count'].sum().idxmax()

# Keep only the rows that belong to that country.
film_count_top_country = film_count.loc[film_count['country'].eq(top_country)]

In [6]:
# Stacked bar chart of yearly film counts for the top country, one colour
# segment per genre.
fig = px.bar(
    film_count_top_country,
    x='year',
    y='count',
    color='genre',
    labels={'count': 'Number of Films'},
    title=f'Number of Films per Year in {top_country}',
)
# Customize the layout
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Number of Films',
    # Years are drawn as discrete categories. Plotly's default category order
    # is "first appearance across traces"; with one trace per genre, a year
    # that is missing from the first genre would be appended at the end of the
    # axis. Sorting the categories keeps the years chronological.
    xaxis=dict(type='category', categoryorder='category ascending'),
    showlegend=True,
    legend_title='Genre',
    height=600,
)
fig.show()

In [7]:
# Focus on the years 2000-2022 (both bounds inclusive).
filtered_df = film_df[film_df['year'].between(2000, 2022)]

# Recompute the per-(year, country, genre) counts and the top country on the
# restricted period. NOTE: this rebinds film_count, top_country and
# film_count_top_country from the earlier all-years cell.
film_count = (
    filtered_df
    .groupby(['year', 'country', 'genre'])
    .size()
    .reset_index(name='count')
)
top_country = film_count.groupby('country')['count'].sum().idxmax()
film_count_top_country = film_count[film_count['country'] == top_country]

fig = px.bar(
    film_count_top_country,
    x='year',
    y='count',
    color='genre',
    labels={'count': 'Number of Films'},
    title=f'Number of Films per Year in {top_country} - 2000 to 2022',
)
fig.update_layout(
    xaxis_title='Year',
    yaxis_title='Number of Films',
    # Categorical x-axis for years. The categories are sorted explicitly
    # because the default order (first appearance across the per-genre traces)
    # can place years out of chronological sequence.
    xaxis=dict(type='category', categoryorder='category ascending'),
    showlegend=True,
    legend_title='Genre',
    height=600,
)
fig.show()

## Movies per decade

In [8]:
# Add a 'decade' column (e.g. 1987 -> 1980). `assign` builds a new frame
# instead of writing a column into the dropna-derived film_df in place, which
# can trigger pandas' SettingWithCopyWarning; the cell stays safe to re-run.
film_df = film_df.assign(decade=(film_df['year'] // 10) * 10)

# Count the movies for every (decade, genre) pair.
movies_per_decade = (
    film_df
    .groupby(['decade', 'genre'])
    .size()
    .reset_index(name='count')
)


In [9]:
# Stacked bar chart: movies per decade, one colour segment per genre.
fig = px.bar(
    movies_per_decade,
    x='decade',
    y='count',
    color='genre',
    labels={'count': 'Number of Movies'},
    title='Number of Movies by decade',
)
fig.update_layout(
    xaxis_title='Decade',
    yaxis_title='Number of Movies',
    # Categorical x-axis for decades. Sort the categories explicitly: the
    # default order is first appearance across the per-genre traces, which can
    # put a decade out of sequence when the first genre has no films in it.
    xaxis=dict(type='category', categoryorder='category ascending'),
    showlegend=False,
    height=600,
)
fig.show()

In [13]:
# Count the movies for every (decade, country) pair. Stored under its own name
# so the (decade, genre) table in `movies_per_decade` is not overwritten and
# the genre chart above can still be re-run.
movies_per_decade_country = (
    film_df
    .groupby(['decade', 'country'])
    .size()
    .reset_index(name='count')
)

# Stacked bar chart: movies per decade, one colour segment per country.
fig = px.bar(
    movies_per_decade_country,
    x='decade',
    y='count',
    color='country',
    labels={'count': 'Number of Movies'},
    title='Number of Movies by decade',
)
fig.update_layout(
    xaxis_title='Decade',
    yaxis_title='Number of Movies',
    # Sort the categorical decades explicitly; the default order is first
    # appearance across the per-country traces and may not be chronological.
    xaxis=dict(type='category', categoryorder='category ascending'),
    showlegend=False,
    height=600,
)
fig.show()

## Films by country

In [10]:
# Count the films for every (year, country) pair; groupby sorts by year, so
# the animation frames are created in chronological order.
films_by_year_country = (
    film_df
    .groupby(['year', 'country'])
    .size()
    .reset_index(name='count')
)

# Animated choropleth map: one frame per year, countries matched by name.
fig = px.choropleth(
    films_by_year_country,
    locations='country',
    locationmode='country names',
    color='count',
    hover_name='country',
    color_continuous_scale=px.colors.sequential.PuRd,
    # Pin the colour range to the whole dataset so that a given shade stands
    # for the same film count in every yearly frame of the animation.
    range_color=(0, films_by_year_country['count'].max()),
    labels={'count': 'Number of Films'},
    title='Number of Films by Country',
    animation_frame='year',
)

fig.update_layout(geo=dict(showcoastlines=True), height=600)

fig.show()

## Genre by country

In [11]:
# The ten countries with the most films in the dataset.
top_countries = film_df['country'].value_counts().head(10).index

# Define the number of top genres to consider
top_genre_count = 10

# Draw one pie chart per top country showing its most frequent genres.
for country in top_countries:
    country_df = film_df[film_df['country'] == country]

    # value_counts sorts by frequency, so head() keeps the most common genres.
    top_genres = country_df['genre'].value_counts().head(top_genre_count)

    fig = go.Figure(data=[go.Pie(labels=top_genres.index, values=top_genres.values)])

    fig.update_layout(
        # Report the number of genres actually drawn: a country may have fewer
        # than top_genre_count distinct genres.
        title=f'Top {len(top_genres)} Genres in {country}',
        showlegend=True,
    )
    fig.show()
