## Setup

In [None]:
import psycopg2
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import seaborn as sns

In [None]:
# Connect to the Postgres database.
# Connection settings are read from environment variables so credentials do
# not have to be edited into the notebook; each fallback is the value that
# was previously hard-coded here, so behavior is unchanged when no variable
# is set.
import os

db_config = {
    'dbname': os.environ.get('MOVIE_DB_NAME', 'movie_db'),
    'user': os.environ.get('MOVIE_DB_USER', 'postgres'),
    'password': os.environ.get('MOVIE_DB_PASSWORD', 'DB_PASSWORD'),
    'host': os.environ.get('MOVIE_DB_HOST', 'HOST'),
    'port': os.environ.get('MOVIE_DB_PORT', '5333'),
}
# The connection is shared by every query cell below.
conn = psycopg2.connect(**db_config)

## Queries

In [None]:
# Filter for excluding movies without revenue/budget figures or with very short runtimes
# WHERE revenue > 0 AND budget > 0 AND runtime > 45

In [None]:
# Top 100 movies by revenue.
# Postgres places NULLs first when sorting DESC, so NULLS LAST keeps movies
# without a revenue value from taking up slots at the top of the ranking.
query = """
SELECT title, revenue, budget
FROM movies
ORDER BY revenue DESC NULLS LAST
LIMIT 100;
"""

df_top_revenue = pd.read_sql(query, conn)
df_top_revenue

In [None]:
# Top 100 movies by rating.
# movies_popularity is joined on movie_id, the same key the popularity-by-day
# query in this notebook uses; joining on mp.id presumably matched movies
# against the popularity table's own row id -- confirm against the schema.
# NULLS LAST keeps unrated rows from sorting to the top under DESC.
# NOTE(review): if movies_popularity holds several rows per movie, a title
# can appear more than once in this result.
query = """
SELECT title, mp.vote_average
FROM movies
JOIN movies_popularity mp ON movies.id = mp.movie_id
ORDER BY vote_average DESC NULLS LAST
LIMIT 100;
"""

df_top_rating = pd.read_sql(query, conn)
df_top_rating

In [None]:
# Movie count per production country, largest first
query = """
SELECT pc.name, COUNT(*) AS num_movies
FROM productioncountries pc
JOIN movieproductioncountries mpc ON pc.iso_3166_1 = mpc.iso_3166_1
GROUP BY pc.name
ORDER BY num_movies DESC;
"""
df_num_movies_by_country = pd.read_sql(sql=query, con=conn)

df_num_movies_by_country

In [None]:
# Movie count per genre, largest first
query = """
SELECT g.name, COUNT(*) AS num_movies
FROM genres g
JOIN moviegenres mg ON g.id = mg.genre_id
GROUP BY g.name
ORDER BY num_movies DESC;
"""
df_num_movies_by_genre = pd.read_sql(sql=query, con=conn)

df_num_movies_by_genre

In [None]:
# Mean revenue of the movies in each genre, highest first
query = """
SELECT g.name AS genre, AVG(m.revenue) AS average_revenue
FROM Genres g
JOIN MovieGenres mg ON g.id = mg.genre_id
JOIN Movies m ON mg.movie_id = m.id
GROUP BY g.name
ORDER BY average_revenue DESC;
"""
df_average_revenue_by_genre = pd.read_sql(sql=query, con=conn)

df_average_revenue_by_genre

In [None]:
# Title, popularity score and date for popularity rows from the last 30 days,
# most popular first
query = """
SELECT title, popularity, date
FROM movies
JOIN movies_popularity mp ON movies.id = mp.movie_id
WHERE date >= NOW() - INTERVAL '30 days'
ORDER BY popularity DESC;
"""
df_popularity_by_day = pd.read_sql(sql=query, con=conn)

df_popularity_by_day

In [None]:
# Movie count per spoken language (English language name), largest first
query = """
SELECT sl.english_name, COUNT(*) AS num_movies
FROM spokenlanguages sl
JOIN moviespokenlanguages msl ON sl.iso_639_1 = msl.iso_639_1
GROUP BY sl.english_name
ORDER BY num_movies DESC;
"""
df_num_movies_by_language = pd.read_sql(sql=query, con=conn)

df_num_movies_by_language

In [None]:
# Movie count per production country and release year,
# ordered by year and then country name
query = """
SELECT pc.name, EXTRACT(YEAR FROM release_date) AS release_year, COUNT(*) AS num_movies
FROM productioncountries pc
JOIN movieproductioncountries mpc ON pc.iso_3166_1 = mpc.iso_3166_1
JOIN movies ON mpc.movie_id = movies.id
GROUP BY pc.name, release_year
ORDER BY release_year, pc.name;
"""
df_num_movies_by_year_by_country = pd.read_sql(sql=query, con=conn)

df_num_movies_by_year_by_country

In [None]:
# Mean runtime over all rows of the Movies table (single-row result)
query = """
SELECT AVG(runtime) AS average_runtime
FROM Movies;
"""
df_average_runtime = pd.read_sql(sql=query, con=conn)

df_average_runtime

In [None]:
# Movie count per production company name, largest first
query = """
SELECT pc.name AS company, COUNT(*) AS movie_count
FROM ProductionCompanies pc
JOIN MovieProductionCompanies mpc ON pc.id = mpc.production_company_id
GROUP BY pc.name
ORDER BY movie_count DESC;
"""
df_num_movies_by_company = pd.read_sql(sql=query, con=conn)

df_num_movies_by_company

In [None]:
# Number of shared movies for each pair of production companies.
# The self-join on movie_id pairs up companies credited on the same movie;
# `pc1.name < pc2.name` keeps each pair once and drops pairs of equal names.
query = """
SELECT pc1.name AS company1, pc2.name AS company2, COUNT(*) AS movie_count
FROM ProductionCompanies pc1
JOIN MovieProductionCompanies mpc1 ON pc1.id = mpc1.production_company_id
JOIN MovieProductionCompanies mpc2 ON mpc1.movie_id = mpc2.movie_id
JOIN ProductionCompanies pc2 ON mpc2.production_company_id = pc2.id
WHERE pc1.name < pc2.name
GROUP BY pc1.name, pc2.name
ORDER BY movie_count DESC;
"""
df_cooperation = pd.read_sql(sql=query, con=conn)

df_cooperation

In [None]:
# Mean movie revenue per production company name, highest first
query = """
SELECT pc.name AS company, AVG(m.revenue) AS average_revenue
FROM ProductionCompanies pc
JOIN MovieProductionCompanies mpc ON pc.id = mpc.production_company_id
JOIN Movies m ON mpc.movie_id = m.id
GROUP BY pc.name
ORDER BY average_revenue DESC;
"""
df_average_revenue_by_company = pd.read_sql(sql=query, con=conn)

df_average_revenue_by_company

In [None]:
# Summed movie revenue per production company name, highest first
query = """
SELECT pc.name AS company, SUM(m.revenue) AS total_revenue
FROM ProductionCompanies pc
JOIN MovieProductionCompanies mpc ON pc.id = mpc.production_company_id
JOIN Movies m ON mpc.movie_id = m.id
GROUP BY pc.name
ORDER BY total_revenue DESC;
"""
df_total_revenue_by_company = pd.read_sql(sql=query, con=conn)

df_total_revenue_by_company

In [None]:
# Genre counts per production country.
# Rows are ordered by country name and then by descending count, so the
# first row of each country is its most frequent genre.
query = """
SELECT pc.name AS country, g.name AS genre, COUNT(*) AS genre_count
FROM genres g
JOIN moviegenres mg ON g.id = mg.genre_id
JOIN movieproductioncountries mpc ON mg.movie_id = mpc.movie_id
JOIN productioncountries pc ON mpc.iso_3166_1 = pc.iso_3166_1
GROUP BY pc.name, g.name
ORDER BY pc.name, genre_count DESC;
"""
df_num_movies_by_country_by_genre = pd.read_sql(sql=query, con=conn)

df_num_movies_by_country_by_genre

In [None]:
# Last space-separated token of every person's name
# (SPLIT_PART with index -1 counts from the end of the string)
query = """
    SELECT 
        SPLIT_PART(name, ' ', -1) AS last_name
    FROM 
        people
"""
df_last_names = pd.read_sql(sql=query, con=conn)

df_last_names

In [None]:
# One row per people_popularity entry: the person's gender code translated
# to a readable label, plus the popularity value
query = """
SELECT CASE 
    WHEN gender = 0 THEN 'Not set / not specified'
    WHEN gender = 1 THEN 'Female'
    WHEN gender = 2 THEN 'Male'
    WHEN gender = 3 THEN 'Non-binary'
    ELSE 'Unknown'
    END AS gender_label,
    popularity
FROM people
JOIN people_popularity ON people.id = people_popularity.person_id
"""
df_gender_distribution = pd.read_sql(sql=query, con=conn)

df_gender_distribution

In [None]:
# Ten names with the highest average popularity.
# NOTE(review): the query ranks everyone in `people`; it does not restrict
# the result to actors -- confirm whether that is intended.
query = """
    SELECT p.name, AVG(pp.popularity) AS avg_popularity
    FROM people p
    JOIN people_popularity pp ON p.id = pp.person_id
    GROUP BY p.name
    ORDER BY avg_popularity DESC
    LIMIT 10
"""
df_top_actors = pd.read_sql(sql=query, con=conn)

df_top_actors

In [None]:
# Mean popularity per `known_for_department` value
query = """
    SELECT known_for_department, AVG(popularity) as avg_popularity
    FROM people
    JOIN people_popularity ON people.id = people_popularity.person_id
    GROUP BY known_for_department
"""
df_department_popularity = pd.read_sql(sql=query, con=conn)

df_department_popularity

In [None]:
# Load the complete people table
query = """
    SELECT *
    FROM people
"""
df_people = pd.read_sql(sql=query, con=conn)

df_people

In [None]:
# Non-adult movies that have positive revenue and budget and a runtime above 45
query = """
    SELECT *
    FROM movies
    WHERE revenue > 0 AND budget > 0 AND runtime > 45 AND adult = FALSE
"""
df_movies = pd.read_sql(sql=query, con=conn)

df_movies

In [None]:
# Load the complete movies table without any filtering
query = """
    SELECT *
    FROM movies
"""
df_movies_unfiltered = pd.read_sql(sql=query, con=conn)

df_movies_unfiltered

In [None]:
# Load the genre lookup table ...
query = """
    SELECT *
    FROM genres
"""
df_genres = pd.read_sql(sql=query, con=conn)

# ... and the movie-to-genre link table
query = """
    SELECT *
    FROM moviegenres
"""
df_moviegenres = pd.read_sql(sql=query, con=conn)

In [None]:
# Budget and revenue of movies where budget, revenue and runtime are all positive
query = """
SELECT budget, revenue
FROM Movies
WHERE budget > 0 AND revenue > 0 AND RUNTIME > 0;
"""
budget_vs_revenue = pd.read_sql(sql=query, con=conn)

budget_vs_revenue

In [None]:
# Runtime and revenue of movies where runtime, revenue and budget are all positive
query = """
SELECT runtime, revenue
FROM Movies
WHERE runtime > 0 AND revenue > 0 AND budget > 0;
"""
runtime_vs_revenue = pd.read_sql(sql=query, con=conn)

runtime_vs_revenue

In [None]:
# Movie count per original language, largest first
query = """
SELECT original_language, COUNT(*) as num_movies
FROM Movies
GROUP BY original_language
ORDER BY num_movies DESC
"""
language_diversity_in_movies = pd.read_sql(sql=query, con=conn)

language_diversity_in_movies

## Visualization

In [None]:
# Bar chart: revenue of the top 100 movies by revenue
fig_revenue_top_100_bar = px.bar(
    data_frame=df_top_revenue,
    x='title',
    y='revenue',
    title='Top 100 movies by revenue',
)
fig_revenue_top_100_bar.show()

In [None]:
# Bar chart: vote average of the top 100 movies by rating
fig_top_100_bar = px.bar(
    data_frame=df_top_rating,
    x='title',
    y='vote_average',
    title='Top 100 movies by rating',
)
fig_top_100_bar.show()

In [None]:
# Bar chart: movie counts of the first 20 production countries
# (the dataframe is already ordered by descending count)
fig_top_countries_bar = px.bar(
    data_frame=df_num_movies_by_country.head(20),
    x='name',
    y='num_movies',
    title='Number of movies by production country',
)
fig_top_countries_bar.show()

In [None]:
# Bar chart: movie counts of the first 20 production companies
# (the dataframe is already ordered by descending count)
fig_top_companies_bar = px.bar(
    data_frame=df_num_movies_by_company.head(20),
    x='company',
    y='movie_count',
    title='Number of movies by production company',
)
fig_top_companies_bar.show()

In [None]:
# Bar chart: total revenue of the first 20 production companies
# (the dataframe is already ordered by descending total revenue)
fig_total_revenue_by_company_bar = px.bar(
    data_frame=df_total_revenue_by_company.head(20),
    x='company',
    y='total_revenue',
    title='Total revenue by production company',
)
fig_total_revenue_by_company_bar.show()

In [None]:
# Bar chart: average revenue for every genre
fig_average_revenue_by_genre_bar = px.bar(
    data_frame=df_average_revenue_by_genre,
    x='genre',
    y='average_revenue',
    title='Average revenue by genre',
)
fig_average_revenue_by_genre_bar.show()

In [None]:
# Animated bar chart of the 20 most frequent company pairs:
# one frame per `company1`, bars for its partner companies (`company2`)
fig_cooperation_bar = px.bar(
    data_frame=df_cooperation.head(20),
    x='company2',
    y='movie_count',
    color='movie_count',
    animation_frame='company1',
    title='Cooperation between production companies',
)
fig_cooperation_bar.show()

In [None]:
# World map: movie counts of the first 20 production countries,
# located by country name
fig_top_countries_map = px.choropleth(
    data_frame=df_num_movies_by_country.head(20),
    locations='name',
    locationmode='country names',
    color='num_movies',
    title='Number of movies by production country',
)
fig_top_countries_map.show()

In [None]:
# Bar chart: movie counts of the first 20 genres
fig_top_genres_bar = px.bar(
    data_frame=df_num_movies_by_genre.head(20),
    x='name',
    y='num_movies',
    title='Number of movies by genre',
)
fig_top_genres_bar.show()

In [None]:
# Keep the 30 titles that have the most popularity entries
titles_with_most_entries = df_popularity_by_day['title'].value_counts().index[:30]
df_popularity_by_day_top_30 = df_popularity_by_day[
    df_popularity_by_day['title'].isin(titles_with_most_entries)]
# Sort dates ascending so the animation plays forward in time (frames follow
# the order in which dates appear in the data); within a date the most
# popular titles come first.
df_popularity_by_day_top_30 = df_popularity_by_day_top_30.sort_values(
    by=['date', 'popularity', 'title'], ascending=[True, False, False])

# Animated bar chart: one frame per date, one bar per title
fig_popularity_top_30 = px.bar(df_popularity_by_day_top_30, x='title', y='popularity', animation_frame='date',
                               animation_group='title', title='Movies by popularity')
# Optional: pass range_y=[0, 600] above to keep the y axis fixed across frames
fig_popularity_top_30.update_layout(transition={'duration': 2000})
fig_popularity_top_30.show()

In [None]:
# Horizontal bar chart: movie counts of the first 15 spoken languages
fig_top_languages = px.bar(
    data_frame=df_num_movies_by_language.head(15),
    x='num_movies',
    y='english_name',
    orientation='h',
    title='Number of movies by spoken language',
)
fig_top_languages.show()

In [None]:
# Animated world map: movies per production country, one frame per release
# year, with the color scale fixed to 0..5000
fig_releases_by_year_map = px.choropleth(
    data_frame=df_num_movies_by_year_by_country,
    locations='name',
    locationmode='country names',
    color='num_movies',
    animation_frame='release_year',
    range_color=[0, 5000],
    title='Number of movies by release year by production country',
)
fig_releases_by_year_map.show()

In [None]:
# Running total of movies per country over release years:
# sum per (year, country), then cumulate within each country (index level 1)
df_num_movies_by_year_by_country_accumulated = df_num_movies_by_year_by_country.groupby(
    ['release_year', 'name']).sum().groupby(level=[1]).cumsum().reset_index()
# Fill years in which a country released nothing: pivot countries into
# columns, carry the last running total forward, then pivot back to long form.
# `.ffill()` replaces the deprecated `fillna(method='ffill')`.
df_num_movies_by_year_by_country_accumulated = df_num_movies_by_year_by_country_accumulated.set_index(
    ['release_year', 'name']).unstack().ffill().stack().reset_index()
df_num_movies_by_year_by_country_accumulated

In [None]:
# Animated world map of the accumulated movie counts, one frame per release
# year, with the color scale fixed to 0..140000
fig_releases_by_year_acc_map = px.choropleth(
    data_frame=df_num_movies_by_year_by_country_accumulated,
    locations='name',
    locationmode='country names',
    color='num_movies',
    animation_frame='release_year',
    range_color=[0, 140000],
    title='Number of movies by release year by production country (accumulated)',
)
fig_releases_by_year_acc_map.show()

In [None]:
# Take the first row of each country from the country/genre counts; this is
# the top genre as long as the dataframe keeps its descending-count ordering
df_num_movies_by_country_by_genre_top = (
    df_num_movies_by_country_by_genre
    .groupby('country')
    .first()
    .reset_index()
)

# World map colored by each country's top genre
fig_top_genre_by_country_map = px.choropleth(
    data_frame=df_num_movies_by_country_by_genre_top,
    locations='country',
    locationmode='country names',
    color='genre',
    title='Top genre by production country',
)
fig_top_genre_by_country_map.show()

In [None]:
from wordcloud import WordCloud
from plotly import graph_objects as go
from PIL import ImageFont

font_path = '/usr/share/fonts/truetype/ubuntu/Ubuntu-R.ttf'

# The loaded font object is discarded; presumably this is an early check that
# the font file can be opened before the word cloud is rendered.
ImageFont.truetype(font=font_path, size=30)

# Rows without a name come back as None/NaN, which str.join cannot handle,
# so drop them before building the text.
last_names_text = ' '.join(df_last_names['last_name'].dropna())

wordcloud = WordCloud(
    width=800,
    height=400,
    background_color='white',
    colormap='Dark2',
    max_words=50,
    font_path=font_path  # Specify the font path here
).generate(last_names_text)

# Show the rendered word cloud as a static image inside a plotly figure
fig_wordcloud = go.Figure(data=go.Image(z=wordcloud.to_array(),
                                        hoverinfo='none'))

# Match the figure size to the image and hide the axes
fig_wordcloud.update_layout(
    width=800,
    height=400,
    xaxis=dict(visible=False),
    yaxis=dict(visible=False),
    plot_bgcolor='rgba(0,0,0,0)'
)

fig_wordcloud.show()

In [None]:
# Box plot: popularity values grouped by gender label
fig_gender_distribution = px.box(
    data_frame=df_gender_distribution,
    x="gender_label",
    y="popularity",
)
fig_gender_distribution.show()

In [None]:
# Bar chart: average popularity of the ten top-ranked names
fig_top_actors_bar = px.bar(
    data_frame=df_top_actors,
    x='name',
    y='avg_popularity',
    title='Top 10 actors by average popularity',
)
fig_top_actors_bar.show()

In [None]:
# Bar chart: average popularity per department
fig_department_popularity_bar = px.bar(
    data_frame=df_department_popularity,
    x='known_for_department',
    y='avg_popularity',
    title='Average department popularity',
)
fig_department_popularity_bar.show()

In [None]:
# Pie chart of people per gender.
# Codes missing from the mapping become NaN and are left out of the counts.
gender_labels = {1: 'female', 2: 'male', 3: 'non-binary'}
gender_counts = df_people['gender'].map(gender_labels).value_counts()
gender_counts_df = pd.DataFrame({
    'gender': gender_counts.index,
    'count': gender_counts.values,
})

fig_gender_distribution_pie = px.pie(
    data_frame=gender_counts_df,
    values='count',
    names='gender',
)
fig_gender_distribution_pie.show()

In [None]:
# Histogram of the budgets in the filtered movies dataframe
fig_budget_distribution = px.histogram(
    data_frame=df_movies,
    x='budget',
    title='Budget distribution',
)
fig_budget_distribution.show()

In [None]:
# Profit per movie (revenue minus budget), stored as a new column on df_movies
df_movies['profit'] = df_movies['revenue'].sub(df_movies['budget'])

# Attach genre ids to the movies, then resolve the ids to genre rows
movies_and_genres = df_movies.merge(df_moviegenres, left_on='id', right_on='movie_id')
movies_genres_and_names = movies_and_genres.merge(df_genres, left_on='genre_id', right_on='id')

# Mean profit per genre name, highest first
average_profit_per_genre = (
    movies_genres_and_names
    .groupby('name')['profit']
    .mean()
    .sort_values(ascending=False)
)

# Bar chart with genre names on x and mean profit on y
fig_average_profit_per_genre_bar = px.bar(
    average_profit_per_genre,
    x=average_profit_per_genre.index,
    y=average_profit_per_genre.values,
    title='Average profit per genre',
)
fig_average_profit_per_genre_bar.show()

In [None]:
from PIL import Image
import io
from matplotlib import pyplot as plt
from plotly.graph_objs import Figure

# Draw the missing-value mask of the unfiltered movies as a seaborn heatmap
# on an 8x6 inch, 100 dpi figure
heatmap_fig, heatmap_ax = plt.subplots(figsize=(8, 6), dpi=100)
sns.heatmap(df_movies_unfiltered.isnull(), cbar=False, ax=heatmap_ax)
heatmap_ax.set_title('Missing Values Heatmap')
heatmap_fig.tight_layout()

# Render the figure to an in-memory PNG and reopen it as a PIL image
buf = io.BytesIO()
heatmap_fig.savefig(buf, format='png')
buf.seek(0)
image = Image.open(buf)

In [None]:
# Return per movie: revenue minus budget, stored as a new column
budget_vs_revenue['return'] = budget_vs_revenue['revenue'] - budget_vs_revenue['budget']

# Keep only rows with a positive budget and a positive revenue
has_budget_and_revenue = budget_vs_revenue['budget'].gt(0) & budget_vs_revenue['revenue'].gt(0)
filtered_df = budget_vs_revenue[has_budget_and_revenue]

# Count profitable vs. loss-making movies (a return of exactly 0 counts as neither)
positive_return_count = filtered_df['return'].gt(0).sum()
negative_return_count = filtered_df['return'].lt(0).sum()

# Labels and values for the two bars
categories = ['Positive Return', 'Negative Return']
return_counts = [positive_return_count, negative_return_count]

fig_return_bar = px.bar(x=categories, y=return_counts, title='Return of movies')
fig_return_bar.show()

In [None]:
# Histogram of movie runtimes with sky-blue bars
fig_runtime_distribution = px.histogram(
    data_frame=runtime_vs_revenue,
    x='runtime',
    nbins=30,
    title='Distribution of Movie Runtimes',
    labels={'runtime': 'Runtime (minutes)'},
    color_discrete_sequence=['skyblue'],
)

# Small gap between bars and a black outline around each bar
fig_runtime_distribution.update_layout(bargap=0.1)
fig_runtime_distribution.update_traces(marker={'line': {'color': 'black', 'width': 2}})

fig_runtime_distribution.show()

In [None]:
# The ten languages with the most movies ...
top_languages = language_diversity_in_movies.nlargest(10, 'num_movies')
# ... and every row after the tenth lumped together as a single "Other" entry
remaining_movie_total = language_diversity_in_movies.iloc[10:]['num_movies'].sum()
other_languages = pd.DataFrame({
    "original_language": ["Other"],
    "num_movies": [remaining_movie_total],
})
consolidated_languages = pd.concat([top_languages, other_languages])

# Pie chart of the consolidated language shares
fig_language_diversity = px.pie(
    data_frame=consolidated_languages,
    values='num_movies',
    names='original_language',
    title='Language diversity in movies',
)
fig_language_diversity.show()

## Dashboard

In [None]:
import dash
from dash import dcc
from dash import html
import dash_bootstrap_components as dbc

In [None]:
# Dash application that arranges the figures built above into a Bootstrap grid
app = dash.Dash(__name__, external_stylesheets=[dbc.themes.BOOTSTRAP])

# Inline style shared by every card on the dashboard
card_style = {
    'margin': '10px',
    'padding': '10px',
    'boxShadow': '0 4px 8px 0 rgba(0,0,0,0.2)'
}


def _section_heading(text):
    """Return a full-width row containing an H2 section heading."""
    return dbc.Row(dbc.Col(html.H2(text, className='my-4 mx-10'), width=12))


def _graph_card(title, graph_id, figure):
    """Return a half-width column holding a titled card around one graph."""
    return dbc.Col(dbc.Card([
        html.H3(title, className='card-header'),
        dcc.Graph(id=graph_id, figure=figure),
    ], style=card_style), md=6)


# The heatmap is a static PIL image (`image`), so it is shown as a layout
# background image behind two fully transparent markers that span the axes.
heatmap_figure = {
    'data': [{
        'x': [0, 1],
        'y': [0, 1],
        'mode': 'markers',
        'marker': {
            'opacity': 0
        }
    }],
    'layout': {
        'images': [go.layout.Image(
            source=image,
            xref="x",
            yref="y",
            x=0,
            y=1,
            sizex=1,
            sizey=1,
            sizing="stretch",
            opacity=1,
            layer="below"
        )],
        'xaxis': {'visible': False, 'range': [0, 1]},
        'yaxis': {'visible': False, 'range': [0, 1]},
        'width': 800,
        'height': 600,
        'title': 'Missing Values Heatmap'
    }
}

app.layout = dbc.Container([
    dbc.Row(dbc.Col(html.H1('Movie Dashboard', className='text-center my-4'), width=12)),

    _section_heading('Data overview'),

    # Wrapped in a Row (it used to sit directly in the container) so the
    # column gets the same grid treatment as every other card.
    dbc.Row([
        dbc.Col(dbc.Card([
            html.H3("Missing Values Heatmap"),
            html.Div([
                dcc.Graph(id='heatmap', figure=heatmap_figure)
            ]),
        ], style=card_style), md=6),
    ]),

    _section_heading('Movies'),

    dbc.Row([
        _graph_card('Distribution of Movie Runtimes', 'fig_runtime_distribution', fig_runtime_distribution),
        _graph_card('Top 100 movies by revenue', 'revenue_top_100', fig_revenue_top_100_bar),
    ]),

    dbc.Row([
        _graph_card('Number of movies by genre', 'num_movies_by_genre', fig_top_genres_bar),
        _graph_card('Average revenue by genre', 'fig_average_revenue_by_genre_bar',
                    fig_average_revenue_by_genre_bar),
    ]),

    dbc.Row([
        _graph_card('Movies by popularity and date', 'movies_by_popularity', fig_popularity_top_30),
        _graph_card('Number of movies by release year by production country',
                    'num_movies_by_year_by_country', fig_releases_by_year_map),
    ]),

    # Also moved into a Row; previously a bare Col inside the container.
    dbc.Row([
        _graph_card('Budget distribution', 'fig_budget_distribution', fig_budget_distribution),
    ]),

    _section_heading('Production'),

    dbc.Row([
        _graph_card('Language diversity in movies', 'fig_language_diversity', fig_language_diversity),
        _graph_card('Profit distribution by genre', 'fig_average_profit_per_genre_bar',
                    fig_average_profit_per_genre_bar),
    ]),

    dbc.Row([
        _graph_card('Return of movies', 'fig_return_bar', fig_return_bar),
        _graph_card('Map of production countries', 'num_movies_by_country_map', fig_top_countries_map),
    ]),

    dbc.Row([
        _graph_card('Number of movies by production country', 'num_movies_by_country', fig_top_countries_bar),
        _graph_card('Number of movies by production company', 'num_movies_by_company', fig_top_companies_bar),
    ]),

    dbc.Row([
        _graph_card('Total revenue by production company', 'total_revenue_by_company',
                    fig_total_revenue_by_company_bar),
        _graph_card('Cooperation between production companies', 'cooperation_between_companies',
                    fig_cooperation_bar),
    ]),

    dbc.Row([
        _graph_card('Top genre by production country', 'fig_top_genre_by_country_map',
                    fig_top_genre_by_country_map),
        _graph_card('Number of movies by spoken language', 'num_movies_by_language', fig_top_languages),
    ]),

    dbc.Row([
        _graph_card('Number of movies by release year by production country (accumulated)',
                    'num_movies_by_year_by_country_acc', fig_releases_by_year_acc_map),
        _graph_card('Average department popularity', 'department_popularity', fig_department_popularity_bar),
    ]),

    _section_heading('People'),

    dbc.Row([
        _graph_card('Wordcloud of last names', 'wordcloud', fig_wordcloud),
        _graph_card('Top actors by average popularity', 'top_actors', fig_top_actors_bar),
    ]),

    dbc.Row([
        _graph_card('Gender distribution', 'fig_gender_distribution_pie', fig_gender_distribution_pie),
        _graph_card('Gender distribution', 'gender_distribution', fig_gender_distribution),
    ]),

    html.Footer('Data provided by TMDB',
                className='text-center my-4')
], fluid=True)

# NOTE(review): newer Dash releases prefer `app.run(...)` over
# `app.run_server(...)`; confirm against the installed Dash version
# before switching.
app.run_server(debug=True)