In [None]:
import os
import pickle
import spacy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.offline as offline
import plotly.graph_objs as go
from Cluster_analysis import *
pd.options.mode.chained_assignment = None


# Hover-card template handed to set_layout by the plotting cells.
# %{hovertext} and %{customdata[i]} are placeholders filled in per point;
# note that customdata[5] is intentionally not referenced here.
Hovertemplate = (
    "<b>%{hovertext}</b><br><br>"
    "Movie: %{customdata[0]}<br>"
    "Release date: %{customdata[1]}<br>"
    "Actor name: %{customdata[2]}<br>"
    "Actor age at release: %{customdata[3]}<br>"
    "Gender: %{customdata[4]}<br>"
    "Character archetype: %{customdata[6]}<br>"
    "Description: %{customdata[7]}<br>"
    "Genres: %{customdata[8]}<br>"
    "Box office revenue: %{customdata[9]}<br>"
)



## Loading data 

In [None]:
# Tab-separated table with one row per character.
PATH = 'Data/final_df.csv'


def _parse_partner(raw):
    """Convert one stringified partner list into a list of cleaned entries.

    The 'No partner' marker is passed through unchanged. Any other value is
    expected to look like "['a', 'b']": the first and last characters are
    dropped, the rest is split on commas, and each piece is stripped of
    surrounding spaces and then of single quotes.
    """
    if raw == 'No partner':
        return raw
    return [piece.strip(' ').strip('\'') for piece in raw[1:-1].split(',')]


# Read the table and mark every missing value, in every column, as 'Not Available'.
df = pd.read_csv(PATH, sep='\t').fillna('Not Available')
# Missing partners get their own marker; present ones are parsed into lists.
df['partner'] = (
    df['partner']
    .replace('Not Available', 'No partner')
    .apply(_parse_partner)
)

# Display titles handed to add_centroids together with the centroids.
# Note: 'Arrogant leader' appears twice in this list.
titles = [
    'Decision-makers',
    'Heroes & anti-heroes',
    'Femme fatale',
    'Cunning evil',
    'Clumsy antagonist',
    'Virtuous',
    'Righteous warrior',
    'Benevolent leader',
    'Eccentric mentor',
    'Arrogant leader',
    'Naive ingénu',
    'Tycoon',
    'Ruthless commander',
    'Arrogant leader',
    'Love interest',
    'Reconciliator',
    'Adventurous woman',
    'Apprentice',
    'Young lover',
    'Logistician',
    'Lawyer',
    'Stubborn fool',
    'Eccentric vilain',
    'Marksman',
    'Goofy friend',
    'Hardworking learner',
    'Benevolent friend',
    'Sophisticated psycopath',
    'Nemesis',
    'Corrupt',
    'Good cop',
    'Protector',
    'Young girl',
]

# Centroid of each cluster, grouped by the 'labels' column.
centroids = compute_centroids(df, 'labels')


## Full graph

In [None]:
# Flip to True to also export the figure as a standalone HTML file.
store = False

# Columns offered to the hover card: True keeps a column, False hides it.
# The insertion order is kept exactly as the hover template expects it.
hover_columns = {
    'Name': True,
    'Release date': True,
    'Actor name': True,
    'Actor age at release': True,
    'Gender': True,
    'X': False,
    'Y': False,
    'Z': False,
    'labels': False,
    'title': True,
    'filtered_descriptions': True,
    'Genres': True,
    'Box office revenue': True,
    'partner': False,
}

# 3D scatter of every character, coloured by cluster label.
fig = px.scatter_3d(
    df,
    x='X',
    y='Y',
    z='Z',
    color='labels',
    color_continuous_scale=px.colors.cyclical.HSV,
    hover_name='Character name',
    hover_data=hover_columns,
)
# Overlay the cluster centroids with their titles, then apply the shared layout.
fig = add_centroids(fig, centroids, titles)
fig = set_layout(fig, df, Hovertemplate)
fig.show()
if store:
    fig.write_html("Plots/Full_plot.html")

## Romance vs non-romance

In [None]:
# Flip to True to also export the figure as a standalone HTML file.
store = False

# Columns offered to the hover card: True keeps a column, False hides it.
hover_columns = {
    'Name': True,
    'Release date': True,
    'Actor name': True,
    'Actor age at release': True,
    'Gender': True,
    'X': False,
    'Y': False,
    'Z': False,
    'labels': False,
    'title': True,
    'filtered_descriptions': True,
    'Genres': True,
    'Box office revenue': True,
    'partner': False,
}

# Split the characters on the 'romance' flag.
df_romance = df[df['romance'] == True]
df_nonromance = df[df['romance'] == False]

# Build one scatter per subset with identical settings; each call receives
# its own copy of the hover mapping.
fig_rom, fig_nonrom = [
    px.scatter_3d(
        subset,
        x='X',
        y='Y',
        z='Z',
        color='labels',
        color_continuous_scale=px.colors.cyclical.HSV,
        hover_name='Character name',
        hover_data=dict(hover_columns),
    )
    for subset in (df_romance, df_nonromance)
]

# Merge the non-romance points into the romance figure.
# NOTE(review): only the first trace is copied; this assumes 'labels' is
# numeric so that each scatter consists of a single trace - confirm.
fig_rom.add_trace(fig_nonrom.data[0])

fig_rom = set_layout(fig_rom, df, Hovertemplate)
fig_rom = add_centroids(fig_rom, centroids, titles)
fig_rom = add_button_romance(fig_rom)

# Show and export the romance figure itself (previously the full-graph
# figure `fig` was shown and written here by mistake).
fig_rom.show()
if store:
    fig_rom.write_html("Plots/Romance_plot.html")

## Genders

In [None]:
# When True, the figure is also exported as a standalone HTML file.
store = True

# Columns offered to the hover card: True keeps a column, False hides it.
hover_columns = {
    'Name': True,
    'Release date': True,
    'Actor name': True,
    'Actor age at release': True,
    'Gender': True,
    'X': False,
    'Y': False,
    'Z': False,
    'labels': False,
    'title': True,
    'filtered_descriptions': True,
    'Genres': True,
    'Box office revenue': True,
    'partner': False,
}

# One dataframe per gender value; 'Not Available' is the missing-value marker.
df_male = df[df['Gender'] == 'M']
df_female = df[df['Gender'] == 'F']
df_unknown = df[df['Gender'] == 'Not Available']

# Make one scatter plot per category and give each the shared layout.
fig_male, fig_female, fig_unknown = [
    set_layout(
        px.scatter_3d(
            subset,
            x='X',
            y='Y',
            z='Z',
            color='labels',
            color_continuous_scale=px.colors.cyclical.HSV,
            hover_name='Character name',
            hover_data=dict(hover_columns),
        ),
        df,
        Hovertemplate,
    )
    for subset in (df_male, df_female, df_unknown)
]

# Add the female and unknown traces to the male plot.
# NOTE(review): only the first trace of each figure is copied; this assumes
# 'labels' is numeric so that each scatter is a single trace - confirm.
fig_male.add_trace(fig_female.data[0])
fig_male.add_trace(fig_unknown.data[0])

# Add the centroids and the gender toggle button.
fig_male = add_centroids(fig_male, centroids, titles)
fig_male = add_button_gender(fig_male)
fig_male.show()
if store:
    # Export the gender figure (previously `fig`, the full-graph figure, was
    # written here) into the same "Plots/" folder the other cells use.
    fig_male.write_html("Plots/Gender_plot.html")


## Relations

In [None]:
# Draw the clusters farther apart: shift every point along each axis by
# 0.2 times its cluster label.
# NOTE: this rewrites df in place, so re-running the cell shifts the points again.
axes = ['X', 'Y', 'Z']
for axis in axes:
    df[axis] = df[axis] + df['labels'] * 0.2

# Pull the points of cluster 1 20% of the way towards the centroid of
# cluster 1 and leave all other points unchanged. A single .loc assignment
# per axis replaces the former row-by-row chained assignment
# (df['X'][i] = ...), which does not write back to df when pandas
# Copy-on-Write is active.
in_cluster_1 = df['labels'] == 1
for position, axis in enumerate(axes):
    current = df.loc[in_cluster_1, axis]
    df.loc[in_cluster_1, axis] = current - 0.2 * (current - centroids[1][position])


In [None]:
# Recalculate the centroids from the current coordinates in df.
centroids = compute_centroids(df, 'labels')

# 3D scatter of every character, coloured by cluster label.
fig_relations = px.scatter_3d(
    df,
    x='X',
    y='Y',
    z='Z',
    color='labels',
    color_continuous_scale=px.colors.cyclical.HSV,
    hover_name='Character name',
    hover_data={
        'Name': True,
        'Release date': True,
        'Actor name': True,
        'Actor age at release': True,
        'Gender': True,
        'X': False,
        'Y': False,
        'Z': False,
        'labels': False,
        'title': True,
        'filtered_descriptions': True,
        'partner': True,
    },
)

# Draw a line between characters that are in a relationship. The 'partner'
# column holds, per character, either 'No partner' or a list of Freebase
# character IDs.
coordinates = df[['X', 'Y', 'Z']].to_numpy()

# Row position of the first occurrence of each Freebase character ID.
first_position_by_id = {}
for position, character_id in enumerate(df['Freebase character ID']):
    first_position_by_id.setdefault(character_id, position)

# All segments are collected into one trace; a None entry after each pair of
# end points breaks the line, so the segments are not joined to each other.
# This avoids adding one trace per relationship.
link_x, link_y, link_z = [], [], []
for position, partners in enumerate(df['partner']):
    if partners == 'No partner':
        continue
    for partner_id in partners:
        partner_position = first_position_by_id.get(partner_id)
        if partner_position is None:
            # The partner is not present in df; skip the link rather than fail.
            continue
        start, end = coordinates[position], coordinates[partner_position]
        link_x.extend([float(start[0]), float(end[0]), None])
        link_y.extend([float(start[1]), float(end[1]), None])
        link_z.extend([float(start[2]), float(end[2]), None])

if link_x:
    # Thin white lines at 20% opacity.
    fig_relations.add_trace(go.Scatter3d(
        x=link_x,
        y=link_y,
        z=link_z,
        mode='lines',
        line=dict(color='white', width=1),
        opacity=0.2,
    ))

# Restyle the traces added so far (the centroids are added afterwards and
# are therefore not affected).
fig_relations.update_traces(
    marker=dict(size=3, colorscale='Viridis'),
    marker_line_width=1,
    marker_line_color='DarkSlateGrey',
    hovertemplate=(
        "<b>%{hovertext}</b><br><br>"
        "Movie: %{customdata[0]}<br>"
        "Release date: %{customdata[1]}<br>"
        "Actor name: %{customdata[2]}<br>"
    ),
)
fig_relations = add_centroids(fig_relations, centroids, titles)
# NOTE(review): the other cells call set_layout(fig, df, Hovertemplate); this
# one-argument call presumably relies on default parameters - confirm.
fig_relations = set_layout(fig_relations)
# Make initial plot more zoomed in
fig_relations.update_layout(scene_camera_eye=dict(x=0.5, y=0.5, z=0.5))
# Make the height of the plot larger
fig_relations.update_layout(height=800)

fig_relations.show()




## Slider for dates

In [None]:
# Copy df so that the frame used by the other plots keeps its
# 'Not Available' markers.
df2 = df.copy()

# Unknown release dates become NaN and the column is made numeric. A numeric
# placeholder such as 0 would satisfy "release date < 1920" and place every
# unknown-date row in the earliest time period; NaN compares False against
# every bound, so those rows fall into no period.
release_date = df2['Release date']
df2['Release date'] = pd.to_numeric(
    release_date.mask(release_date == 'Not Available')
)

df2['Release date'].unique()


In [None]:
# Partition df2 into 5 dataframes, one per release period. Lower bounds are
# inclusive and upper bounds exclusive. Any placeholder value below 1920
# (such as 0) lands in df_1, while NaN matches no period.
release_year = df2['Release date']
df_1 = df2[release_year < 1920]
df_2 = df2[(release_year >= 1920) & (release_year < 1940)]
df_3 = df2[(release_year >= 1940) & (release_year < 1960)]
df_4 = df2[(release_year >= 1960) & (release_year < 1980)]
df_5 = df2[release_year >= 1980]


In [None]:
# Partition df2 into 5 dataframes, one per release period. Lower bounds are
# inclusive and upper bounds exclusive.
release_year = df2['Release date']
df_1 = df2[release_year < 1920]
df_2 = df2[(release_year >= 1920) & (release_year < 1940)]
df_3 = df2[(release_year >= 1940) & (release_year < 1960)]
df_4 = df2[(release_year >= 1960) & (release_year < 1980)]
df_5 = df2[release_year >= 1980]
period_frames = [df_1, df_2, df_3, df_4, df_5]

# Create a plotly graph with one trace per time period.
fig_time = go.Figure()
for period_index, period_df in enumerate(period_frames):
    fig_time.add_trace(go.Scatter3d(
        x=period_df['X'],
        y=period_df['Y'],
        z=period_df['Z'],
        mode='markers',
        marker=dict(size=3, colorscale='Viridis'),
        marker_line_width=1,
        marker_line_color='DarkSlateGrey',
        # Supply the values the hover template refers to; without them the
        # %{hovertext} / %{customdata[i]} placeholders have nothing to show.
        hovertext=period_df['Character name'],
        customdata=period_df[['Name', 'Release date', 'Actor name']].to_numpy(),
        hovertemplate=(
            "<b>%{hovertext}</b><br><br>"
            "Movie: %{customdata[0]}<br>"
            "Release date: %{customdata[1]}<br>"
            "Actor name: %{customdata[2]}<br>"
        ),
        # Only the first period is visible initially, matching the slider's
        # starting position (active=0).
        visible=(period_index == 0),
    ))

# Create a slider to switch between the time periods: step i shows trace i
# and hides all the others.
period_count = len(period_frames)
steps = [
    dict(
        method="restyle",
        args=["visible", [other == shown for other in range(period_count)]],
        label='Time period ' + str(shown + 1),
    )
    for shown in range(period_count)
]

sliders = [dict(
    active=0,
    currentvalue={"prefix": "Time period: "},
    pad={"t": 50},
    steps=steps
)]

fig_time.update_layout(
    sliders=sliders
)

fig_time.show()
