In [1]:
import pandas as pd
import csv
import numpy as np
import mplcursors
import matplotlib.pyplot as plt
import matplotlib as mpl
import os

In [2]:
def find_repo_root(start_path):
    """
    Walk upwards from ``start_path`` and return the first (closest) directory
    that looks like a repository root, so that file paths built from it work
    the same on different machines.

    A directory counts as a root when it contains a ``.git`` directory or a
    ``README.md`` file. Returns ``None`` when the filesystem root is reached
    without a match.
    """
    candidate = os.path.abspath(start_path)
    previous = None

    # dirname() of the filesystem root is the root itself, so the walk ends
    # once a step upwards no longer changes the path.
    while candidate != previous:
        git_marker = os.path.join(candidate, '.git')
        readme_marker = os.path.join(candidate, 'README.md')
        if os.path.isdir(git_marker) or os.path.isfile(readme_marker):
            return candidate
        previous, candidate = candidate, os.path.dirname(candidate)

    return None  # No marker found in start_path or any of its parents

# Locate the repository root once; every data path below is built from it.
root = find_repo_root(os.getcwd())
if root is None:
    # find_repo_root returns None when no marker is found. Without this check
    # the f-string paths used later would silently become "None/..." and fail
    # with a misleading "file not found" error.
    raise FileNotFoundError(
        "Could not locate the repository root: no .git directory or README.md "
        f"found in {os.getcwd()!r} or any of its parent directories."
    )

In [3]:
### Build the matrix of per-trope genre proportions
all_tropes = []      # trope name of every data row, in file order
matrix_maker = []    # one list of genre proportions per trope
genres = []          # header labels of the columns that are read (indices 2..28)
with open(f"{root}/Data/liteweight/2020_genre_counts_by_trope.csv", 'r') as file:
    reader = csv.reader(file)
    header = next(reader)
    genres.extend(header[col] for col in range(2, 29))
    for record in reader:
        all_tropes.append(record[1])
        genre_counts = [int(record[col]) for col in range(2, 29)]
        total = sum(genre_counts)
        if total == 0:
            # Nothing to normalize by: keep the raw (all-zero) counts.
            matrix_maker.append(genre_counts)
        else:
            matrix_maker.append([count / total for count in genre_counts])

# Rows of bad_matrix are tropes; transposing gives one row per genre column.
bad_matrix = np.array(matrix_maker)
matrix = bad_matrix.transpose()

In [4]:
# Wrap the matrix in a DataFrame (default integer row/column labels) for display.
matrix_df = pd.DataFrame(data=matrix)

In [5]:
# Show the DataFrame view of the genre-proportion matrix.
matrix_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,26236,26237,26238,26239,26240,26241,26242,26243,26244,26245
0,0.148148,0.15625,0.09375,0.0,0.055556,0.0,0.0,0.142857,0.130435,0.0,...,0.0,0.102326,0.134752,0.119048,0.0,0.166667,0.0,0.333333,0.32,0.333333
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.037037,0.0625,0.09375,0.125,0.166667,0.0,0.2,0.142857,0.195652,0.0,...,0.0,0.032558,0.056738,0.047619,0.0,0.0,0.0,0.0,0.28,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.03125,0.0,0.0,0.0,0.0,0.0,0.0,0.065217,0.0,...,0.0,0.0,0.0,0.007937,0.0,0.0,0.0,0.0,0.0,0.0
5,0.111111,0.0625,0.25,0.375,0.083333,0.333333,0.4,0.285714,0.130435,0.0,...,0.083333,0.15814,0.141844,0.119048,0.0,0.166667,0.0,0.0,0.16,0.0
6,0.111111,0.125,0.03125,0.0,0.027778,0.0,0.0,0.0,0.0,0.0,...,0.0,0.004651,0.0,0.007937,0.0,0.0,0.0,0.0,0.04,0.0
7,0.0,0.03125,0.0,0.0,0.0,0.083333,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,0.296296,0.15625,0.1875,0.125,0.277778,0.0,0.2,0.142857,0.130435,0.0,...,0.166667,0.065116,0.049645,0.063492,0.0,0.0,0.0,0.0,0.0,0.333333
9,0.037037,0.0,0.03125,0.125,0.222222,0.0,0.2,0.0,0.086957,0.0,...,0.0,0.013953,0.014184,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [6]:
# Load the same genre-count table with pandas, using the first column as index.
df = pd.read_csv(f"{root}/Data/liteweight/2020_genre_counts_by_trope.csv", index_col=0)
print(df.columns)
genre_cols = [
    'Action', 'Adult', 'Adventure', 'Animation', 'Biography',
    'Comedy', 'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy',
    'Film-Noir', 'Game-Show', 'History', 'Horror', 'Music', 'Musical',
    'Mystery', 'News', 'Romance', 'Sci-Fi', 'Short', 'Sport', 'Thriller',
    'War', 'Western',
]

# Total of the listed genre columns for each trope.
df['num_genres'] = df[genre_cols].sum(axis=1)

# Tropes whose total is zero cannot be normalized, so they are removed.
df = df.drop(index=df.index[df['num_genres'] == 0])


def _genre_share_vector(row):
    """Return the row's genre counts divided by its ``num_genres`` as an array."""
    return np.array([row[col] / row['num_genres'] for col in genre_cols])


df['vector'] = df.apply(_genre_share_vector, axis=1)

Index(['Trope', 'Action', 'Adult', 'Adventure', 'Animation', 'Biography',
       'Comedy', 'Crime', 'Documentary', 'Drama', 'Family', 'Fantasy',
       'Film-Noir', 'Game-Show', 'History', 'Horror', 'Music', 'Musical',
       'Mystery', 'News', 'Romance', 'Sci-Fi', 'Short', 'Sport', 'Thriller',
       'War', 'Western', '\N', 'Associated_Movies', 'Number_movies'],
      dtype='object')


In [7]:
# Show the trope table, including the added num_genres and vector columns.
df

Unnamed: 0,Trope,Action,Adult,Adventure,Animation,Biography,Comedy,Crime,Documentary,Drama,...,Short,Sport,Thriller,War,Western,\N,Associated_Movies,Number_movies,num_genres,vector
0,ABNegative,4,0,1,0,0,3,3,0,8,...,0,0,2,0,0,1,"['Andhadhun', 'Blood Work', 'Bruce Almighty', ...",10,26,"[0.15384615384615385, 0.0, 0.03846153846153846..."
1,ABirthdayNotABreak,5,0,2,0,1,2,4,1,5,...,0,0,3,1,0,0,"['Batman Begins', 'Beast', 'Big Game', 'Creatu...",13,32,"[0.15625, 0.0, 0.0625, 0.0, 0.03125, 0.0625, 0..."
2,ABloodyMess,3,0,3,0,0,8,1,0,6,...,0,0,2,0,0,0,"['Carry on Behind', 'Cloud Atlas', 'Cornered',...",12,32,"[0.09375, 0.0, 0.09375, 0.0, 0.0, 0.25, 0.0312..."
3,ABoyAGirlAndABabyFamily,0,0,1,0,0,3,0,0,1,...,0,0,0,0,0,0,"['Honey, I Blew Up the Kid', 'The Addams Famil...",3,8,"[0.0, 0.0, 0.125, 0.0, 0.0, 0.375, 0.0, 0.0, 0..."
4,ABoyAndHisX,2,0,6,0,0,3,1,0,10,...,1,0,1,0,0,0,"['Air Bud', 'Alpha', 'Bumblebee', 'Fido', 'Fly...",13,36,"[0.05555555555555555, 0.0, 0.16666666666666666..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
26240,ZombieMooks,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,['Phantasm'],1,2,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
26241,ZombiePukeAttack,1,0,0,0,0,1,0,0,0,...,1,0,1,0,0,0,"['Day of the Dead', 'Demoni', 'Scary Movie']",3,6,"[0.16666666666666666, 0.0, 0.0, 0.0, 0.0, 0.16..."
26243,Zoom,1,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,['Time of the Apes'],1,3,"[0.3333333333333333, 0.0, 0.0, 0.0, 0.0, 0.0, ..."
26244,ZorroMark,8,0,7,0,0,4,1,0,0,...,0,0,1,0,0,0,"['Batman v Superman: Dawn of Justice', 'Cartou...",9,25,"[0.32, 0.0, 0.28, 0.0, 0.0, 0.16, 0.04, 0.0, 0..."


In [8]:
### Define step function
def H(x, t):
    """
    Step function with threshold ``t``.

    Returns 0 when ``x < t`` and 1 when ``x >= t``. When neither comparison
    holds (for example when ``x`` is NaN) the result is ``None``.
    """
    if x < t:
        return 0
    return 1 if x >= t else None

In [9]:
### Fill in movie vector
def vec_filler(x):
    """
    Build the normalized trope vector of the movie at elapsed time ``x``.

    Each row of the annotated trope TSV whose trope name (with spaces removed)
    occurs in ``all_tropes`` contributes ``H(x, start_time)`` to that trope's
    slot, where ``start_time`` is column 6 ("Start Time"):

    * subtracted when column 1 ("Inverted?/Defied?") is "Yes", or when
      column 2 ("Averted/Subverted?") is "Yes" and column 5 ("Setups?") is not;
    * ignored when column 5 ("Setups?") is "Yes";
    * added otherwise.

    The vector is then divided by the sum of its entries.

    Parameters
    ----------
    x : number
        Time that is compared against each row's start time.

    Returns
    -------
    numpy.ndarray
        One entry per element of ``all_tropes``. When the entries sum to zero
        (nothing to normalize by) an all-zero vector is returned instead of
        raising ``ZeroDivisionError``.
    """
    # Map each trope name to its position once per call instead of scanning the
    # whole list for every TSV row. list.index() returns the first match, so
    # setdefault keeps the first position if a name occurs more than once.
    trope_index = {}
    for position, name in enumerate(all_tropes):
        trope_index.setdefault(name, position)

    movie_vec = [0] * len(all_tropes)
    with open(f"{root}/Code/Alien Stuff/Alien Tropes - CodeTest.tsv", "r") as file:
        tsv_reader = csv.reader(file, delimiter='\t')
        next(tsv_reader)  # skip the header row
        for line in tsv_reader:
            trope = line[0].replace(" ", "")
            vec_index = trope_index.get(trope)
            if vec_index is None:
                continue  ### Need to find a way to keep track of this

            inverted_or_defied = line[1] == "Yes"
            averted_or_subverted = line[2] == "Yes"
            setup = line[5] == "Yes"

            # NOTE(review): `and` binds tighter than `or`, so the original
            # condition already meant A or (B and not C); the parentheses only
            # make that explicit. Confirm that an inverted/defied *setup* row
            # is really meant to be subtracted.
            if inverted_or_defied or (averted_or_subverted and not setup):
                movie_vec[vec_index] -= H(x, float(line[6]))
            elif setup:
                pass
            else:
                movie_vec[vec_index] += H(x, float(line[6]))

    normalizer = sum(movie_vec)
    if normalizer == 0:
        # Same convention as the genre matrix: leave an un-normalizable vector
        # as zeros rather than dividing by zero.
        return np.zeros(len(all_tropes))
    return np.array([element / normalizer for element in movie_vec])

In [10]:
# Load the annotated trope sheet (tab-separated) as a DataFrame.
alien_tsv_path = f"{root}/Code/Alien Stuff/Alien Tropes - CodeTest.tsv"
df_alien_vec = pd.read_csv(alien_tsv_path, sep="\t")

In [11]:
# Show the annotated trope rows loaded from the TSV.
df_alien_vec

Unnamed: 0,Trope,Inverted?/Defied?,Averted/Subverted?,Rough Occurence in movie,Background?,Setups?,Start Time,End Time,Total Time
0,Action Survivor,No,No,Part of the genre of the movie,Yes,No,0,7014,7014
1,Admiring The Abomination,No,No,The perfect organism,No,No,5162,5162,7014
2,AI Is A Crapshoot,No,No,Ripley discovers Ash's intentions,No,No,5130,5130,7014
3,Air-Vent Passageway,No,No,Xenomorph on ship,No,No,4107,4108,7014
4,Alien Blood,No,No,Trying to remove fachugger from Kane,No,No,2373,2462,7014
...,...,...,...,...,...,...,...,...,...
240,White Shirt of Death,No,No,Kane is wearing a white shirt when he dies,No,No,3366,3366,7014
241,Worst Aid,No,No,Parker forcing a piece of plastic into Kanes m...,No,No,3352,3352,7014
242,You Are In Command Now,No,No,Ripley is the only officer still on the ship,No,No,2670,2670,7014
243,You Are In Command Now,No,No,"Dallas and Kane are dead, leaving Ripley in ch...",No,No,4543,4543,7014


In [12]:
# Count how many annotated rows share each (Start Time, End Time) pair.
time_pairs = df_alien_vec[['Start Time', 'End Time']]
times = time_pairs.value_counts().reset_index()
times


Unnamed: 0,Start Time,End Time,count
0,0,7014,28
1,3385,3385,7
2,4995,4995,5
3,4804,4804,5
4,5937,5937,5
...,...,...,...
147,6601,6694,1
148,6729,6730,1
149,6762,6802,1
150,6810,6810,1


In [13]:
# Project the trope vector onto the genre matrix at every time step 0..7013.
snapshots = []   # one list of genre values per time step
x_points = []    # the time step each snapshot belongs to
for second in range(7014):
    new_basis_vec = list(np.matmul(matrix, vec_filler(second)))
    snapshots.append(new_basis_vec)
    x_points.append(second)
    x = second + 1  # number of time steps processed so far
    if x % 500 == 0:
        print(f"Processed {x} steps")

Processed 500 steps
Processed 1000 steps
Processed 1500 steps
Processed 2000 steps
Processed 2500 steps
Processed 3000 steps
Processed 3500 steps
Processed 4000 steps
Processed 4500 steps
Processed 5000 steps
Processed 5500 steps
Processed 6000 steps
Processed 6500 steps
Processed 7000 steps


In [14]:
genre = "Horror"  ### Input genre here

# Transposing the per-time-step snapshots gives one row per genre column.
genre_proportions = np.array(snapshots).T
_index = genres.index(genre)

### This is the genre makeup data
props = list(genre_proportions[_index])

In [15]:
### Get titles, times, and descriptions of tropes

labels = []   # one text label per plotted trope event
times = []    # time of each event, parallel to labels
with open(f"{root}/Code/Alien Stuff/Alien Tropes - CodeTest.tsv", "r") as file:
    tsv_reader = csv.reader(file, delimiter='\t')
    next(tsv_reader)  # skip the header row
    for row in tsv_reader:
        trope = row[0].replace(" ", "")
        descriptor = row[3]
        start_time = float(row[6])
        end_time = float(row[7])

        # Background tropes get no marker.
        if row[4] == "Yes":
            continue

        # Each row yields one event, except setups which yield two
        # (the setup at its start time and the conclusion at its end time).
        if row[1] == "Yes":
            events = [("Inverted/Defied " + trope, start_time)]
        elif row[2] == "Yes":
            events = [("Averted/Subverted " + trope, start_time)]
        elif row[5] == "Yes":
            events = [("Setup - " + trope, start_time),
                      ("Conclusion - " + trope, end_time)]
        else:
            events = [(trope, start_time)]

        for heading, moment in events:
            labels.append(heading + ": " + descriptor)
            times.append(moment)


In [16]:
# Collapse the event times to unique values and gather, for each unique time,
# every label that falls on it (one label per line).
times = list(set(times))
labels = [""] * len(times)
with open(f"{root}/Code/Alien Stuff/Alien Tropes - CodeTest.tsv", "r") as file:
    tsv_reader = csv.reader(file, delimiter='\t')
    next(tsv_reader)  # skip the header row
    for row in tsv_reader:
        trope = row[0].replace(" ", "")
        descriptor = row[3]
        start_time = float(row[6])
        end_time = float(row[7])

        # Background tropes get no marker.
        if row[4] == "Yes":
            continue

        if row[1] == "Yes":
            events = [("Inverted/Defied " + trope, start_time)]
        elif row[2] == "Yes":
            events = [("Averted/Subverted " + trope, start_time)]
        elif row[5] == "Yes":
            events = [("Setup - " + trope, start_time),
                      ("Conclusion - " + trope, end_time)]
        else:
            events = [(trope, start_time)]

        for heading, moment in events:
            slot = times.index(moment)
            labels[slot] += heading + ": " + descriptor + "\n"

In [17]:
# Value of the selected genre's curve at each labelled event time.
interactable_y = [props[x_points.index(int(moment))] for moment in times]

In [18]:
# %pip install plotly
# %pip install --upgrade nbformat 
## Note: this requires Rust to be installed; see https://rustup.rs/

In [19]:
# ### This is the problematic block, the code should be fine but it's not popping up in a window like it should

# %matplotlib notebook
# print(mpl.get_backend())
# fig, ax = plt.subplots(figsize =(6,6))
# plt.title(genre + " make up of Alien, with tropes")
# plt.xlabel("Elapsed Time(s)")
# plt.ylabel("% Makeup")
# ax.plot(x_points, props, c = "blue")
# ax.scatter(times, interactable_y, c = "darkorange", s = 60)
# mplcursors.cursor(ax, hover = True).connect("add", lambda sel: sel.annotation.set_text(labels[sel.index]))
# plt.show()

In [20]:
import plotly.graph_objects as go
import numpy as np

# The labels were assembled with "\n" between tropes that share a timestamp.
# Plotly hover text does not render "\n" as a line break (it expects the HTML
# tag <br>), so convert the separators and drop the trailing one.
hover_labels = [label.rstrip("\n").replace("\n", "<br>") for label in labels]

# Create the plot
fig = go.Figure()

# Add the line plot: the selected genre's value at every time step
fig.add_trace(go.Scatter(
    x=x_points,
    y=props,
    mode='lines',
    name='Makeup Over Time',
    line=dict(color='blue')
))

# Add the scatter points with hover text: one marker per unique event time
fig.add_trace(go.Scatter(
    x=times,
    y=interactable_y,
    mode='markers',
    name='Key Points',
    marker=dict(color='darkorange', size=10),
    text=hover_labels,  # Hover text for each point
    hoverinfo='text'  # Display the hover text
))

# Update the layout
fig.update_layout(
    title=f"{genre} make up of Alien, with tropes",
    xaxis_title="Elapsed Time(s)",
    yaxis_title="% Makeup",
    showlegend=True,
    template='plotly_white'
)

# Show the plot
fig.show()
