In [12]:
import pandas as pd
import ast

In [4]:
import os

def find_repo_root(start_path):
    """
    useful general function for finding the (first, closest) repo root so github file paths work the same on different machines 
    """
    current_path = os.path.abspath(start_path)
    
    while True:
        # Check for the existence of the .git directory or other indicators
        if os.path.isdir(os.path.join(current_path, '.git')) or \
           os.path.isfile(os.path.join(current_path, 'README.md')):
            return current_path
        
        parent_path = os.path.dirname(current_path)
        
        # Stop if we reach the root directory
        if parent_path == current_path:
            break
        
        current_path = parent_path

    return None  # Return None if not found

root = find_repo_root(os.getcwd())
print(root)

c:\Users\fitsl\Documents\Programming\UVM Programming Classes\PoCS\pocs_project


In [13]:
df = pd.read_csv(f"{root}/Data/letterboxd_search.csv", index_col=0)
df.columns

Index(['Movie', 'Tropes', 'Movie_strip', 'year', 'imdb_', 'letterboxd_search'], dtype='object')

In [14]:
df['nu_tropes'] = df['Tropes'].apply(lambda x: x.split(","))

In [15]:
tropes = df['nu_tropes'].explode()
tropes = tropes.unique().tolist()
df_t = pd.DataFrame({'trope' : tropes})
df_t


Unnamed: 0,trope
0,ArgentineMedia
1,BlackMagic
2,ChekhovsGun
3,ChekhovsGunman
4,DramaticThunder
...,...
38275,Primordial
38276,MultiTaskedConversation
38277,NelsonMandela
38278,TheRunningMan


In [16]:
df['nu_tropes'].iloc[0]

[' ArgentineMedia',
 'BlackMagic',
 'ChekhovsGun',
 'ChekhovsGunman',
 'DramaticThunder',
 'DueToTheDead',
 'EvilMentor',
 'ExactWords',
 'FilmsOf20102014',
 'GrayRainOfDepression',
 'GreatOffscreenWar',
 'HesBack',
 'HistoricalHeroUpgrade',
 'HowWeGotHere',
 'ItAlwaysRainsAtFunerals',
 'OffscreenMomentOfAwesome',
 'RightfulKingReturns',
 'SexyDiscretionShot',
 'TheExile',
 'ThouShaltNotKill',
 'VeryLooselyBasedOnATrueStory',
 'WeWantOurJerkBack',
 'WhatsUpKingDude']

In [17]:
def trope_grabber(trope, df):
    movies = []
    for index, row in df.iterrows():
        if trope in row['Tropes']:
            movies.append((row['Movie'], row['letterboxd_search']))
    return movies
    
df_t_n = df_t.iloc[:10].copy()
df_t_n['movie'] = df_t_n['trope'].apply(lambda x : trope_grabber(x, df))

In [18]:
df_t_n = df_t.iloc[:10].copy()
df_t_n['movie'] = df_t_n['trope'].apply(lambda x : trope_grabber(x, df))

In [19]:
df_t_n

Unnamed: 0,trope,movie
0,ArgentineMedia,"[(Puerta De Hierro, https://letterboxd.com/s..."
1,BlackMagic,"[(Puerta De Hierro, https://letterboxd.com/s..."
2,ChekhovsGun,"[(Puerta De Hierro, https://letterboxd.com/s..."
3,ChekhovsGunman,"[(Puerta De Hierro, https://letterboxd.com/s..."
4,DramaticThunder,"[(Puerta De Hierro, https://letterboxd.com/s..."
5,DueToTheDead,"[(Puerta De Hierro, https://letterboxd.com/s..."
6,EvilMentor,"[(Puerta De Hierro, https://letterboxd.com/s..."
7,ExactWords,"[(Puerta De Hierro, https://letterboxd.com/s..."
8,FilmsOf20102014,"[(Puerta De Hierro, https://letterboxd.com/s..."
9,GrayRainOfDepression,"[(Puerta De Hierro, https://letterboxd.com/s..."


In [20]:
def trope_grabber(trope, df):
    mask = df['Tropes'].str.contains(trope, na=False)  # Boolean mask for matching tropes
    movies = df[mask][['Movie', 'letterboxd_search']]  
    return movies[['Movie', 'letterboxd_search']].values.tolist()  # Return list of tuples

df_t['movie'] = df_t['trope'].apply(lambda trope: trope_grabber(trope, df))
df_t

Unnamed: 0,trope,movie
0,ArgentineMedia,"[[Puerta De Hierro, https://letterboxd.com/s..."
1,BlackMagic,"[[Puerta De Hierro, https://letterboxd.com/s..."
2,ChekhovsGun,"[[Puerta De Hierro, https://letterboxd.com/s..."
3,ChekhovsGunman,"[[Puerta De Hierro, https://letterboxd.com/s..."
4,DramaticThunder,"[[Puerta De Hierro, https://letterboxd.com/s..."
...,...,...
38275,Primordial,"[[The Librarian, https://letterboxd.com/searc..."
38276,MultiTaskedConversation,"[[Matewan, https://letterboxd.com/search/Matew..."
38277,NelsonMandela,"[[The Running Man, https://letterboxd.com/se..."
38278,TheRunningMan,"[[The Running Man, https://letterboxd.com/se..."


In [21]:
df_t['movie'].iloc[0]

[['Puerta  De  Hierro',
  'https://letterboxd.com/search/Puerta+De+Hierro+2013/'],
 ['Chinese  Takeaway', 'https://letterboxd.com/search/Chinese+Takeaway+2020/'],
 ['Corazon  Loco', 'https://letterboxd.com/search/Corazon+Loco+2020/'],
 ['The  Official  Story',
  'https://letterboxd.com/search/The+Official+Story+1985/'],
 ['Nine  Queens', 'https://letterboxd.com/search/Nine+Queens+2000/']]

In [None]:
df_t['Movie_Titles']  = df_t['movie'].apply(lambda x: [i[0] for i in x])
df_t['letterboxd_searches'] = df_t['movie'].apply(lambda x: [i[1] for i in x])
df_t.drop(columns=["movie"], inplace=True)
df_t.to_csv(f"{root}/Data/Trope_storage/movies_by_trope_with_lettersearch.csv")

In [17]:
df = pd.read_csv(f"{root}/Data/Trope_storage/movies_by_trope_with_lettersearch.csv", index_col=0)
df['Movie_Titles'] = df['Movie_Titles'].apply(ast.literal_eval)
df['length'] = df['Movie_Titles'].apply(lambda x : len(x))
df.to_csv(f"{root}/Data/Trope_storage/movies_by_trope_with_lettersearch_and_length.csv")