In [None]:
import pandas as pd

# Load the pre-processed letter chunks; the first CSV column becomes the index.
df = pd.read_csv("/content/drive/MyDrive/SFU/STeve/DocsToExtractData/letters_chunks_processed_expert.csv", index_col=0)

# Keep only the first 862 rows. `.copy()` makes this an independent frame, so the
# column assignments in later cells act on their own data rather than on a slice
# of the loaded frame.
# The former bare `df.reset_index(drop=True)` call was removed: its result was
# discarded, so it never changed `df`. The index is reset later, before the merge.
df = df.iloc[0:862].copy()

In [None]:
# Parse the "date" column with pandas' datetime parser and store the result in a new "timestamp" column.
df["timestamp"] = pd.to_datetime(df["date"])

In [None]:
df.head()

# 1. Data Parsing (Characters and Relevance)

In [None]:
import re

# Section labels looked up in each Scene_Analysis text. Order matters: the loop
# below passes the following entry of this list as the end marker of a section.
keywords = ["Emotions", "Characters", "Context", "Relevance", "Paintings", "Comment"]

def extract_text_after_keyword(text, keyword, next_keyword):
    """Return the text that follows ``keyword:`` up to the next section label.

    Args:
        text: Raw analysis text made of ``<Label>:`` sections.
        keyword: Label whose content is wanted. It is inserted into the regular
            expression verbatim; ``"Comment"`` also matches ``"Comments"``.
        next_keyword: Label that terminates the section, or ``None`` when the
            section runs to the end of ``text``.

    Returns:
        The stripped section content, or ``""`` when the label is not found or
        ``text`` cannot be searched (for example when it is not a string).
    """
    try:
        # The trailing 's' is optional so both "Comment:" and "Comments:" match.
        if keyword == "Comment":
            keyword = "Comments?"
        if next_keyword == "Comment":
            next_keyword = "Comments?"

        if next_keyword is None:
            # Last section: stop only at the end of the text. Formatting None
            # into the pattern would end the section at a literal "None:".
            stop = r"$"
        else:
            stop = rf"\s*{next_keyword}:|$"

        pattern = rf"{keyword}:\s*(.*?)(?={stop})"
        match = re.search(pattern, text, re.DOTALL)
        if match:
            return match.group(1).strip()
        return ""
    except Exception as e:
        # Best effort: report the offending input and fall back to an empty value.
        print(f"Error processing keyword '{keyword}': {e}")
        print(text)
        return ""


# Pair every section label with the label that follows it (None after the last one).
keyword_pairs = list(zip(keywords, keywords[1:] + [None]))

# Visit the rows in order and write each extracted section into a column named after its label.
for row_label, scene_text in df['Scene_Analysis'].items():
    for current_keyword, following_keyword in keyword_pairs:
        df.at[row_label, current_keyword] = extract_text_after_keyword(
            scene_text, current_keyword, following_keyword
        )


In [None]:
df

In [None]:
df.head()

In [None]:
df.iloc[0]["API_Response"]

In [None]:
def process_text(text):
    """Split a generated script into its narrator, background and Vincent parts.

    The text is cut at the headers ``NARRATOR (V.O.)``, ``BACKGROUND`` and
    ``VINCENT VAN GOGH (V.O.)``; the voice-over headers are also recognised
    without the final dot (``(V.O)``). Text that follows a header is stripped
    and appended, with one trailing space, to that header's entry. Text that
    appears before the first header is ignored.

    Args:
        text: The script text to split.

    Returns:
        A dict with the keys ``'NARRATOR (V.O.)'``, ``'BACKGROUND'`` and
        ``'VINCENT VAN GOGH (V.O.)'``; a section that never occurs maps to ``''``.
    """
    # A single capturing group makes re.split return the matched header itself
    # between the text pieces.
    pattern = r"(NARRATOR \(V\.O\.?\)|BACKGROUND|VINCENT VAN GOGH \(V\.O\.?\))"
    split_text = re.split(pattern, text)

    temp_dict = {'NARRATOR (V.O.)': '', 'BACKGROUND': '', 'VINCENT VAN GOGH (V.O.)': ''}

    # Header spellings without the final dot are mapped onto the dictionary key.
    # Without this the header is not recognised and its text is dropped or
    # appended to the previous section.
    canonical_headers = {
        'NARRATOR (V.O)': 'NARRATOR (V.O.)',
        'VINCENT VAN GOGH (V.O)': 'VINCENT VAN GOGH (V.O.)',
    }
    key = None  # No header seen yet

    for section in split_text:
        header = canonical_headers.get(section, section)
        if header in temp_dict:
            key = header
        elif section and key:
            temp_dict[key] += section.strip() + " "

    return temp_dict

In [None]:
df.loc[910]

In [None]:
# Missing API responses are replaced by a placeholder string before parsing.
df["API_Response"] = df["API_Response"].fillna("No text")

# One dict of script sections per row, expanded into one column per section.
processed_data = df["API_Response"].apply(process_text)
processed_df = pd.DataFrame(list(processed_data))

# Give both frames a fresh 0..n-1 index so the index-on-index merge pairs rows by position.
df.reset_index(drop=True, inplace=True)
processed_df.reset_index(drop=True, inplace=True)

df_expanded = df.merge(processed_df, left_index=True, right_index=True)

df_expanded.head()


In [None]:
df_expanded.tail()

In [None]:
# Count, per script section, how many rows hold an empty string.
section_columns = ['NARRATOR (V.O.)', 'BACKGROUND', 'VINCENT VAN GOGH (V.O.)']
empty_counts = df_expanded[section_columns].eq("").sum()
narrator_empty_count, background_empty_count, vincent_empty_count = empty_counts

for column_name, empty_count in zip(section_columns, empty_counts):
    print(f"Empty '{column_name}' count:", empty_count)


In [None]:
# Rows in which at least one of the three script sections came out empty.
section_columns = ['NARRATOR (V.O.)', 'BACKGROUND', 'VINCENT VAN GOGH (V.O.)']
condition = df_expanded[section_columns].eq("").any(axis=1)

# `.copy()` gives sub_dataset its own data: a later cell assigns into it, and
# writing into a slice of df_expanded is ambiguous chained assignment.
sub_dataset = df_expanded.loc[condition].copy()

print(sub_dataset.shape)
sub_dataset.head()


(33, 18)


Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.)
7,008_V-T_008.pdf,1873-05-09,T-H,VvG,TvG,"Letter 008\nThe Hague, 9th May 1873\nDear Theo...","NARRATOR (V.O.)\nIn 1873, an emotional unsettl...","Emotions:\n[Sorrowful, Unsettled]\n\nCharacter...",1873-05-09,"[Sorrowful, Unsettled]","[Van Gogh, Theo (mentioned), Mrs. Tersteeg (me...",This scene represents when Van Gogh was about ...,8,[None],The scene under review is crucial in understan...,"In 1873, an emotional unsettled Van Gogh was t...","May 9, 1873, a humble Dutch residence in The H...",
14,015_V-T_012.pdf,1873-11-19,London,VvG,TvG,"Letter 012\nLondon, 19 November 1873\nDear The...","NARRATOR (V.O)\nIn the cold winter of 1873, a ...","Emotions:\n[Passion, Isolation, Longing, Conce...",1873-11-19,"[Passion, Isolation, Longing, Concern, Joy]","[Vincent Van Gogh, Theo Van Gogh (mentioned)]",This scene happens in Van Gogh's young adult y...,7,[None],This scene provides a deep insight into Vincen...,,":\nThe scene unfolds in a modest, dimly-lit ro...","Theo, Dear Brother, my days in London are fill..."
114,117_V-T_102.pdf,1877-07-15,Amsterdam,VvG,TvG,"Letter 102\nAmsterdam, Sunday 15 July 1877\nDe...",NARRATOR (V.O)\nWe are in the city of Amsterda...,"Emotions:\n[Yearning, Perseverance]\n\nCharact...",1877-07-15,"[Yearning, Perseverance]","[Vincent Van Gogh, Theo(mentioned), Reverend M...",This scene occurs when Van Gogh is a young man...,8,"['The Church at Auvers', 'View of the Sea at S...",This pre-artistry phase of Van Gogh's life sig...,,It is Sunday morning. Rain falls softly on cob...,"Today, I left for service amidst rain, and whe..."
126,129_V-T_114.pdf,1877-11-25,Amsterdam,VvG,TvG,"Letter 114\nAmsterdam, 25 November 1877\nDear ...","NARRATOR (V.O.)\nIn November 1877, a desolate ...","Emotions:\n[Desolation, Melancholy]\n\nCharact...",1877-11-25,"[Desolation, Melancholy]","[Vincent Van Gogh, Theo Van Gogh(mentioned), B...",This scene depicts an isolated Van Gogh in Ams...,9,[None],This scene is profound in its depiction of Van...,"In November 1877, a desolate Vincent Van Gogh ...",Twilight descends upon the city of Amsterdam. ...,
177,186_V-T_164.pdf,1881-12-21,Etten,VvG,TvG,"Letter 164\nEtten, c. 21 December 1881\nSometi...","NARRATOR (V.O.)\nVan Gogh, returning from Hagu...","conversation with his brother, Theo. He feels ...",1881-12-21,"[Desolation, Disquiet]","[Van Gogh, Theo (mentioned)]",This scene occurs after Van Gogh has returned ...,7,"[""At Eternity's Gate"", ""Sorrow""]",This scene reveals a key period in Van Gogh's ...,"Van Gogh, returning from Hague, is reflecting ...",,


In [None]:
sub_dataset.loc[177]["API_Response"]

'NARRATOR (V.O.)\nVan Gogh, returning from Hague, is reflecting on a'

In [None]:
# Re-run process_text on every row of sub_dataset and overwrite that row's three
# section columns with the result (the dict keys are the column names).
# NOTE(review): process_text was already applied to this same column when
# df_expanded was built, so this appears to recompute identical values; confirm
# whether a revised process_text was meant to be used here.
for number in list(sub_dataset.index):
  temp = process_text(sub_dataset.loc[number]["API_Response"])
  sub_dataset.loc[number, temp.keys()] = temp.values()

In [None]:
sub_dataset

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.)
7,008_V-T_008.pdf,1873-05-09,T-H,VvG,TvG,"Letter 008\nThe Hague, 9th May 1873\nDear Theo...","NARRATOR (V.O.)\nIn 1873, an emotional unsettl...","Emotions:\n[Sorrowful, Unsettled]\n\nCharacter...",1873-05-09,"[Sorrowful, Unsettled]","[Van Gogh, Theo (mentioned), Mrs. Tersteeg (me...",This scene represents when Van Gogh was about ...,8,[None],The scene under review is crucial in understan...,"In 1873, an emotional unsettled Van Gogh was t...","May 9, 1873, a humble Dutch residence in The H...",
14,015_V-T_012.pdf,1873-11-19,London,VvG,TvG,"Letter 012\nLondon, 19 November 1873\nDear The...","NARRATOR (V.O)\nIn the cold winter of 1873, a ...","Emotions:\n[Passion, Isolation, Longing, Conce...",1873-11-19,"[Passion, Isolation, Longing, Concern, Joy]","[Vincent Van Gogh, Theo Van Gogh (mentioned)]",This scene happens in Van Gogh's young adult y...,7,[None],This scene provides a deep insight into Vincen...,,":\nThe scene unfolds in a modest, dimly-lit ro...","Theo, Dear Brother, my days in London are fill..."
114,117_V-T_102.pdf,1877-07-15,Amsterdam,VvG,TvG,"Letter 102\nAmsterdam, Sunday 15 July 1877\nDe...",NARRATOR (V.O)\nWe are in the city of Amsterda...,"Emotions:\n[Yearning, Perseverance]\n\nCharact...",1877-07-15,"[Yearning, Perseverance]","[Vincent Van Gogh, Theo(mentioned), Reverend M...",This scene occurs when Van Gogh is a young man...,8,"['The Church at Auvers', 'View of the Sea at S...",This pre-artistry phase of Van Gogh's life sig...,,It is Sunday morning. Rain falls softly on cob...,"Today, I left for service amidst rain, and whe..."
126,129_V-T_114.pdf,1877-11-25,Amsterdam,VvG,TvG,"Letter 114\nAmsterdam, 25 November 1877\nDear ...","NARRATOR (V.O.)\nIn November 1877, a desolate ...","Emotions:\n[Desolation, Melancholy]\n\nCharact...",1877-11-25,"[Desolation, Melancholy]","[Vincent Van Gogh, Theo Van Gogh(mentioned), B...",This scene depicts an isolated Van Gogh in Ams...,9,[None],This scene is profound in its depiction of Van...,"In November 1877, a desolate Vincent Van Gogh ...",Twilight descends upon the city of Amsterdam. ...,
177,186_V-T_164.pdf,1881-12-21,Etten,VvG,TvG,"Letter 164\nEtten, c. 21 December 1881\nSometi...","NARRATOR (V.O.)\nVan Gogh, returning from Hagu...","conversation with his brother, Theo. He feels ...",1881-12-21,"[Desolation, Disquiet]","[Van Gogh, Theo (mentioned)]",This scene occurs after Van Gogh has returned ...,7,"[""At Eternity's Gate"", ""Sorrow""]",This scene reveals a key period in Van Gogh's ...,"Van Gogh, returning from Hague, is reflecting ...",,
179,188_V-C_165a.pdf,1881-01-01,Etten,VvG,Uncle Cor,"Letter 165a\nEtten\nDear Uncle,\nAs Theo is go...",NARRATOR (V.O)\nAt this time in Vincent van Go...,"Emotions:\n[Isolation, Yearning]\n\nCharacters...",1881-01-01,"[Isolation, Yearning]","[Vincent van Gogh, Uncle(mentioned), Theo(ment...",This scene depicts a period of Vincent van Gog...,8,[None specifically mentioned],This scene showcases a heartfelt plea by Van G...,,":\nA small, modest Dutch room draped in the wa...","Dear Uncle, Time finds me in the throes of dee..."
191,201_V-T_177.pdf,1882-02-25,T-H,VvG,TvG,"Letter 177\nThe Hague, Saturday 25 February 18...",NARRATOR (V.O)\nThe year is 1882. We find ours...,"Emotions:\n[Anticipation, Anxiety]\n\nCharacte...",1882-02-25,"[Anticipation, Anxiety]","[Vincent Van Gogh, Theo (mentioned), Weissenbr...","This scene occurs in 1882, during Vincent Van ...",8,[No specific paintings mentioned],The scene beautifully articulates Van Gogh's i...,,Cut to an exasperatingly plain yet intimately ...,"Ah, dear Theo... your letters lend a semblance..."
198,208_V-T_184.pdf,1882-04-01,T-H,VvG,TvG,"Letter 184\nThe Hague, early April, 1882\nDear...","NARRATOR (V.O.)\nWe are now in The Hague, in e...","Emotions:\n[Desolation, Introspection, Anxiety...",1882-04-01,"[Desolation, Introspection, Anxiety, Yearning]","[Van Gogh, Theo(mentioned)]","This scene happens in April 1882, highlighting...",8,[No specific painting related],The scene captures a pivotal moment in Van Gog...,"We are now in The Hague, in early April 1882. ...",".\nA small, unruly apartment is bathed in the ...",
202,212_V-T_188.pdf,1882-04-27,T-H,VvG,TvG,"Letter 188\nThe Hague, c. 15 – 27 April 1882\n...",NARRATOR (V.O)\nIn a turbulent time of Van Gog...,"Emotions:\n[Anxiety, Determination]\n\nCharact...",1882-04-27,"[Anxiety, Determination]","[Van Gogh, Theo(mentioned), Mauve(mentioned), ...",This is a moment during Van Gogh's time in The...,8,['The Vegetable Gardens along the Laan van Mee...,This particular scene paints a vivid picture o...,,"Dim light pours through a quartered window, gl...","Theo, today I sent you another drawing, the Ve..."
210,216_V-T_195.pdf,1882-05-01,T-H,VvG,TvG,"Letter 195\nThe Hague, 1 May 1882\nDear Theo,\...",NARRATOR (V.O)\nWe find ourselves in the heart...,"Emotions:\n[Struggle, Passion, Longing, Relief...",1882-05-01,"[Struggle, Passion, Longing, Relief]","[Van Gogh, Theo(mentioned), Mauve(mentioned), ...",This scene takes place during a period of Vinc...,9,"[""Sorrow"", ""The Roots""]","In this emotionally charged scene, we are offe...",,"Inside his lonely studio in The Hague, Van Gog...","Dear Theo, your letter has given clarity amids..."


In [None]:
sub_dataset.index

Int64Index([  7,  14, 114, 126, 177, 179, 191, 198, 202, 210, 263, 269, 299,
            313, 325, 395, 396, 456, 458, 545, 573, 581, 667, 670, 676, 758,
            771, 797, 823, 835, 840, 841, 858],
           dtype='int64')

In [None]:
# Copy the rows of sub_dataset back into df_expanded, matching rows by sub_dataset's index labels.
df_expanded.loc[list(sub_dataset.index)] = sub_dataset.loc[list(sub_dataset.index)]

In [None]:
# Recount, per script section, how many rows hold an empty string.
section_columns = ['NARRATOR (V.O.)', 'BACKGROUND', 'VINCENT VAN GOGH (V.O.)']
empty_counts = df_expanded[section_columns].eq("").sum()
narrator_empty_count, background_empty_count, vincent_empty_count = empty_counts

for column_name, empty_count in zip(section_columns, empty_counts):
    print(f"Empty '{column_name}' count:", empty_count)

Empty 'NARRATOR (V.O.)' count: 18
Empty 'BACKGROUND' count: 1
Empty 'VINCENT VAN GOGH (V.O.)' count: 16


In [None]:
df_expanded

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.)
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]","[Vincent Van Gogh, Theo Van Gogh(mentioned), H...",This scene is set in the winter of 1872 in Van...,8,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha..."
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]","[Vincent Van Gogh, Theo Van Gogh(mentioned), R...",This scene reflects a young Vincent van Gogh's...,7,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i..."
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...","[Vincent Van Gogh, Theo Van Gogh(mentioned), S...","This scene depicts the young Van Gogh in 1873,...",8,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]","[Vincent van Gogh, Theo van Gogh(mentioned), U...",This scene describes a time when Van Gogh was ...,7,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ..."
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]","[Vincent van Gogh, Theo van Gogh(mentioned), U...","The scene takes place in the spring of 1873, a...",8,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,802_V-W_W19.pdf,1890-01-20,S-R,VvG,Wilhelmina van Gogh,"W19 1\nSaint-Rémy, 20 January 1890\nDear Siste...","NARRATOR (V.O.)\nIn this scene, we examine a t...","Emotions:\n[Anxiety, Longing, Restlessness, So...",1890-01-20,"[Anxiety, Longing, Restlessness, Solace]","[Van Gogh, Sister(mentioned), Gauguin(mentione...",This scene captures a period of Van Gogh's lif...,8,['Women in the Olive Orchard'],"In this scene, we see Van Gogh wrestling with ...","In this scene, we examine a time in Vincent va...",:\nThis scene is set in the quintessentially l...,"Dear Sister, \nThe Paris I was once entranced ..."
858,813_V-W_W20.pdf,1890-02-20,S-R,VvG,Wilhelmina van Gogh,"W20 1\nSaint-Rémy, c. 20 February 1890\nMy Dea...","NARRATOR (V.O)\nIn February 1890, post-impress...","Emotions:\n[Anxiety, Solitude, Gratitude, Year...",1890-02-20,"[Anxiety, Solitude, Gratitude, Yearning, Intri...","[Vincent Van Gogh, Sister(mentioned), Mother(m...","This scene takes place in February 1890, when ...",9,"['Branches with Almond Blossoms', 'The Arlesie...",The scene presents a thoughtful and introspect...,,"The rustic room at the asylum, illuminated onl...","My Dear Sister, \n\nIt appears as though ordin..."
859,831_V-W_W21.pdf,1890-05-20,S-R,VvG,Wilhelmina van Gogh,"W21 1\nAuvers, c. 20 May 1890\nMy Dear Sister,...",NARRATOR (V.O.)\nWe find ourselves in the late...,"Emotions:\n[Gratitude, Contentment]\n\nCharact...",1890-05-20,"[Gratitude, Contentment]","[Van Gogh, Sister(mentioned), Theo(mentioned),...",This scene takes place after Van Gogh left the...,9,"[Roses, Violets, Olive Orchards]",This scene is quite significant and encapsulat...,We find ourselves in the late spring of 1890. ...,"The scene starts in a modest room, bathed in t...","My dear Sister,\nThrongs of unuttered thoughts..."
860,839_V-W_W22.pdf,1890-06-05,A-s-O,VvG,Wilhelmina van Gogh,"W22 1\nAuvers-sur-Oise, 5 June 1890\nMy Dear S...","NARRATOR (V.O.)\nWe are in the summer of 1890,...","Emotions:\n[Determination, Longing]\n\nCharact...",1890-06-05,"[Determination, Longing]","[Vincent Van Gogh, Dr. Gachet, Theo Van Gogh(m...","This scene is from the summer of 1890, shortly...",8,"[""Dr. Gachet's Portrait"", ""The Church at Auvers""]",The scene under examination showcases the intr...,"We are in the summer of 1890, Vincent Van Gogh...",A small room bathed in the warm hues of the ev...,"June 5th, 1890...Auvers-sur-Oise. My dearest s..."


# 2. Emotion to Valence and Arousal

In [None]:
# prompt: Using dataframe df_expanded: iterate over the emotions column, which contains lists, and append them to a single list, then get a set of the list

# Flatten every row's bracketed, comma-separated emotion string into one list of
# lower-cased words (surrounding whitespace and single quotes removed).
emotions_list = [
    raw_word.strip().strip('\'').lower()
    for row_text in df_expanded['Emotions']
    for raw_word in row_text.strip('[]').split(',')
]

# Distinct emotion words across all rows.
emotions_set = set(emotions_list)

print(emotions_set)


{'turbulence', 'misunderstood', 'accomplishment', 'nervous', 'discouragement', 'insecurity', 'focus', 'solidarity', 'conviction', 'passionate', 'compulsion', 'appreciation', 'enthusiasm', 'regretful', 'displacement', 'struggling', 'distressed', 'guilt', 'contemplation', 'dissatisfaction', 'impotence', 'anxiety', 'inquisitiveness', 'solitary', 'conflicted', 'maddening', 'awestruck', 'obsession', 'unease', 'captivation', 'vibrancy', 'lonely', 'gloom', 'stress', 'rage', 'pity', 'compassion', 'contentment', 'pained', 'discerning', 'devoted', 'defiance', 'coping', 'calmness', 'enrapturement', 'striving', 'inspired', 'remote', 'mesmerized', 'eagerness', 'fear', 'infatuated', 'peace', 'relentless', 'despondent', 'exhiliration', 'grief', 'disillusionment', 'fondness', 'turmoil', 'endeavor', 'nervousness', 'compassionate', 'eager', 'solace', 'repugnance', 'agony', 'determination', 'dread', 'hurt', 'defiant', 'melancholy', 'burdened', 'gratefulness', 'heartbreak', 'startled', 'troubled', 'driven

In [None]:
print(len(emotions_list))
print(len(emotions_set))

2735
341


In [None]:
df_va = pd.read_csv("/content/drive/MyDrive/SFU/STeve/DocsToExtractData/letters_words_valence_arousal.csv", index_col=0)
df_va.head()

Unnamed: 0,Word,Valence_Arousal
0,absorbed,"absorbed:[0.5,0.2]"
1,wistfultness,"wistfulness: [-0.2, -0.1]"
2,adventure,"adventure:[0.7,0.8]"
3,turbulence,"turbulence:[-0.7,0.8]"
4,impatience,"impatience:[-0.7,0.6]"


In [None]:
"confused:[-0.6,0.4]".split(":")[1].strip().strip("[").strip("]").split(",")

['-0.6', '0.4']

In [None]:
# "word:[v,a]" -> keep the part after the colon, then peel off whitespace, quotes and brackets.
va_text = df_va['Valence_Arousal'].str.split(':').str[1]
va_text = va_text.str.strip().str.strip("'").str.strip('[]')

# The two comma-separated numbers become the Valence and Arousal columns (still strings here).
df_va[['Valence', 'Arousal']] = va_text.str.split(',', expand=True)
df_va.head()

Unnamed: 0,Word,Valence_Arousal,Valence,Arousal
0,absorbed,"absorbed:[0.5,0.2]",0.5,0.2
1,wistfultness,"wistfulness: [-0.2, -0.1]",-0.2,-0.1
2,adventure,"adventure:[0.7,0.8]",0.7,0.8
3,turbulence,"turbulence:[-0.7,0.8]",-0.7,0.8
4,impatience,"impatience:[-0.7,0.6]",-0.7,0.6


In [None]:
# Convert the split-out text values into numbers.
for va_column in ('Valence', 'Arousal'):
    df_va[va_column] = pd.to_numeric(df_va[va_column])

In [None]:
# The combined text column is no longer needed once Valence and Arousal exist.
df_va = df_va.drop(columns=["Valence_Arousal"])

In [None]:
# Index the table by emotion word. The column is named explicitly instead of
# taken by position, so re-running this cell fails loudly rather than silently
# promoting a different column to the index.
df_va = df_va.set_index("Word")


In [None]:
df_va.head()

Unnamed: 0_level_0,Valence,Arousal
Word,Unnamed: 1_level_1,Unnamed: 2_level_1
absorbed,0.5,0.2
wistfultness,-0.2,-0.1
adventure,0.7,0.8
turbulence,-0.7,0.8
impatience,-0.7,0.6


In [None]:
import numpy as np

# Sample list of emotion words. get_average_VA below rebinds `lista` as a local
# name, so the function does not read this module-level value.
lista = np.array(["Desperation", "Isolation", "Passion"])

def get_average_VA(emotions, va_table=None):
    """Average the valence/arousal values of a collection of emotion words.

    Each word is stripped of surrounding whitespace and quote characters and
    lower-cased before it is looked up in the index of ``va_table``. Words that
    are not in the table are reported on stdout and left out of the average, so
    one unknown word no longer resets the whole result to zero.

    Args:
        emotions: Sequence of emotion words (may be empty or ``None``).
        va_table: Optional DataFrame indexed by word whose columns hold the
            values to average. Defaults to the notebook-level ``df_va``.

    Returns:
        A NumPy array with the column-wise mean over the words found in the
        table, or ``[0, 0]`` when there are no words or none of them is known.
    """
    if va_table is None:
        va_table = df_va

    neutral = np.array([0, 0])
    if emotions is None or len(emotions) == 0:
        return neutral

    # Normalise the words; quotes are removed because some rows carry them
    # (e.g. "'isolation'"), which would never match the table index.
    cleaned = [str(word).strip().strip("'\"").strip().lower() for word in emotions]
    cleaned = [word for word in cleaned if word]
    if not cleaned:
        return neutral

    known = [word for word in cleaned if word in va_table.index]
    missing = [word for word in cleaned if word not in va_table.index]
    if missing:
        print(f"KeyError: {missing} not found in the valence/arousal table")
    if not known:
        return neutral

    return va_table.loc[known].to_numpy().mean(axis=0)


In [None]:
df_expanded.columns

Index(['file_name', 'date', 'origin', 'sender', 'receiver', 'content',
       'API_Response', 'Scene_Analysis', 'timestamp', 'Emotions', 'Characters',
       'Context', 'Relevance', 'Paintings', 'Comment', 'NARRATOR (V.O.)',
       'BACKGROUND', 'VINCENT VAN GOGH (V.O.)'],
      dtype='object')

In [None]:
df_expanded["Emotions"].iloc[0].strip("[").strip("]").split(",")

['Melancholy', ' Nostalgia']

In [None]:
# Turn each bracketed, comma-separated emotion string into a list of raw pieces.
emotions_list = df_expanded["Emotions"].str.strip("[").str.strip("]").str.split(",")

# Trim every piece, remove surrounding quote characters (some rows are written
# as ['Isolation', 'Yearning'], and quoted words never match the valence/arousal
# index), and drop pieces that end up empty.
emotions_clean = emotions_list.apply(
    lambda words: [
        cleaned
        for cleaned in (word.strip().strip("'\"").strip() for word in words)
        if cleaned
    ]
)
emotions_clean

0                                [Melancholy, Nostalgia]
1                                       [Joy, Eagerness]
2      [Loneliness, Comfort, Joy, Ambition, Anticipat...
3                                        [Longing, Pity]
4                              [Anticipation, Nostalgia]
                             ...                        
857             [Anxiety, Longing, Restlessness, Solace]
858    [Anxiety, Solitude, Gratitude, Yearning, Intri...
859                             [Gratitude, Contentment]
860                             [Determination, Longing]
861            [Desperation, Burdened, Longing, Urgency]
Name: Emotions, Length: 862, dtype: object

In [None]:
# Compute one averaged [valence, arousal] result per row from its cleaned emotion list.
va_values = emotions_clean.apply(get_average_VA)

# zip(*...) transposes the per-row pairs into two sequences (all first values,
# all second values), which become the Valence and Arousal columns.
df_expanded['Valence'], df_expanded['Arousal'] = zip(*va_values)

df_expanded.head()

KeyError: "None of [Index([''isolation'', ''yearning'', ''sadness'', ''hope''], dtype='object', name='Word')] are in the [index]"


Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]","[Vincent Van Gogh, Theo Van Gogh(mentioned), H...",This scene is set in the winter of 1872 in Van...,8,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.1,-0.3
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]","[Vincent Van Gogh, Theo Van Gogh(mentioned), R...",This scene reflects a young Vincent van Gogh's...,7,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.9,0.7
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...","[Vincent Van Gogh, Theo Van Gogh(mentioned), S...","This scene depicts the young Van Gogh in 1873,...",8,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.42,0.4
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]","[Vincent van Gogh, Theo van Gogh(mentioned), U...",This scene describes a time when Van Gogh was ...,7,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.4,0.4
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]","[Vincent van Gogh, Theo van Gogh(mentioned), U...","The scene takes place in the spring of 1873, a...",8,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.55,0.15


In [None]:

df_expanded[df_expanded["Relevance"] == ""]

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal


In [None]:
# Pull the first run of digits out of each Relevance string and store it as a float
# (strings without digits become NaN). The pattern is a raw string so that `\d` is
# not read as an invalid string escape; expand=False returns a Series, matching
# the single column being assigned.
df_expanded["Relevance"] = df_expanded['Relevance'].str.extract(r'(\d+)', expand=False).astype(float)

In [None]:
# Divide the extracted relevance score by 10.
# NOTE(review): this cell overwrites its own input, so executing it a second
# time divides the values again; run it once per kernel session.
df_expanded["Relevance"] = df_expanded["Relevance"]/10

In [None]:
df_expanded["Relevance"].unique()

array([0.8, 0.7, 0.9, 1. ])

In [None]:
df_expanded.head(6)

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,Characters,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]","[Vincent Van Gogh, Theo Van Gogh(mentioned), H...",This scene is set in the winter of 1872 in Van...,0.8,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.1,-0.3
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]","[Vincent Van Gogh, Theo Van Gogh(mentioned), R...",This scene reflects a young Vincent van Gogh's...,0.7,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.9,0.7
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...","[Vincent Van Gogh, Theo Van Gogh(mentioned), S...","This scene depicts the young Van Gogh in 1873,...",0.8,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.42,0.4
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]","[Vincent van Gogh, Theo van Gogh(mentioned), U...",This scene describes a time when Van Gogh was ...,0.7,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.4,0.4
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]","[Vincent van Gogh, Theo van Gogh(mentioned), U...","The scene takes place in the spring of 1873, a...",0.8,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.55,0.15
5,006_V-T_006.pdf,1873-03-24,T-H,VvG,TvG,"Letter 006\nThe Hague, 24th March 1875\nDear T...",NARRATOR (V.O.)\nWe find Van Gogh at the start...,"Emotions:\n[Anxiety, Hopefulness]\n\nCharacter...",1873-03-24,"[Anxiety, Hopefulness]","[Van Gogh, Theo(Van Gogh's Brother), Mr. Schmi...",The scene takes place in 1875 at the start of ...,0.8,[The Schotel painting(incomplete version)],"In this scene Van Gogh exhibits the earnest, p...",We find Van Gogh at the start of his artistic ...,":\nThe scene unfolds within the modest, candle...","""Dear Theo, I am impelled to correspond with y...",-0.05,0.6


# 3. Location Extractor

In [None]:
# Maps the abbreviated values of the "origin" column to "place, country" names.
# Origins without an entry here are kept unchanged by the `di.get(x, x)` lookup
# used when the location column is built.
di = {
"T-H":"The Hague, Netherlands",
"Nu":"Nuenen, Netherlands",
"Ar":"Arles, France",
"S-R":"Saint-Rémy, France",
"A-s-O":"Auvers-sur-Oise, France"}

In [None]:
# NOTE: this binds a second name to the same DataFrame object (no copy is made),
# so columns added through `df` afterwards also appear on `df_expanded`.
df = df_expanded

In [None]:
# Translate each origin code to its full place name; codes missing from `di` are kept as they are.
df['location'] = [di.get(origin_code, origin_code) for origin_code in df_expanded['origin']]

In [None]:
df.head(10)

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]",...,This scene is set in the winter of 1872 in Van...,0.8,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.1,-0.3,"The Hague, Netherlands"
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]",...,This scene reflects a young Vincent van Gogh's...,0.7,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.9,0.7,"The Hague, Netherlands"
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...",...,"This scene depicts the young Van Gogh in 1873,...",0.8,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.42,0.4,"The Hague, Netherlands"
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]",...,This scene describes a time when Van Gogh was ...,0.7,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.4,0.4,"The Hague, Netherlands"
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]",...,"The scene takes place in the spring of 1873, a...",0.8,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.55,0.15,"The Hague, Netherlands"
5,006_V-T_006.pdf,1873-03-24,T-H,VvG,TvG,"Letter 006\nThe Hague, 24th March 1875\nDear T...",NARRATOR (V.O.)\nWe find Van Gogh at the start...,"Emotions:\n[Anxiety, Hopefulness]\n\nCharacter...",1873-03-24,"[Anxiety, Hopefulness]",...,The scene takes place in 1875 at the start of ...,0.8,[The Schotel painting(incomplete version)],"In this scene Van Gogh exhibits the earnest, p...",We find Van Gogh at the start of his artistic ...,":\nThe scene unfolds within the modest, candle...","""Dear Theo, I am impelled to correspond with y...",-0.05,0.6,"The Hague, Netherlands"
6,007_V-T_007.pdf,1873-05-05,T-H,VvG,TvG,"Letter 007\nThe Hague, 5 May 1873\nDear Theo,\...",NARRATOR (V.O.)\nWe're deep in the moment of V...,"Emotions:\n[Anxiety, Hope, Guilt, Nostalgia]\n...",1873-05-05,"[Anxiety, Hope, Guilt, Nostalgia]",...,"This scene takes place in May 1873, a pivotal ...",0.8,[N/A],This pivotal moment encapsulates many of the t...,"We're deep in the moment of Van Gogh's life, M...","We are in a small, modest room, cluttered with...",Dear Theo... News from The Hague. You must for...,-0.075,0.175,"The Hague, Netherlands"
7,008_V-T_008.pdf,1873-05-09,T-H,VvG,TvG,"Letter 008\nThe Hague, 9th May 1873\nDear Theo...","NARRATOR (V.O.)\nIn 1873, an emotional unsettl...","Emotions:\n[Sorrowful, Unsettled]\n\nCharacter...",1873-05-09,"[Sorrowful, Unsettled]",...,This scene represents when Van Gogh was about ...,0.8,[None],The scene under review is crucial in understan...,"In 1873, an emotional unsettled Van Gogh was t...","May 9, 1873, a humble Dutch residence in The H...",,-0.8,0.5,"The Hague, Netherlands"
8,009_V-T_009.pdf,1873-06-13,London,VvG,TvG,"Letter 009\nLondon, 13 June 1873\nDear Theo,\n...","NARRATOR (V.O.)\nThe time is June 13, 1873. Vi...","Emotions:\n[Loneliness, Contentment]\n\nCharac...",1873-06-13,"[Loneliness, Contentment]",...,This scene takes place in 1873 when Van Gogh w...,0.8,[None],This scene is significantly rich for reflectin...,"The time is June 13, 1873. Vincent Van Gogh is...","A gloomy day in London, where the streets are ...","Dearest Theo, it's been a while. I am writing ...",0.0,0.0,London
9,010_V-H_009a.pdf,1873-06-13,London,VvG,The Van Stockum-Haanebeek family,"Letter 009a\nLondon, 2 July 1873\n[Letter to T...","NARRATOR (V.O.)\nIn the summer of 1873, Vincen...","Emotions:\n[Overwhelmed, Longing]\n\nCharacter...",1873-06-13,"[Overwhelmed, Longing]",...,This scene is set in Van Gogh's early adulthoo...,0.7,"[Vincent's Bedroom in Arles, The Pink Peach Tr...",The given scene is crucial in understanding th...,"In the summer of 1873, Vincent Van Gogh finds ...",":\nWe begin in the dimly lit, intimate, and qu...","Dear friends, I am overwhelmed by the quiet be...",-0.35,0.7,London


In [None]:
# One row per distinct place name, in order of first appearance in `df`.
df_locations = pd.DataFrame({"location": df["location"].unique()})

In [None]:
import requests
def get_lat_long(place_name, api_key=None, timeout=10):
    """Geocode a place name with the OpenCage Geocoding API.

    Args:
        place_name: Free-text query sent as the ``q`` request parameter.
        api_key: OpenCage API key. When omitted, the value of the
            ``OPENCAGE_API_KEY`` environment variable is used (empty string
            if it is unset), so the key never has to be stored in the notebook.
        timeout: Seconds to wait for the HTTP response before ``requests``
            gives up and raises.

    Returns:
        ``(lat, lng)`` taken from the first result, or
        ``("Coordinates not found", "Coordinates not found")`` when the
        response carries no results.

    Raises:
        requests.HTTPError: If the API answers with an HTTP error status
            (raised by ``Response.raise_for_status``).
    """
    import os

    if api_key is None:
        api_key = os.environ.get("OPENCAGE_API_KEY", "")

    url = "https://api.opencagedata.com/geocode/v1/json"
    params = {
        'q': place_name,
        'key': api_key,
    }
    # A timeout keeps one stalled request from hanging the whole geocoding loop.
    response = requests.get(url, params=params, timeout=timeout)
    # Surface HTTP-level failures instead of reporting them as "not found".
    response.raise_for_status()
    data = response.json()

    # Tolerate payloads that have no 'results' key at all.
    results = data.get('results') or []
    if results:
        # Only the first result is used.
        geometry = results[0]['geometry']
        return geometry['lat'], geometry['lng']
    return "Coordinates not found", "Coordinates not found"

In [None]:
import time
def append_lat_long(df, column_name, start_index, end_index):
    """Geocode a positional range of rows and store the result in place.

    For every row position in ``[start_index, end_index)`` the value found in
    ``column_name`` is passed to ``get_lat_long`` and the returned pair is
    written to the ``Latitude`` and ``Longitude`` columns of that same row.
    The columns are created on the first write if they do not exist yet.

    Args:
        df: DataFrame to update; it is modified in place.
        column_name: Column holding the place names to geocode.
        start_index: First row position (0-based) to process.
        end_index: Row position to stop before; clamped to ``len(df)``.

    Returns:
        None.
    """
    # Ensure we don't exceed the DataFrame's bounds
    end_index = min(end_index, len(df))

    # Iterate through the specified range
    for i in range(start_index, end_index):
        if i % 5 == 0:
            print("current", i)

        # Rows are read by position, so write back through the label that sits
        # at that position. Passing the position itself to .at (which expects a
        # label) only works on a default 0..n-1 index; on any other index it
        # appends new rows instead of filling the intended ones.
        row_label = df.index[i]
        place_name = df[column_name].iloc[i]
        lat, lng = get_lat_long(place_name)

        # Set the values cell by cell to avoid length mismatch
        df.at[row_label, 'Latitude'] = lat
        df.at[row_label, 'Longitude'] = lng

        # Delay between API calls
        time.sleep(1.2)

In [None]:
df_locations

Unnamed: 0,location
0,"The Hague, Netherlands"
1,London
2,Helvoirt
3,Paris
4,Etten
5,Ramsgate
6,Welwyn
7,Isleworth
8,Dordrecht
9,Amsterdam


In [None]:
# Geocode every row of the location table. The upper bound is the row count:
# DataFrame.size is rows x columns, which equals the row count only while the
# frame has a single column.
append_lat_long(df_locations, "location", 0, len(df_locations))

current 0
current 5
current 10
current 15
current 20
current 25


In [None]:
df_locations.head(15)

Unnamed: 0,location,Latitude,Longitude
0,"The Hague, Netherlands",52.079984,4.311346
1,London,51.507446,-0.127765
2,Helvoirt,51.633639,5.229065
3,Paris,48.85889,2.320041
4,Etten,51.916117,6.340463
5,Ramsgate,51.333473,1.419648
6,Welwyn,51.828687,-0.214175
7,Isleworth,51.468356,-0.326311
8,Dordrecht,51.795881,4.677935
9,Amsterdam,52.37308,4.892453


In [None]:
df_locations.head()

Unnamed: 0,location,Latitude,Longitude
0,"The Hague, Netherlands",52.079984,4.311346
1,London,51.507446,-0.127765
2,Helvoirt,51.633639,5.229065
3,Paris,48.85889,2.320041
4,Etten,51.916117,6.340463


In [None]:
# Attach coordinates to each row of `df` by matching on the place name; the
# left join keeps every row of `df`, including ones without a match.
df_merged = pd.merge(df, df_locations, on='location', how='left')

In [None]:
df_merged

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location,Latitude,Longitude
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]",...,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.100000,-0.300,"The Hague, Netherlands",52.079984,4.311346
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]",...,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.900000,0.700,"The Hague, Netherlands",52.079984,4.311346
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...",...,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.420000,0.400,"The Hague, Netherlands",52.079984,4.311346
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]",...,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.400000,0.400,"The Hague, Netherlands",52.079984,4.311346
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]",...,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.550000,0.150,"The Hague, Netherlands",52.079984,4.311346
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
857,802_V-W_W19.pdf,1890-01-20,S-R,VvG,Wilhelmina van Gogh,"W19 1\nSaint-Rémy, 20 January 1890\nDear Siste...","NARRATOR (V.O.)\nIn this scene, we examine a t...","Emotions:\n[Anxiety, Longing, Restlessness, So...",1890-01-20,"[Anxiety, Longing, Restlessness, Solace]",...,['Women in the Olive Orchard'],"In this scene, we see Van Gogh wrestling with ...","In this scene, we examine a time in Vincent va...",:\nThis scene is set in the quintessentially l...,"Dear Sister, \nThe Paris I was once entranced ...",-0.250000,0.425,"Saint-Rémy, France",47.636721,4.299632
858,813_V-W_W20.pdf,1890-02-20,S-R,VvG,Wilhelmina van Gogh,"W20 1\nSaint-Rémy, c. 20 February 1890\nMy Dea...","NARRATOR (V.O)\nIn February 1890, post-impress...","Emotions:\n[Anxiety, Solitude, Gratitude, Year...",1890-02-20,"[Anxiety, Solitude, Gratitude, Yearning, Intri...",...,"['Branches with Almond Blossoms', 'The Arlesie...",The scene presents a thoughtful and introspect...,,"The rustic room at the asylum, illuminated onl...","My Dear Sister, \n\nIt appears as though ordin...",0.083333,0.300,"Saint-Rémy, France",47.636721,4.299632
859,831_V-W_W21.pdf,1890-05-20,S-R,VvG,Wilhelmina van Gogh,"W21 1\nAuvers, c. 20 May 1890\nMy Dear Sister,...",NARRATOR (V.O.)\nWe find ourselves in the late...,"Emotions:\n[Gratitude, Contentment]\n\nCharact...",1890-05-20,"[Gratitude, Contentment]",...,"[Roses, Violets, Olive Orchards]",This scene is quite significant and encapsulat...,We find ourselves in the late spring of 1890. ...,"The scene starts in a modest room, bathed in t...","My dear Sister,\nThrongs of unuttered thoughts...",0.850000,-0.300,"Saint-Rémy, France",47.636721,4.299632
860,839_V-W_W22.pdf,1890-06-05,A-s-O,VvG,Wilhelmina van Gogh,"W22 1\nAuvers-sur-Oise, 5 June 1890\nMy Dear S...","NARRATOR (V.O.)\nWe are in the summer of 1890,...","Emotions:\n[Determination, Longing]\n\nCharact...",1890-06-05,"[Determination, Longing]",...,"[""Dr. Gachet's Portrait"", ""The Church at Auvers""]",The scene under examination showcases the intr...,"We are in the summer of 1890, Vincent Van Gogh...",A small room bathed in the warm hues of the ev...,"June 5th, 1890...Auvers-sur-Oise. My dearest s...",0.200000,0.650,"Auvers-sur-Oise, France",49.070188,2.171869


In [None]:
df_merged[df_merged["file_name"]=="572a.pdf"]

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location,Latitude,Longitude
619,572a.pdf,1889-01-22,Ar,VvG,Paul Gauguin,"Letter 572a\nArles, c. 22 January 1889\nMy dea...",NARRATOR (V.O.)\nAs Vincent van Gogh writes to...,"Emotions:\n[Isolation, Anxiety]\n\nCharacters:...",1889-01-22,"[Isolation, Anxiety]",...,"[""Sunflowers"", ""La Berceuse (Woman Rocking a C...",This scene offers valuable insights into Van G...,As Vincent van Gogh writes to his dear friend ...,"The camera pans through a vivid, saturated sce...","My dear friend Gauguin, thank you for your let...",-0.8,0.3,"Arles, France",43.677622,4.630965


In [None]:
# Rename "572a.pdf" to "900" so that the three-character prefix parsed from
# file_name in a later cell yields 900 instead of 572 (presumably to keep it
# from clashing with another letter numbered 572 - confirm).
# The row is selected by file name rather than by a hard-coded row label, so
# the assignment still hits the right row if the frame is re-ordered or
# re-indexed.
df_merged.loc[df_merged["file_name"] == "572a.pdf", "file_name"] = "900"

In [None]:
df_merged[df_merged["file_name"]=="572a.pdf"]

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location,Latitude,Longitude


In [None]:
# Parse the first three characters of every file name as a number; prefixes
# that are not numeric become NaN.
name_prefixes = df_merged["file_name"].map(lambda name: name[:3])
df_numbers = pd.to_numeric(name_prefixes, errors="coerce")

In [None]:
# Entries whose prefix could not be parsed get a fallback number derived from
# their own index label (label + 900); parsed entries are left as they are.
fallback_numbers = pd.Series(df_numbers.index + 900, index=df_numbers.index)
df_numbers = df_numbers.fillna(fallback_numbers)

print(df_numbers)

0        1.0
1        2.0
2        3.0
3        4.0
4        5.0
       ...  
857    802.0
858    813.0
859    831.0
860    839.0
861    844.0
Name: file_name, Length: 862, dtype: float64


In [None]:
# Collect every number that occurs more than once; an empty list means all
# numbers are unique.
number_counts = df_numbers.value_counts()
repeated_numbers = number_counts[number_counts > 1].index.tolist()

repeated_numbers

[]

In [None]:
df_numbers.unique().size

862

In [None]:
df_numbers.size

862

In [None]:
# Build the uid with column-wise arithmetic: 50 + number / 1000
# (number 1 -> 50.001, number 844 -> 50.844).
df_merged["uid"] = 50.0 + (df_numbers / 1000)

In [None]:
df_merged.head()

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location,Latitude,Longitude,uid
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]",...,This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.1,-0.3,"The Hague, Netherlands",52.079984,4.311346,50.001
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]",...,This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.9,0.7,"The Hague, Netherlands",52.079984,4.311346,50.002
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...",...,This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.42,0.4,"The Hague, Netherlands",52.079984,4.311346,50.003
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]",...,This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.4,0.4,"The Hague, Netherlands",52.079984,4.311346,50.004
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]",...,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.55,0.15,"The Hague, Netherlands",52.079984,4.311346,50.005


In [None]:
import folium

# Coerce both coordinate columns to floats; entries that are not numeric (such
# as the "Coordinates not found" placeholder) become NaN.
for coord_col in ('Latitude', 'Longitude'):
    df_locations[coord_col] = pd.to_numeric(df_locations[coord_col], errors='coerce')

# Keep only the locations that have both coordinates.
df_filtered = df_locations.dropna(subset=['Latitude', 'Longitude'])

# Centre the map on the mean coordinate of the remaining locations.
map_center = [df_filtered['Latitude'].mean(), df_filtered['Longitude'].mean()]
m = folium.Map(location=map_center, zoom_start=4)

# Draw one semi-transparent red circle per geocoded location.
marker_style = dict(radius=5, color='red', fill=True, fill_color='red', fill_opacity=0.6)
for lat, lng in zip(df_filtered['Latitude'], df_filtered['Longitude']):
    folium.CircleMarker([lat, lng], **marker_style).add_to(m)

# Write the map to an HTML file.
m.save('map.html')
# Alternatively, evaluate `m` as the last expression of a cell to render the
# map inline in the notebook.

In [None]:
# New frame with the location table ordered by index label, displayed for
# inspection; `df_locations` itself is not modified.
df_locations_sorted = df_locations.sort_index()
df_locations_sorted

Unnamed: 0,location,Latitude,Longitude
0,"The Hague, Netherlands",52.079984,4.311346
1,London,51.507446,-0.127765
2,Helvoirt,51.633639,5.229065
3,Paris,48.85889,2.320041
4,Etten,51.916117,6.340463
5,Ramsgate,51.333473,1.419648
6,Welwyn,51.828687,-0.214175
7,Isleworth,51.468356,-0.326311
8,Dordrecht,51.795881,4.677935
9,Amsterdam,52.37308,4.892453


In [None]:
df_merged.tail()

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location,Latitude,Longitude,uid
857,802_V-W_W19.pdf,1890-01-20,S-R,VvG,Wilhelmina van Gogh,"W19 1\nSaint-Rémy, 20 January 1890\nDear Siste...","NARRATOR (V.O.)\nIn this scene, we examine a t...","Emotions:\n[Anxiety, Longing, Restlessness, So...",1890-01-20,"[Anxiety, Longing, Restlessness, Solace]",...,"In this scene, we see Van Gogh wrestling with ...","In this scene, we examine a time in Vincent va...",:\nThis scene is set in the quintessentially l...,"Dear Sister, \nThe Paris I was once entranced ...",-0.25,0.425,"Saint-Rémy, France",47.636721,4.299632,50.802
858,813_V-W_W20.pdf,1890-02-20,S-R,VvG,Wilhelmina van Gogh,"W20 1\nSaint-Rémy, c. 20 February 1890\nMy Dea...","NARRATOR (V.O)\nIn February 1890, post-impress...","Emotions:\n[Anxiety, Solitude, Gratitude, Year...",1890-02-20,"[Anxiety, Solitude, Gratitude, Yearning, Intri...",...,The scene presents a thoughtful and introspect...,,"The rustic room at the asylum, illuminated onl...","My Dear Sister, \n\nIt appears as though ordin...",0.083333,0.3,"Saint-Rémy, France",47.636721,4.299632,50.813
859,831_V-W_W21.pdf,1890-05-20,S-R,VvG,Wilhelmina van Gogh,"W21 1\nAuvers, c. 20 May 1890\nMy Dear Sister,...",NARRATOR (V.O.)\nWe find ourselves in the late...,"Emotions:\n[Gratitude, Contentment]\n\nCharact...",1890-05-20,"[Gratitude, Contentment]",...,This scene is quite significant and encapsulat...,We find ourselves in the late spring of 1890. ...,"The scene starts in a modest room, bathed in t...","My dear Sister,\nThrongs of unuttered thoughts...",0.85,-0.3,"Saint-Rémy, France",47.636721,4.299632,50.831
860,839_V-W_W22.pdf,1890-06-05,A-s-O,VvG,Wilhelmina van Gogh,"W22 1\nAuvers-sur-Oise, 5 June 1890\nMy Dear S...","NARRATOR (V.O.)\nWe are in the summer of 1890,...","Emotions:\n[Determination, Longing]\n\nCharact...",1890-06-05,"[Determination, Longing]",...,The scene under examination showcases the intr...,"We are in the summer of 1890, Vincent Van Gogh...",A small room bathed in the warm hues of the ev...,"June 5th, 1890...Auvers-sur-Oise. My dearest s...",0.2,0.65,"Auvers-sur-Oise, France",49.070188,2.171869,50.839
861,844_V-W_W23.pdf,1890-06-12,A-s-O,VvG,Wilhelmina van Gogh,"Letter W23 1\nAuvers-sur-Oise, c. 12 June 1890...",NARRATOR (V.O.)\nJune 1890. Van Gogh is in Auv...,"Emotions:\n[Desperation, Burdened, Longing, Ur...",1890-06-12,"[Desperation, Burdened, Longing, Urgency]",...,"The scene is strategically set in June 1890, o...","June 1890. Van Gogh is in Auvers-sur-Oise, liv...",A dimly lit room. Shafts of pale morning light...,"My dear sister, the desperately swift passing ...",-0.6,0.675,"Auvers-sur-Oise, France",49.070188,2.171869,50.844


In [None]:
df_merged.columns

Index(['file_name', 'date', 'origin', 'sender', 'receiver', 'content',
       'API_Response', 'Scene_Analysis', 'timestamp', 'Emotions', 'Characters',
       'Context', 'Relevance', 'Paintings', 'Comment', 'NARRATOR (V.O.)',
       'BACKGROUND', 'VINCENT VAN GOGH (V.O.)', 'Valence', 'Arousal',
       'location', 'Latitude', 'Longitude', 'uid'],
      dtype='object')

In [None]:
df_expanded.head()

Unnamed: 0,file_name,date,origin,sender,receiver,content,API_Response,Scene_Analysis,timestamp,Emotions,...,Context,Relevance,Paintings,Comment,NARRATOR (V.O.),BACKGROUND,VINCENT VAN GOGH (V.O.),Valence,Arousal,location
0,001_V-T_001.pdf,1872-08-01,T-H,VvG,TvG,"Letter 001\nThe Hague, c. 18 August 1872\nDear...",NARRATOR (V.O.)\nIn the period post the depart...,"Emotions:\n[Melancholy, Nostalgia]\n\nCharacte...",1872-08-01,"[Melancholy, Nostalgia]",...,This scene is set in the winter of 1872 in Van...,0.8,(None provided in the scene),This poignant scene underscores the emotional ...,In the period post the departure of his brothe...,As the winter rain dwarfs against the aged gla...,"Dearest Theo, your absence in the homestead ha...",-0.1,-0.3,"The Hague, Netherlands"
1,002_V-T_002.pdf,1872-12-13,T-H,VvG,TvG,"Letter 002\nThe Hague, 13 Dec 1872\nDear Theo,...","NARRATOR (V.O.)\nIn 1872, Vincent van Gogh, kn...","Emotions:\n[Joy, Eagerness]\n\nCharacters:\n[V...",1872-12-13,"[Joy, Eagerness]",...,This scene reflects a young Vincent van Gogh's...,0.7,[None],This scene is rather significant in understand...,"In 1872, Vincent van Gogh, known for his vivid...",:\nThis scene takes place in a dimly lit room ...,"Dear Theo, Words could hardly express my joy i...",0.9,0.7,"The Hague, Netherlands"
2,003_V-T_003.pdf,1873-01-01,T-H,VvG,TvG,"Letter 003\nThe Hague, January 1873\nMy dear T...",NARRATOR (V.O.)\nWe find ourselves in the year...,"Emotions:\n[Loneliness, Comfort, Joy, Ambition...",1873-01-01,"[Loneliness, Comfort, Joy, Ambition, Anticipat...",...,"This scene depicts the young Van Gogh in 1873,...",0.8,[None],This scene is both captivating and enlightenin...,"We find ourselves in the year 1873, in the pat...",:\nThe intimate sanctuary of Van Gogh's small ...,My dear Theo... I found comfort in hearing you...,0.42,0.4,"The Hague, Netherlands"
3,004_V-T_004.pdf,1873-01-28,T-H,VvG,TvG,"Letter 004\nThe Hague, January 28 1873\nDear T...","NARRATOR (V.O.)\nIn the winter of 1873, a youn...","Emotions:\n[Longing, Pity]\n\nCharacters:\n[Vi...",1873-01-28,"[Longing, Pity]",...,This scene describes a time when Van Gogh was ...,0.7,"[None specific, but might be loosely related t...",This revealing scene captures a thoughtful and...,"In the winter of 1873, a young Vincent van Gog...",Late January evening fills the room in a board...,"Dear Theo, \nI find solace in your letter and ...",-0.4,0.4,"The Hague, Netherlands"
4,005_V-T_005.pdf,1873-03-17,T-H,VvG,TvG,"Letter 005\nThe Hague, 17 March 1873\nDear The...",NARRATOR (V.O.)\nWe are venturing into the lif...,"Emotions:\n[Anticipation, Nostalgia]\n\nCharac...",1873-03-17,"[Anticipation, Nostalgia]",...,"The scene takes place in the spring of 1873, a...",0.8,,This is a significant moment in van Gogh's lif...,We are venturing into the life and mind of Vin...,":\nThe scene unfolds in the cozy, dimly lit ro...","Dear Theo, I long to hear of you and Uncle Hei...",0.55,0.15,"The Hague, Netherlands"


In [None]:
# Keep only the columns that go into the exported dataset, in export order.
df_pre_final = df_merged.loc[:, [
    "uid",
    "content",
    "BACKGROUND",
    "NARRATOR (V.O.)",
    "VINCENT VAN GOGH (V.O.)",
    "Context",
    "Comment",
    "Characters",
    "Valence",
    "Arousal",
    "timestamp",
    "Latitude",
    "Longitude",
    "Relevance",
]]

In [None]:
# Switch to the short lowercase column names used in the exported file;
# "uid" and "timestamp" are not in the mapping and keep their names.
df_final = df_pre_final.rename(
    {
        "content": "chunk",
        "BACKGROUND": "background",
        "NARRATOR (V.O.)": "narrator",
        "VINCENT VAN GOGH (V.O.)": "vangogh",
        "Context": "context",
        "Comment": "comment",
        "Characters": "characters",
        "Valence": "valence",
        "Arousal": "arousal",
        "Latitude": "latitude",
        "Longitude": "longitude",
        "Relevance": "relevance",
    },
    axis="columns",
)

In [None]:
# Write the final dataset to a CSV file in the current working directory.
# No `index` argument is passed, so pandas' default applies and the row index
# is written too, as an unnamed first column.
df_final.to_csv("letters_cognitive_dataset_van_gogh.csv")