# Sentiment Analysis -- Richards

In [1]:
import re, csv, glob, spacy, warnings, sys, os
import pandas as pd
import numpy as np
from textblob import TextBlob

# Import project-specific functions. 
# Python files (.py) have to be in same folder to work.
lib_path = os.path.abspath(os.path.join(os.path.dirname('Correspondence_XML_parser.py'), '../Scripts'))
sys.path.append(lib_path)

from Correspondence_XML_parser import *

nlp = spacy.load('en_core_web_sm')

# Ignore warnings related to deprecated functions.
warnings.filterwarnings('ignore')

## Gather XML Files

In [2]:
%%time

# Declare directory location to shorten filepaths later.
abs_dir = "/Users/quinn.wi/Documents/Data"
richards_files = glob.glob(abs_dir + "/PSC/Richards/ESR-XML-Files-MHS/*.xml")

len(richards_files)

CPU times: user 847 µs, sys: 977 µs, total: 1.82 ms
Wall time: 2.17 ms


20

## Build Dataframe

In [3]:
%%time

# Build dataframe from XML files.
# build_dataframe() called from Correspondence_XML_parser
df = build_dataframe(richards_files)

df.head(3)

/Users/quinn.wi/Documents/Data/PSC/Richards/ESR-XML-Files-MHS/ESR-EDA-1893-09-24.xml 

CPU times: user 9.52 ms, sys: 3.36 ms, total: 12.9 ms
Wall time: 14.7 ms


Unnamed: 0,file,date,source,target,subjects,references,text
0,ESR-EDA-1892-01-08.xml,1892-01-08,richards-ellen,atkinson-edward,"1893 Chicago World's Fair,Aladdin Oven,New Eng...","palmer-bertha,hovey-e,daniells-unknown",Boston Jan 8 1892 My dear Mr Atkinson I enclo...
1,ESR-EDA-1892-04-12.xml,1892-04-12,richards-ellen,atkinson-edward,"Aladdin Oven,nutrition,cooking",abel-mary,April 12— Dear Mr Atkinson I expect Mrs Abel ...
2,ESR-EDA-1892-04-07.xml,1892-04-07,richards-ellen,atkinson-edward,"Aladdin Oven,Nutrition,cooking","conro-emma,abel-mary","Boston, April 7, 1892 My dear Mr. Atkinson I ..."


## Get Sentiments

In [4]:
%%time

def get_sentiment(txt):
    blob = TextBlob(txt)
    return blob.sentiment.polarity

df['sentiment'] = df['text'].apply(get_sentiment)

df.head(3)

CPU times: user 55.7 ms, sys: 2.57 ms, total: 58.3 ms
Wall time: 57.1 ms


Unnamed: 0,file,date,source,target,subjects,references,text,sentiment
0,ESR-EDA-1892-01-08.xml,1892-01-08,richards-ellen,atkinson-edward,"1893 Chicago World's Fair,Aladdin Oven,New Eng...","palmer-bertha,hovey-e,daniells-unknown",Boston Jan 8 1892 My dear Mr Atkinson I enclo...,0.133333
1,ESR-EDA-1892-04-12.xml,1892-04-12,richards-ellen,atkinson-edward,"Aladdin Oven,nutrition,cooking",abel-mary,April 12— Dear Mr Atkinson I expect Mrs Abel ...,-0.110823
2,ESR-EDA-1892-04-07.xml,1892-04-07,richards-ellen,atkinson-edward,"Aladdin Oven,Nutrition,cooking","conro-emma,abel-mary","Boston, April 7, 1892 My dear Mr. Atkinson I ...",0.081991


## Save Data

In [5]:
%%time

# Save results to lab space for visualizations.
df[['date', 'file', 'sentiment']].to_csv(os.path.abspath('../../lab_space/projects/richards/sentiments/data/') + '/richards_sentiments.csv', 
          sep = ',', index = False)

CPU times: user 2.92 ms, sys: 1.63 ms, total: 4.56 ms
Wall time: 3.48 ms
