# Sentiment Analysis -- JQA

In [1]:
import re, csv, glob, spacy, warnings, sys, os
import pandas as pd
import numpy as np
from textblob import TextBlob

# Import project-specific functions.
# The project's Python files (.py) are loaded from the sibling ``../Scripts``
# folder. The path is resolved relative to the current working directory
# (os.path.dirname of a bare filename is '', so it contributed nothing).
lib_path = os.path.abspath('../Scripts')

# Guard the append so re-running this cell does not pile up duplicate
# entries in sys.path.
if lib_path not in sys.path:
    sys.path.append(lib_path)

from JQA_XML_parser import *

# Load the spaCy pipeline named 'en_core_web_sm' and keep it in `nlp`.
nlp = spacy.load('en_core_web_sm')

# Suppress warnings. NOTE: called with only the 'ignore' action (no category
# or module filter), this silences every warning raised after this point,
# not just those about deprecated functions.
warnings.filterwarnings('ignore')

## Gather XML Files

In [2]:
%%time

# Declare directory location to shorten filepaths later.
# Set the PSC_DATA_DIR environment variable to point at a different data
# root; the original local path is kept as the default for compatibility.
abs_dir = os.environ.get("PSC_DATA_DIR", "/Users/quinn.wi/Documents/Data")

# Collect every XML file one folder level below PSC/JQA. glob returns matches
# in arbitrary (filesystem) order, so sort them to keep the row order of the
# resulting dataframe reproducible between runs and machines.
files = sorted(glob.glob(os.path.join(abs_dir, "PSC", "JQA", "*", "*.xml")))

# Number of XML files found.
len(files)

CPU times: user 3.52 ms, sys: 4 ms, total: 7.52 ms
Wall time: 6.32 ms


762

## Build Dataframe

In [3]:
%%time

# Build dataframe from XML files.
# build_dataframe() is not defined in this notebook; it presumably comes from
# the star import of JQA_XML_parser (the only project module imported here)
# -- TODO confirm, since this comment previously named Correspondence_XML_parser.
df = build_dataframe(files)

# Preview the first three rows.
df.head(3)

CPU times: user 5.3 s, sys: 118 ms, total: 5.41 s
Wall time: 5.46 s


Unnamed: 0,file,entry,date,people,text
0,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-01,1808-08-01,"courtdegebelin-antoine,gregory-george,rousseau...","1. Bathed with George this morning, at the pla..."
1,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-02,1808-08-02,"degrand-peter,everett-alexander","2. Bathed again this Morning, and took George ..."
2,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-03,1808-08-03,"degrand-peter,welsh-thomas,davis-john,dawes-th...","3. Bathed this morning, at 6. with Mr: De Gran..."


## Get Sentiments

In [4]:
%%time

def get_sentiment(txt):
    """Return the TextBlob polarity score for a piece of text.

    Parameters
    ----------
    txt : str
        Text to score.

    Returns
    -------
    float
        ``TextBlob(txt).sentiment.polarity`` for string input. NaN when
        ``txt`` is not a string (for example ``None`` or a pandas missing
        value), so a single missing entry does not abort a whole
        ``Series.apply`` run.
    """
    # TextBlob raises TypeError for non-string input; treat such values as
    # "no sentiment available" instead of crashing.
    if not isinstance(txt, str):
        return float('nan')

    blob = TextBlob(txt)
    return blob.sentiment.polarity

# Score each row's 'text' value with get_sentiment and store the results in a
# new 'sentiment' column (this modifies df in place).
df['sentiment'] = df['text'].apply(get_sentiment)

# Preview the first three rows, now including the 'sentiment' column.
df.head(3)

CPU times: user 30.5 s, sys: 156 ms, total: 30.6 s
Wall time: 30.9 s


Unnamed: 0,file,entry,date,people,text,sentiment
0,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-01,1808-08-01,"courtdegebelin-antoine,gregory-george,rousseau...","1. Bathed with George this morning, at the pla...",0.212121
1,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-02,1808-08-02,"degrand-peter,everett-alexander","2. Bathed again this Morning, and took George ...",-0.0475
2,"('JQADiaries-v27-1808-08-p364.xml',)",jqadiaries-v27-1808-08-03,1808-08-03,"degrand-peter,welsh-thomas,davis-john,dawes-th...","3. Bathed this morning, at 6. with Mr: De Gran...",0.048889


## Save Sentiments

In [5]:
%%time

# Save results to lab space for visualizations.
# Only the 'date', 'file' and 'sentiment' columns are exported, without the
# index. The target folder is resolved relative to the current working
# directory; to_csv does not create missing folders, so it must already exist.
# The file path is assembled with os.path.join instead of concatenating a
# hard-coded '/' separator.
df[['date', 'file', 'sentiment']].to_csv(
    os.path.join(
        os.path.abspath('../../lab_space/projects/jqa/sentiments/data/'),
        'jqa_sentiments.csv',
    ),
    sep=',',
    index=False,
)

CPU times: user 84 ms, sys: 8.27 ms, total: 92.3 ms
Wall time: 91.4 ms
