# Sentiment Analysis

## Preparation

In [None]:
# import statements

import os

import pandas as pd
import plotly.express as px
import plotly.graph_objects as go

from functions.basic import read_file
from functions.processing import split_sents
from functions.sentiment import sentiws_glossary, get_polarity_values, get_germansentiment, compare_sentiment, map_sentiment
from functions.summary import apply_scaling
from functions.vis import write_vis

In [None]:
# datasets
# The paths are assembled with os.path.join instead of hard-coded "\\"
# separators so the notebook also runs on non-Windows systems; on Windows
# the resulting path strings are identical to the previous literals.

extraction_dir = os.path.join("data", "0_extraction_data")
pos_dir = os.path.join("data", "2_pos_data")

# full datasets
j_all = read_file(os.path.join(extraction_dir, "j_all.pkl"))
k_all = read_file(os.path.join(extraction_dir, "k_all.pkl"))

# subsets (cited / not cited passages)
j_cited = read_file(os.path.join(pos_dir, "j_cited_2.pkl"))
j_not_cited = read_file(os.path.join(pos_dir, "j_not_cited_2.pkl"))
k_cited = read_file(os.path.join(pos_dir, "k_cited_2.pkl"))
k_not_cited = read_file(os.path.join(pos_dir, "k_not_cited_2.pkl"))

# remember the index of every subset to merge the dataframes later on
j_cited_index = j_cited.index
j_not_cited_index = j_not_cited.index
k_cited_index = k_cited.index
k_not_cited_index = k_not_cited.index

In [None]:
# text processing
# this might take a while (should be less than 1 min.)

# For every subset: copy the "text" and "frequency" columns into plain
# Python lists and hand the texts to split_sents.

# k cited
k_c_text = list(k_cited["text"])
k_c_citnum = list(k_cited["frequency"])
k_c_sents = split_sents(k_c_text)

# k not cited
k_nc_text = list(k_not_cited["text"])
k_nc_citnum = list(k_not_cited["frequency"])
k_nc_sents = split_sents(k_nc_text)

# j cited
j_c_text = list(j_cited["text"])
j_c_citnum = list(j_cited["frequency"])
j_c_sents = split_sents(j_c_text)

# j not cited
j_nc_text = list(j_not_cited["text"])
j_nc_citnum = list(j_not_cited["frequency"])
j_nc_sents = split_sents(j_nc_text)

## SentiWS

In [None]:
# open SentiWS files and read them line per line
# NOTE: these absolute paths are specific to one machine and have to be
# adjusted before the notebook is run anywhere else.

positive_path = "C:\\Users\\sophi\\Documents\\masterarbeit\\code\\SentiWS_v2-0\\SentiWS_v2.0_Positive.txt"
negative_path = "C:\\Users\\sophi\\Documents\\masterarbeit\\code\\SentiWS_v2-0\\SentiWS_v2.0_Negative.txt"

# the context manager closes both file handles once the lines are read
# (previously the files were opened and never closed)
with open(positive_path, "r", encoding="utf-8") as positive, \
        open(negative_path, "r", encoding="utf-8") as negative:
    positive_lines = positive.readlines()
    negative_lines = negative.readlines()

In [None]:
# create glossary from files
# sentiws_glossary receives the raw lines of the positive and the negative
# SentiWS file; its result is used as the lexicon for get_polarity_values below

sentiws = sentiws_glossary(positive_lines, negative_lines)
# bare expression as last statement of the cell: shows the glossary in the notebook output
sentiws

In [None]:
# create lists of polarity values for all passages (takes a while)
# Limitation: this is a purely lexicon-based approach; more sophisticated
# methods based on annotated data and machine learning would of course be better.

# the subsets are processed in the order j cited, j not cited, k cited, k not cited
j_c_sentiment, j_nc_sentiment, k_c_sentiment, k_nc_sentiment = [
    get_polarity_values(passages, sentiws)
    for passages in (j_c_text, j_nc_text, k_c_text, k_nc_text)
]

In [None]:
# new column "sentiws": attach the polarity values to the subset they were computed from
for subset_df, polarity_values in (
    (j_cited, j_c_sentiment),
    (j_not_cited, j_nc_sentiment),
    (k_cited, k_c_sentiment),
    (k_not_cited, k_nc_sentiment),
):
    subset_df["sentiws"] = polarity_values

# put cited and not cited passages back together and sort the rows by index
j_all_3a = pd.concat([j_cited, j_not_cited]).sort_index()
k_all_3a = pd.concat([k_cited, k_not_cited]).sort_index()

In [None]:
# calculate rel_sentiws by dividing the sentiws value through token_count


def _relative_polarity(frame):
    """Return the "sentiws" value per token for every row of *frame*.

    Each "sentiws" value is divided by the "token_count" of the same row;
    rows whose token count is 0 get the value 0 instead, which avoids a
    division by zero.
    """
    return [
        polarity / n_tokens if n_tokens != 0 else 0
        for polarity, n_tokens in zip(frame["sentiws"], frame["token_count"])
    ]


j_all_3a["rel_sentiws"] = _relative_polarity(j_all_3a)
k_all_3a["rel_sentiws"] = _relative_polarity(k_all_3a)

In [None]:
# run apply_scaling on the "rel_sentiws" column of both dataframes with the "neg_pos" option
# NOTE(review): "neg_pos" presumably selects a scaling that keeps negative and
# positive values apart — confirm against functions.summary.apply_scaling
j_all_3b = apply_scaling(j_all_3a, "rel_sentiws", "neg_pos")
k_all_3b = apply_scaling(k_all_3a, "rel_sentiws", "neg_pos")

In [None]:
# rank the k passages by their relative SentiWS value; sort_values sorts in
# ascending order by default, so the lowest values come first
top_sent = k_all_3b.sort_values(by="rel_sentiws")
# bare expression as last statement of the cell: shows the sorted dataframe
top_sent

In [None]:
# line graph of the relative SentiWS values over the dataframe index,
# with one colour per value of "passage_type" (cited / not cited passages)

passage_colors = ["#C16152", "#509F98"]

fig = px.line(
    k_all_3b,
    x=k_all_3b.index,
    y="rel_sentiws",
    color="passage_type",
    color_discrete_sequence=passage_colors,
)
fig.update_layout(
    template="plotly_dark",
    font={"family": "CMU Serif", "size": 20},
    width=1500,
)
fig.show()

# optional export via write_vis (disabled)
#write_vis(["svg"], "k_all_sentiws_bar", fig)

## germansentiment

In [None]:
# run get_germansentiment on the "text" column of both merged dataframes

j_germansentiment = get_germansentiment(j_all_3b["text"])
k_germansentiment = get_germansentiment(k_all_3b["text"])

In [None]:
# store the get_germansentiment results as a new "germansentiment" column
# in the dataframes whose "text" column they were computed from

j_all_3b["germansentiment"] = j_germansentiment
k_all_3b["germansentiment"] = k_germansentiment

In [None]:
# spot check of the sentiment results for a single entry of j_all_3b
# NOTE(review): 28 is presumably the index/position of the passage to inspect —
# confirm against functions.sentiment.compare_sentiment
compare_sentiment(28, j_all_3b)

In [None]:
# run map_sentiment on both dataframes; the results are the frames that get pickled below
# NOTE(review): what exactly map_sentiment adds or changes is not visible here —
# see functions.sentiment.map_sentiment
j_all_3c = map_sentiment(j_all_3b)
k_all_3c = map_sentiment(k_all_3b)

In [None]:
# save new dataframes again for the next step
# The output paths are assembled with os.path.join instead of hard-coded "\\"
# separators so saving also works on non-Windows systems; on Windows the
# resulting path strings are identical to the previous literals.

sentiment_dir = os.path.join("data", "3_sentiment_data")

j_all_3c.to_pickle(os.path.join(sentiment_dir, "j_all_3.pkl"))
k_all_3c.to_pickle(os.path.join(sentiment_dir, "k_all_3.pkl"))