# Sentiment Analysis
In this notebook, we apply sentiment analysis to all bills in order to determine the attitude of each bill towards its subject. We use a lexicon-based approach, since an ML approach is not feasible because the dataset is not annotated with sentiments. We use SentiWordNet as the sentiment lexicon.

## Preprocessing

In [59]:
import nltk
from nltk.corpus import wordnet as wn
from nltk.corpus import sentiwordnet as swn
from nltk.corpus import stopwords
import numpy as np
import csv
import sys

# Bill summaries can be far longer than the csv module's default per-field
# limit, so raise the limit as high as the platform allows.
# sys.maxsize does not fit in a C long on every platform (e.g. 64-bit
# Windows), where csv.field_size_limit raises OverflowError; shrink the
# requested limit until it is accepted.
max_field_size = sys.maxsize
while True:
    try:
        csv.field_size_limit(max_field_size)
        break
    except OverflowError:
        max_field_size //= 10

9223372036854775807

In [12]:
# Location of the bills CSV file that the loading cell reads.
bills_file_path = "112.csv"

# Zero-based positions, within each CSV row, of the bill id and its summary text.
id_column, summary_column = 0, 3

In [16]:
# Load every bill from the pipe-delimited CSV file, keeping only its id and summary
bills = []

with open(bills_file_path, newline='') as bills_csv:
    bills_csv.readline()  # discard the header line
    reader = csv.reader(bills_csv, delimiter='|')
    bills.extend(
        {'id': record[id_column], 'summary': record[summary_column]}
        for record in reader
    )

In [19]:
# Tokenize the bill summaries: store the word tokens of each summary on its bill
for bill in bills:
    bill['tokenized_summary'] = nltk.word_tokenize(bill['summary'])

In [30]:
# Lemmatize and filter the summaries.
# Each bill gets a 'lemmatized_summary': a list of (lowercased lemma, WordNet POS)
# pairs for its non-stop-word verbs, nouns, adjectives and adverbs.
stop_words = set(stopwords.words('english'))

# Universal POS tag -> WordNet POS. Tokens whose tag is not listed here
# (punctuation, determiners, numbers, ...) are dropped.
# NLTK's universal tagset marks punctuation as "." and has no "PROPN" tag, so
# the former "PUNCT"/"PROPN" checks never matched; "PROPN" is mapped to noun
# (the lemmatizer's default POS) so a tagger that does emit it can no longer
# trigger a KeyError when the (lemma, pos) pair is built.
un2wn_mapping = {
    "VERB" : wn.VERB,
    "NOUN" : wn.NOUN,
    "ADJ" : wn.ADJ,
    "ADV" : wn.ADV,
    "PROPN" : wn.NOUN,
}

# Build the lemmatizer once instead of once per token.
lemmatizer = nltk.WordNetLemmatizer()

for bill in bills:
    lemmatized_summary = []
    for w, p in nltk.pos_tag(bill['tokenized_summary'], tagset="universal"):
        wn_pos = un2wn_mapping.get(p)
        if wn_pos is None or w.lower() in stop_words:
            continue

        lemma = lemmatizer.lemmatize(w, pos = wn_pos)
        lemmatized_summary.append((lemma.lower(), wn_pos))  # case insensitive

    bill['lemmatized_summary'] = lemmatized_summary

# Sentiment Analysis

In [21]:
# Fetch the SentiWordNet corpus (nothing is re-downloaded when it is already
# up to date); the swn.senti_synsets lookups used for scoring read from it.
nltk.download('sentiwordnet')

[nltk_data] Downloading package sentiwordnet to
[nltk_data]     /Users/Rajiv/nltk_data...
[nltk_data]   Package sentiwordnet is already up-to-date!


True

In [81]:
# Score every bill: for each lemma, average the positive / negative scores over
# all of its SentiWordNet synsets, then average those per-word values over the
# whole lemmatized summary.
for bill in bills:
    summary = bill['lemmatized_summary']
    positive_score = 0.0
    negative_score = 0.0

    for word, pos in summary:
        synsets = list(swn.senti_synsets(word, pos = pos))
        if not synsets:
            # Lemma unknown to SentiWordNet: count it as neutral. np.mean([])
            # would return nan and turn the whole bill's score into nan.
            continue

        positive_score += np.mean([synset.pos_score() for synset in synsets])
        negative_score += np.mean([synset.neg_score() for synset in synsets])

    # A bill whose summary has no lemmas left after filtering gets neutral
    # scores instead of raising ZeroDivisionError.
    word_count = len(summary)
    bill['positive_score'] = positive_score / word_count if word_count else 0.0
    bill['negative_score'] = negative_score / word_count if word_count else 0.0
    

In [85]:
# Gather the per-bill scores into two parallel lists, in the same order as `bills`
pos_scores = [bill['positive_score'] for bill in bills]
neg_scores = [bill['negative_score'] for bill in bills]