In [1]:
# load debate data
import csv
# Open the debate CSV once. Later cells rewind this same handle with data.seek(0)
# and read it again, so it is deliberately left open for the rest of the notebook.
# newline='' hands line-ending handling to the csv module (as its documentation
# asks), so quoted fields that contain line breaks are parsed as a single field.
data = open('../OisheeHansardDebates/QEC Parliament.csv', 'r', encoding='mac_roman', newline='')

### Lexicon approach

In [2]:
from collections import OrderedDict

# Rewind the shared file handle so the reader starts at the first row.
data.seek(0)
debates = csv.reader(data)

# Two-level mapping built from three columns of every row:
#   outer key  -> row[-2] (second-to-last column; the party in the printed results)
#   inner key  -> row[0]  (first column; the debate date), kept in first-seen order
#   value      -> list of row[-1] entries (the statements) for that pair
debates_over_time = {}
for record in debates:
    outer_key = record[-2]
    inner_key = record[0]
    statement = record[-1]
    by_inner_key = debates_over_time.setdefault(outer_key, OrderedDict())
    by_inner_key.setdefault(inner_key, []).append(statement)

In [3]:
from nltk import word_tokenize, pos_tag # pos tagging
from nltk.corpus import sentiwordnet as swn
#from nltk import download
#download('punkt')
#download('averaged_perceptron_tagger')

pos_dict = {'v': 'V', 'n': 'NN', 'r': 'RB', 'a': 'JJ'} # translate between sentiwordnet and nltk pos tag schema

# SentiWordNet POS classes that contribute to the score. Only nouns are scored;
# add 'v', 'r' or 'a' (keys of pos_dict) to include verbs, adverbs or adjectives.
scored_pos = ('n',)


def first_sense_score(token, swn_pos):
    """Return pos_score - neg_score of the first SentiWordNet sense of `token`.

    token   -- the word as it appears in the text (lower-cased before lookup)
    swn_pos -- SentiWordNet part-of-speech letter, e.g. 'n'

    Returns 0.0 when SentiWordNet has no sense for the token. Any other failure
    (for example a missing NLTK corpus) is allowed to propagate instead of being
    silently turned into a zero score.
    """
    first_sense = next(iter(swn.senti_synsets(token.lower(), swn_pos)), None)
    if first_sense is None:
        return 0.0
    return first_sense.pos_score() - first_sense.neg_score()


print('Date      | Score  | Polarity')

# for each outer key of debates_over_time (printed as a heading), print one line
# per inner key with the summed sentiment score and its polarity label
for group, statements_by_date in debates_over_time.items():
    print('\n', group)
    for date, statements in statements_by_date.items():
        score = 0.0
        for statement in statements:
            for token, tag in pos_tag(word_tokenize(statement)):
                for swn_pos in scored_pos:
                    # substring match, so e.g. 'NN' also covers NNS / NNP / NNPS
                    if pos_dict[swn_pos] in tag:
                        score += first_sense_score(token, swn_pos)
        if score > 0.0:
            polarity = 'Positive'
        elif score < 0.0:
            polarity = 'Negative'
        else:
            polarity = 'Neutral'
        print(date, score, polarity)

Date      | Score  | Polarity

 Lab
09/12/2002 2.125 Positive
30/01/2003 22.0 Positive
02/03/2004 -1.875 Negative
15/12/2004 2.5 Positive
18/01/2007 4.25 Positive
25/06/2007 1.25 Positive
20/02/2012 0.25 Positive
10/05/2012 12.25 Positive
11/06/2012 0.0 Neutral
25/02/2013 0.5 Positive
27/11/2017 0.75 Positive

 LD
09/12/2002 0.125 Positive
30/01/2003 4.625 Positive
02/03/2004 0.625 Positive
15/12/2004 -0.375 Negative
18/01/2007 1.125 Positive
25/06/2007 0.25 Positive
10/05/2012 1.0 Positive

 Con
30/01/2003 6.5 Positive
02/03/2004 0.875 Positive
15/12/2004 -0.125 Negative
18/01/2007 1.125 Positive
25/06/2007 0.75 Positive
20/02/2012 1.25 Positive
10/05/2012 28.5 Positive
11/06/2012 1.625 Positive
25/02/2013 0.5 Positive
27/11/2017 4.5 Positive

 NA
30/01/2003 -0.125 Negative
15/12/2004 0.0 Neutral
10/05/2012 0.25 Positive

 Ulster Unionist Party
30/01/2003 0.125 Positive

 Independent
30/01/2003 0.125 Positive

 SNP
30/01/2003 -0.75 Negative
10/05/2012 0.25 Positive

 Crossbench
02/03/

### Machine learning approach

In [None]:
# Rewind the shared file handle and read the CSV again from the top.
data.seek(0)
debates = csv.reader(data)

# One 15-slot record per "<yyyymmdd> <row[3]>" key:
#   slot 5      -> row[-1] of the first row seen for the key
#   slots 6-9   -> row[-1] of later rows for the same key, first empty slot first
#   slots 12-14 -> row[5], row[3] and the constant 0
# All remaining slots stay as empty strings.
debates_dict = {}
for record in debates:
    date_parts = record[0].split('/')
    # dd/mm/yyyy -> yyyymmdd
    entry_key = date_parts[2] + date_parts[1] + date_parts[0] + ' ' + record[3]
    if entry_key in debates_dict:
        slots = debates_dict[entry_key]
        free_slot = next((i for i in range(6, 10) if slots[i] == ''), None)
        # Rows beyond the four follow-up slots are dropped.
        if free_slot is not None:
            slots[free_slot] = record[-1]
    else:
        debates_dict[entry_key] = (
            [''] * 5 + [record[-1]] + [''] * 6 + [record[5], record[3], 0]
        )

# Flatten every record to "<key>, <list repr without the surrounding brackets>".
debates_dict2 = {
    entry_key: entry_key + ', ' + str(slots)[1:-1]
    for entry_key, slots in debates_dict.items()
}

In [None]:
# load pretrained ML model
import pickle
# The pickle holds a two-item object that unpacks into the text vectorizer and the
# classifier used by the cells below.
# NOTE: unpickling executes code stored in the file - only load a model file that
# comes from a trusted source.
with open('../resources/pretrainedmodel.pkl', 'rb') as model_file:
    pretrained = pickle.load(model_file)
vectorizer, SVM = pretrained

In [None]:
# For every outer key of debates_over_time (printed as a heading), classify the
# concatenated statements of each inner key with the pretrained model and print
# the inner key, the raw predicted label and its polarity name.
for group, statements_by_date in debates_over_time.items():
    print(group)
    for date, statements in statements_by_date.items():
        # Each statement is followed by a single space, including the last one.
        speech = ''.join(statement + ' ' for statement in statements)
        X_new = vectorizer.transform([speech])
        # Predict once and reuse the label for both the comparison and the print.
        prediction = SVM.predict(X_new)[0]
        # Label 1 is reported as positive; every other label as negative.
        polarity = 'Positive' if prediction == 1 else 'Negative'
        print(date, prediction, polarity)
    print('\n')

In [None]:
# Two hand-written probe texts: one made of approving words, one of disapproving
# words, vectorised for a quick check of the classifier.
probe_texts = [
    "approve great fantastic love",
    "condemn disgrace awful disappointing",
]
X_new = vectorizer.transform(probe_texts)

In [None]:
# Show the labels the classifier assigns to the vectorised texts in X_new.
probe_predictions = SVM.predict(X_new)
print(probe_predictions)