In [32]:
%pip install nltk rouge-score evaluate gensim word-mover-distance transformers POT

Defaulting to user installation because normal site-packages is not writeable
Collecting word-mover-distance
  Downloading word_mover_distance-0.0.3.tar.gz (3.9 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Collecting pyemd (from word-mover-distance)
  Downloading pyemd-1.0.0-cp311-cp311-win_amd64.whl (147 kB)
     ---------------------------------------- 0.0/148.0 kB ? eta -:--:--
     -- ------------------------------------- 10.2/148.0 kB ? eta -:--:--
     ------------------------------- ------ 122.9/148.0 kB 1.4 MB/s eta 0:00:01
     -------------------------------------- 148.0/148.0 kB 1.8 MB/s eta 0:00:00
Building wheels for collected packages: word-mover-distance
  Building wheel for word-mover-distance (setup.py): started
  Building wheel for word-mover-distance (setup.py): finished with status 'done'
  Created wheel for word-mover-distance: filename=word_mover_distance-0.0.3-py3-none-any.whl size=4351 sha256=6b63bfd73

In [1]:
import nltk
from nltk.translate.bleu_score import sentence_bleu
from rouge_score import rouge_scorer
from evaluate import load
import evaluate
import numpy 
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
import torch.nn.functional as F
from nltk.translate.meteor_score import meteor_score
import ot
import gensim.downloader as api
from nltk.corpus import stopwords
from time import time
import os
import numpy as np
import pandas as pd
import json
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import confusion_matrix, accuracy_score
from metrics import distances,eval

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\aryam\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\aryam\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\aryam\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [2]:
class Scorers:
    
    # BLEU
    def compute_bleu(self,reference, candidate):
        reference_tokens = nltk.word_tokenize(reference.lower())
        candidate_tokens = nltk.word_tokenize(candidate.lower())

        bleu_1 = sentence_bleu([reference_tokens], candidate_tokens, weights=(1, 0, 0, 0))
        bleu_2 = sentence_bleu([reference_tokens], candidate_tokens, weights=(0.5, 0.5, 0, 0))
        bleu_3 = sentence_bleu([reference_tokens], candidate_tokens, weights=(0.33, 0.33, 0.33, 0))
        bleu_4 = sentence_bleu([reference_tokens], candidate_tokens, weights=(0.25, 0.25, 0.25, 0.25))

        return bleu_1, bleu_2, bleu_3, bleu_4
    
    # ROGUE
    def compute_rouge(self,reference, candidate):
    # Convert reference and candidate sentences to lists of tokens
        reference_tokens = reference.lower().split()
        candidate_tokens = candidate.lower().split()

        scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)

        scores = scorer.score(reference, candidate)

        # print(f"ROUGE-1 Precision: {scores['rouge1'].precision:.4f}")
        # print(f"ROUGE-1 Recall: {scores['rouge1'].recall:.4f}")
        # print(f"ROUGE-1 F1-score: {scores['rouge1'].fmeasure:.4f}")

        # print(f"ROUGE-2 Precision: {scores['rouge2'].precision:.4f}")
        # print(f"ROUGE-2 Recall: {scores['rouge2'].recall:.4f}")
        # print(f"ROUGE-2 F1-score: {scores['rouge2'].fmeasure:.4f}")

        # print(f"ROUGE-L Precision: {scores['rougeL'].precision:.4f}")
        # print(f"ROUGE-L Recall: {scores['rougeL'].recall:.4f}")
        # print(f"ROUGE-L F1-score: {scores['rougeL'].fmeasure:.4f}")

        return scores

    # METEOR
    def compute_meteor(self,reference, candidate):

        reference =  nltk.word_tokenize(reference.lower())
        candidate =  nltk.word_tokenize(candidate.lower())

        score = meteor_score([reference], candidate)

        return score

    # WORD MOVERS DISTANCE    
    def compute_wmd(self,sentence_a,sentence_b,model):

        sentence_a = sentence_a.lower().split()
        sentence_b = sentence_b.lower().split()


        stop_words = stopwords.words('english')
        sentence_a = [w for w in sentence_a if w not in stop_words]
        sentence_b = [w for w in sentence_b if w not in stop_words]

        # model = api.load('word2vec-google-news-300')
        distance = model.wmdistance(sentence_a,sentence_b)
        # print(distance)

        return distance
    
    # TRANSLATION ERROR RATE
    def compute_ter(self,reference, candidate):

        ref_tokens = nltk.word_tokenize(reference.lower())
        cand_tokens = nltk.word_tokenize(candidate.lower())

        substitutions = nltk.edit_distance(ref_tokens, cand_tokens)
        deletions = len(ref_tokens) - len(set(ref_tokens) & set(cand_tokens))
        insertions = len(cand_tokens) - len(set(ref_tokens) & set(cand_tokens))

        reference_length = len(ref_tokens)
        ter = (substitutions + deletions + insertions) / reference_length

        return ter

    def compute_perplexity(self,text):
        model = AutoModelForCausalLM.from_pretrained("gpt2")
        tokenizer = AutoTokenizer.from_pretrained("gpt2")
        inputs = tokenizer(text, return_tensors = "pt")
        loss = torch.nn.CrossEntropyLoss() 
        final_loss = model(input_ids = inputs["input_ids"], labels = inputs["input_ids"]).loss
        ppl = torch.exp(final_loss)
        return ppl
        # print(ppl)
        

In [3]:
def save_json(data, filepath=r'new_data.json'):
    with open(filepath, 'w') as fp:
        json.dump(data, fp, indent=4)

In [4]:
reference_text = "The president of the United States of America"
candidate_text = "The president of India"
scorer = Scorers()
bleu_1, bleu_2, bleu_3, bleu_4 = scorer.compute_bleu(reference_text, candidate_text)
print(f"BLEU-1 Score: {bleu_1:.4f}")
print(f"BLEU-2 Score: {bleu_2:.4f}")
print(f"BLEU-3 Score: {bleu_3:.4f}")
print(f"BLEU-4 Score: {bleu_4:.4f}")

BLEU-1 Score: 0.2759
BLEU-2 Score: 0.2601
BLEU-3 Score: 0.2328
BLEU-4 Score: 0.0000


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


In [5]:
model = api.load('word2vec-google-news-300')
scorer.compute_wmd(reference_text,candidate_text,model)
print("$$$$$$$$$$$$$$$$$$$$$$$")
scorer.compute_wmd(reference_text,candidate_text,model)

$$$$$$$$$$$$$$$$$$$$$$$


0.8607883257981531

In [8]:
meteor = scorer.compute_meteor(reference_text, candidate_text)
print(f"METEOR Score: {meteor:.4f}")

METEOR Score: 0.1974


In [4]:
data = json.load(open(r'data/pairwise_evaluation_w_embeddings.json'))

In [85]:
full_embeddings = np.array([datum['full_embedding'] for datum in data])
writer_summary_embeddings = np.array([datum['writer_summary_embedding'] for datum in data])
llm_summary_embeddings = np.array([datum['llm_summary_embedding'] for datum in data])

distances_writer = distances.linear_regression_distances(full_embeddings,writer_summary_embeddings)
distances_writer

AttributeError: module 'metrics.distances' has no attribute 'linear_regression_distances'

Compute all Scores for all data points

In [13]:
scorer = Scorers()
model = api.load('word2vec-google-news-300')

full_embeddings = np.array([datum['full_embedding'] for datum in data])
writer_summary_embeddings = np.array([datum['writer_summary_embedding'] for datum in data])
llm_summary_embeddings = np.array([datum['llm_summary_embedding'] for datum in data])
writer_distances = distances.linear_regression_distance(full_embeddings, writer_summary_embeddings)
llm_distances = distances.linear_regression_distance(full_embeddings, llm_summary_embeddings)

for i,datum in enumerate(data):

    print('{}/{}'.format(i, len(data)))
    writer_summ = datum['writer_summary']
    llm_summ = datum['text-davinci-002_summary']
    article = datum['article_text']

    bleu_1,bleu_2,bleu_3,bleu_4 = scorer.compute_bleu(article,writer_summ)
    datum['bleu_1_writer'] = bleu_1
    datum['bleu_2_writer'] = bleu_2
    datum['bleu_3_writer'] = bleu_3
    datum['bleu_4_writer'] = bleu_4

    bleu_1,bleu_2,bleu_3,bleu_4 = scorer.compute_bleu(article,llm_summ)
    datum['bleu_1_llm'] = bleu_1
    datum['bleu_2_llm'] = bleu_2
    datum['bleu_3_llm'] = bleu_3
    datum['bleu_4_llm'] = bleu_4

    rogue = scorer.compute_rouge(article,writer_summ)
    datum['rogue_1_writer'] = rogue['rouge1'].fmeasure
    datum['rogue_2_writer'] = rogue['rouge2'].fmeasure
    datum['rogue_L_writer'] = rogue['rougeL'].fmeasure

    rogue = scorer.compute_rouge(article,llm_summ)
    datum['rogue_1_llm'] = rogue['rouge1'].fmeasure
    datum['rogue_2_llm'] = rogue['rouge2'].fmeasure
    datum['rogue_L_llm'] = rogue['rougeL'].fmeasure

    meteor = scorer.compute_meteor(article,writer_summ)
    datum['meteor_writer'] = meteor

    meteor = scorer.compute_meteor(article,llm_summ)
    datum['meteor_llm'] = meteor

    wmd = scorer.compute_wmd(article,writer_summ,model)
    datum['WMD_writer'] = wmd

    wmd = scorer.compute_wmd(article,llm_summ,model)
    datum['WMD_llm'] = wmd

    ter = scorer.compute_ter(article,writer_summ)
    datum['TER_writer'] = ter

    ter = scorer.compute_ter(article,llm_summ)
    datum['TER_llm'] = ter

    cosine_dist = distances.cosine_distance(datum['full_embedding'],datum['writer_summary_embedding'])
    datum['cosine_writer'] = cosine_dist

    cosine_dist = distances.cosine_distance(datum['full_embedding'],datum['llm_summary_embedding'])
    datum['cosine_llm'] = cosine_dist

    datum['lr_dist_writer'] = writer_distances[i]
    datum['lr_dist_llm'] = llm_distances[i]

    perplexity_writer = scorer.compute_perplexity(writer_summ)
    perplexity_llm = scorer.compute_perplexity(llm_summ)
    perp_writer = perplexity_writer.item()
    perp_llm = perplexity_llm.item()
    datum['Writer_Perplexity'] = perp_writer
    datum['LLM_Perplexity'] = perp_llm



0/599


The hypothesis contains 0 counts of 4-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


1/599
2/599
3/599
4/599
5/599
6/599


The hypothesis contains 0 counts of 3-gram overlaps.
Therefore the BLEU score evaluates to 0, independently of
how many N-gram overlaps of lower order it contains.
Consider using lower n-gram order or use SmoothingFunction()


7/599
8/599
9/599
10/599
11/599
12/599
13/599
14/599
15/599
16/599
17/599
18/599
19/599
20/599
21/599
22/599
23/599
24/599
25/599
26/599
27/599
28/599
29/599
30/599
31/599
32/599
33/599
34/599
35/599
36/599
37/599
38/599
39/599
40/599
41/599
42/599
43/599
44/599
45/599
46/599
47/599
48/599
49/599
50/599
51/599
52/599
53/599
54/599
55/599
56/599
57/599
58/599
59/599
60/599
61/599
62/599
63/599
64/599
65/599
66/599
67/599
68/599
69/599
70/599
71/599
72/599
73/599
74/599
75/599
76/599
77/599
78/599
79/599
80/599
81/599
82/599
83/599
84/599
85/599
86/599
87/599
88/599
89/599
90/599
91/599
92/599
93/599
94/599
95/599
96/599
97/599
98/599
99/599
100/599
101/599
102/599
103/599
104/599
105/599
106/599
107/599
108/599
109/599
110/599
111/599
112/599
113/599
114/599
115/599
116/599
117/599
118/599
119/599
120/599
121/599
122/599
123/599
124/599
125/599
126/599
127/599
128/599
129/599
130/599
131/599
132/599
133/599
134/599
135/599
136/599
137/599
138/599
139/599
140/599
141/599
142/599
143/599


In [15]:
save_json(data, r'data/final_data.json')


In [5]:
final_data = json.load(open(r'data/final_data.json'))

In [6]:
df = pd.DataFrame(final_data)

Initial dataframe

In [53]:
df

Unnamed: 0,article_id,writer_id,evaluator_id,article_text,writer_summary,text-davinci-002_summary,overall_writer_better,informative_writer_better,full_embedding,writer_summary_embedding,...,meteor_writer,meteor_llm,WMD_writer,WMD_llm,TER_writer,TER_llm,cosine_writer,cosine_llm,Writer_Perplexity,LLM_Perplexity
0,18cba9a8f2f64055a707452638182303,133d66ad12ab449e8c607d188b65e948,9d49ddd0-7c67-4394-8d6b-e685a982e956,Baltimore's mayor has sacked the US city's pol...,The mayor of Baltimore fired the police chief ...,The mayor of Baltimore has sacked the city's p...,False,Equally Good,"[-0.007163366, 0.0075295228, -0.022528652, -0....","[-0.0027573644, -0.0059608207, -0.017378185, -...",...,0.073195,0.083950,0.998596,0.895712,1.937349,1.816867,0.904066,0.968363,22.838551,17.785681
1,66f39853ad2b437c8bdca86ae74bb35f,85b4d7406d144eacaede6397fafe06b9,0ec347ce-79c1-4495-8f84-43f2f57deb82,Western Sahara has welcomed Morocco's readmiss...,Morocco joined the African Union after a refer...,The article discusses Western Sahara's reactio...,False,False,"[0.0064340984, -0.013940546, 0.022131747, -0.0...","[-0.0016521142, -0.024277786, 0.004347165, -0....",...,0.032232,0.087081,1.093312,0.970475,1.944672,1.905738,0.915527,0.949937,77.512268,22.789694
2,302c800172da420f9e2e80474a9cf5ec,85b4d7406d144eacaede6397fafe06b9,0ec347ce-79c1-4495-8f84-43f2f57deb82,With the new Avengers: Age of Ultron movie rel...,James Haskell is a rugby player for the London...,"James Haskell, a rugby player for London Wasps...",Equally Good,Equally Good,"[-0.0070248763, -0.00925884, 0.0027083454, -0....","[-0.024424886, -0.0077787954, -0.0036272286, -...",...,0.092289,0.097873,1.015305,0.916830,1.888252,1.853868,0.946460,0.945514,25.392700,19.203604
3,14f71296e6404651bfdcfd300ddebcf8,7c02dffbfb0348f68758c00334878ef7,d3727ca5-7197-4a03-81a0-2137ebcd52f4,UK manufacturing activity contracted in April ...,Concerns over UK manufacturing activity have b...,The Markit/CIPS manufacturing Purchasing Manag...,False,False,"[-0.023462681, -0.024742227, -0.018057255, -0....","[-0.011470091, -0.036104277, -0.009304092, -0....",...,0.066601,0.100952,0.913022,0.855796,1.903030,1.846465,0.931565,0.969474,38.945465,27.502934
4,5a5d2bbfb7a74067abfb31a5f4888c71,564736de98b54961a003a097c04d7b50,d3727ca5-7197-4a03-81a0-2137ebcd52f4,An obese mother who enjoyed takeaways and booz...,"Lizzi, an obese mother of six, wouldn't have r...","Lizzi Crawford, 32, from Stoke-on-Trent, lost ...",False,False,"[0.0010762861, 0.00084113114, 0.0077814907, -0...","[0.00010524096, 0.007291257, 0.010436634, -0.0...",...,0.041791,0.041858,1.084467,1.077862,1.943495,1.901445,0.931857,0.952198,36.748543,43.636581
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
594,3b82559a2edb48c7bffca655c43fe34d,b33c38a1cc7a45358cbcd30311e78ae2,d3727ca5-7197-4a03-81a0-2137ebcd52f4,It's every interviewer's worst nightmare. Ask ...,When a News 4 presenter attempted to ask Rober...,The article discusses celebrities who have wal...,False,False,"[-0.0048403596, -0.01597452, 0.029282175, -0.0...","[-0.012659721, -0.015550625, 0.025793782, -0.0...",...,0.019753,0.011638,1.050061,1.100482,1.975750,1.978302,0.907267,0.893525,28.995054,22.161198
595,24df2a97c6a94a40b5f291ad5e5314b0,f7427d27b63541b8b3b1099c5f32f7de,9d49ddd0-7c67-4394-8d6b-e685a982e956,The Liberal Democrats have admitted they are o...,The Liberal Democrats are optimistic about win...,The Liberal Democrats are on course to lose at...,Equally Good,Equally Good,"[-0.00969115, 0.0142671615, 0.017133743, -0.01...","[-0.02096688, -0.00027541863, 0.0039957105, -0...",...,0.020894,0.023619,1.145921,1.064026,1.978064,1.956884,0.891258,0.923691,62.143101,18.201765
596,2c80f9196b654048b01397ebd52d3518,564736de98b54961a003a097c04d7b50,d3727ca5-7197-4a03-81a0-2137ebcd52f4,A Hertfordshire council is buying in water fro...,The Coronation Fountain in Welwyn Garden City ...,"The Coronation Fountain in Welwyn Garden City,...",True,True,"[0.016982611, -0.0031427317, -0.015396845, -0....","[0.023978898, -0.008586212, -0.002757913, 0.00...",...,0.122208,0.104249,0.849500,0.966516,1.849850,1.816817,0.938393,0.953871,72.623482,59.409863
597,f1d84317501a4ba285f0e81384471f6e,b33c38a1cc7a45358cbcd30311e78ae2,b6d4bf14-3323-43ad-a311-e33bb3d5fd49,Sport funding in Scotland is facing a 20% redu...,The country of Scotland is potentially facing ...,The article discusses how sport funding in Sco...,True,True,"[-0.009644198, -0.0059027453, 0.0016152562, -0...","[-0.005729174, -0.019543953, 0.0024205518, -0....",...,0.062730,0.039922,1.011900,1.070063,1.918367,1.929705,0.936066,0.932431,40.064541,28.392483


In [7]:
final_data = df.drop(columns=['writer_id', 'evaluator_id','article_text','writer_summary','text-davinci-002_summary','article_id'])

In [8]:
final_data['overall_writer_better'] = [str(value) for value in final_data['overall_writer_better']]

In [9]:
final_data['informative_writer_better'] = [str(value) for value in final_data['informative_writer_better']]


In [10]:
encoder = LabelEncoder()
final_data['overall_writer_better'] = encoder.fit_transform(final_data['overall_writer_better'])
final_data['informative_writer_better'] = encoder.fit_transform(final_data['informative_writer_better'])

In [11]:
X = final_data.drop(columns=['overall_writer_better','full_embedding','writer_summary_embedding','llm_summary_embedding','informative_writer_better'])
labels = final_data['overall_writer_better']

In [12]:
x, x_test, y, y_test = train_test_split(X,labels,test_size=0.2,train_size=0.8)
x_train, x_cv, y_train, y_cv = train_test_split(x,y,test_size = 0.25,train_size =0.75)

Final training data

In [83]:
x_train.columns

Index(['lr_dist_writer', 'lr_dist_llm', 'bleu_1_writer', 'bleu_2_writer',
       'bleu_3_writer', 'bleu_4_writer', 'bleu_1_llm', 'bleu_2_llm',
       'bleu_3_llm', 'bleu_4_llm', 'rogue_1_writer', 'rogue_2_writer',
       'rogue_L_writer', 'rogue_1_llm', 'rogue_2_llm', 'rogue_L_llm',
       'meteor_writer', 'meteor_llm', 'WMD_writer', 'WMD_llm', 'TER_writer',
       'TER_llm', 'cosine_writer', 'cosine_llm', 'Writer_Perplexity',
       'LLM_Perplexity'],
      dtype='object')

Final Labels

In [14]:
labels

0      1
1      1
2      0
3      1
4      1
      ..
594    1
595    0
596    2
597    2
598    2
Name: overall_writer_better, Length: 599, dtype: int32

In [15]:
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors = 5,metric = 'minkowski', p = 2)
knn.fit(x_train, y_train)

y_pred_knn = knn.predict(x_test)
cm_knn = confusion_matrix(y_test, y_pred_knn)
print (cm_knn)
acc_knn = accuracy_score(y_test, y_pred_knn)
print (acc_knn)

AttributeError: 'Flags' object has no attribute 'c_contiguous'

Random Forest Classifier

SVC

In [26]:
from sklearn.svm import SVC

svc_rbf = SVC(kernel = 'linear', random_state = 0)
svc_rbf.fit(x_train, y_train)
y_pred_svc_rbf = svc_rbf.predict(x_test)

cm_svc = confusion_matrix(y_test, y_pred_svc_rbf)
print (cm_svc)
acc_svc_rbf = accuracy_score(y_test, y_pred_svc_rbf)
print (acc_svc_rbf)


[[ 0  4 15]
 [ 0 15 42]
 [ 0  9 35]]
0.4166666666666667


Naive-Bayes

In [28]:
from sklearn.naive_bayes import GaussianNB

nb = GaussianNB()
nb.fit(x_train, y_train)
y_pred = nb.predict(x_test)

cm_nb = confusion_matrix(y_test, y_pred)
print(cm_nb)
acc_nb = accuracy_score(y_test, y_pred)
print(acc_nb)

[[ 3  0 16]
 [ 2  9 46]
 [ 1  3 40]]
0.43333333333333335


Decision Tree Classifier

In [29]:
from sklearn.tree import DecisionTreeClassifier

dt = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
dt.fit(x_train, y_train)
y_pred = dt.predict(x_test)

cm_dt = confusion_matrix(y_test, y_pred)
print (cm_dt)
acc_dt = accuracy_score(y_test, y_pred)
print (acc_dt)


[[ 5  8  6]
 [20 20 17]
 [16 15 13]]
0.31666666666666665


Random Forest Classifier

In [52]:
from sklearn.ensemble import RandomForestClassifier

rf = RandomForestClassifier(n_estimators=100, max_depth=3, random_state=42,class_weight=None)
rf.fit(x_train, y_train)

y_pred = rf.predict(x_test)
accuracy = rf.score(x_test, y_test)

cm_rf = confusion_matrix(y_test, y_pred)
print (cm_rf)

print('Accuracy:', accuracy)

[[ 2  3 14]
 [ 1 18 38]
 [ 1  7 36]]
Accuracy: 0.4666666666666667


AdaBoost Classifier

In [54]:
from sklearn.ensemble import AdaBoostClassifier

abc = AdaBoostClassifier()
abc.fit(x_train, y_train)

y_pred_abc = abc.predict(x_test)
cm_ada = confusion_matrix(y_test, y_pred_abc)
print (cm_ada)
acc_abc = accuracy_score(y_test, y_pred_abc)
print (acc_abc)

[[ 4  4 11]
 [ 9 22 26]
 [10 13 21]]
0.39166666666666666


Quadratic discriminant analysis

In [79]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

qda = QuadraticDiscriminantAnalysis()
qda.fit(x_train, y_train)

y_pred_qda = qda.predict(x_test)
cm_qda = confusion_matrix(y_test, y_pred_qda)
print (cm_qda)
acc_qda = accuracy_score(y_test, y_pred_qda)
print (acc_qda)

[[ 4  1 14]
 [ 2 11 44]
 [ 3  3 38]]
0.44166666666666665




MLP Classifier

In [76]:
from sklearn.neural_network import MLPClassifier

mlp = MLPClassifier(alpha=1, max_iter=1000)
mlp.fit(x_train, y_train)

y_pred_mlp = mlp.predict(x_test)
cm = confusion_matrix(y_test, y_pred_mlp)
print (cm)
acc_mlp = accuracy_score(y_test, y_pred_mlp)
print (acc_mlp)


[[ 1 12  6]
 [ 1 49  7]
 [ 1 33 10]]
0.5


In [82]:
from tabulate import tabulate
 
mydata = [
    ["Random forest",acc_rf,cm_rf], 
    ["Linear SVM", acc_svc_rbf,cm_svc], 
    ["Adaboost", acc_abc,cm_ada], 
    ["Quadratic Discriminant Analysis", acc_qda,cm_qda],
    ["Decision tree",acc_dt,cm_dt],
    ["GaussianNB",acc_nb,cm_nb],
    ["MLP",acc_mlp,cm]
]
 
head = ["Classifier", "Accuracy", "Confusion Matrix"]
 
# display table
print(tabulate(mydata, headers=head, tablefmt="grid"))


+---------------------------------+------------+--------------------+
| Classifier                      |   Accuracy | Confusion Matrix   |
| Random forest                   |   0.441667 | [[ 2  3 14]        |
|                                 |            |  [ 1 18 38]        |
|                                 |            |  [ 1  7 36]]       |
+---------------------------------+------------+--------------------+
| Linear SVM                      |   0.416667 | [[ 0  4 15]        |
|                                 |            |  [ 0 15 42]        |
|                                 |            |  [ 0  9 35]]       |
+---------------------------------+------------+--------------------+
| Adaboost                        |   0.391667 | [[ 4  4 11]        |
|                                 |            |  [ 9 22 26]        |
|                                 |            |  [10 13 21]]       |
+---------------------------------+------------+--------------------+
| Quadratic Discrimi