In [28]:
import pandas as pd
import numpy as np
from nltk import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
np.random.seed(500)

In [29]:
df = pd.read_pickle('../data/v2.0-processed/df_premises.pickle')
df.head()

Unnamed: 0,document,file_path,split,premise,type,logos,pathos,ethos
0,88,v2.0/positive/88.xml,positive,[47% of all jobs are at risk of being automate...,logos,True,False,False
1,88,v2.0/positive/88.xml,positive,This number will grow grow until the vast majo...,logos_pathos,True,True,False
2,88,v2.0/positive/88.xml,positive,Since some of this automation will inevitably ...,logos_pathos,True,True,False
3,88,v2.0/positive/88.xml,positive,By fortifying themselves in their gated commun...,logos_pathos,True,True,False
4,88,v2.0/positive/88.xml,positive,"Once everyone is dead, they can simply be wipe...",logos_pathos,True,True,False


In [39]:
df.sample(10)

Unnamed: 0,document,file_path,split,premise,type,logos,pathos,ethos
1698,81,v2.0/negative/81.xml,negative,"""33% of women who obtain abortions have family...",ethos_logos,True,False,True
1451,12,v2.0/negative/12.xml,negative,It's not like it was an utopia where zoro or l...,pathos,False,True,False
1125,88,v2.0/negative/88.xml,negative,"this practice would be much easier to ""evangel...",pathos,False,True,False
1064,1,v2.0/positive/1.xml,positive,You've appealed to all my sensibilities aroun...,pathos,False,True,False
335,29,v2.0/positive/29.xml,positive,even if countries are peaceful and cooperativ...,logos,True,False,False
1005,86,v2.0/positive/86.xml,positive,"It wasn't a made for TV movie, but that's what...",logos_pathos,True,True,False
238,72,v2.0/positive/72.xml,positive,"Forgiveness is a gift, not an expectation.",logos_pathos,True,True,False
771,82,v2.0/positive/82.xml,positive,"After all, most people choose to remain anonym...",ethos_logos_pathos,True,True,True
1889,79,v2.0/negative/79.xml,negative,The perception of the value of human life goes...,logos_pathos,True,True,False
2066,85,v2.0/negative/85.xml,negative,https://www.youtube.com/watch?v=rlQrYCacrKo,logos_pathos,True,True,False


In [22]:
# Lowercase
df['premise'] = df['premise'].str.lower()
df['logos'] = df['logos'].map({True: 'Logos', False: 'Not Logos'})
df['pathos'] = df['pathos'].map({True: 'Pathos', False: 'Not Pathos'})
df['ethos'] = df['ethos'].map({True: 'Ethos', False: 'Not Ethos'})
df['premise'] = [word_tokenize(entry) for entry in df['premise']]
df['text'] = [' '.join(entry) for entry in df['premise']]

In [25]:
# df['logos'].value_counts()
# df['ethos'].value_counts()
# df['pathos'].value_counts()

Pathos        1194
Not Pathos     878
Name: pathos, dtype: int64

In [26]:
# train, test = train_test_split(df, test_size=0.2, random_state=0, stratify=df['type'], shuffle=True)

In [8]:
train, test = train_test_split(df, test_size=0.2, random_state=0, stratify=df['type'], shuffle=True)
print(test['pathos'].value_counts())
print(test['logos'].value_counts())
print(test['ethos'].value_counts())
def train_svm(sem_type):
    Train_X, Test_X, Train_Y, Test_Y = train['text'], test['text'], train[sem_type], test[sem_type]
    Encoder = LabelEncoder()
    Train_Y = Encoder.fit_transform(Train_Y)
    Test_Y = Encoder.fit_transform(Test_Y)
    Tfidf_vect = TfidfVectorizer(max_features=5000)
    Tfidf_vect.fit(df['text'])
    Train_X_Tfidf = Tfidf_vect.transform(Train_X)
    Test_X_Tfidf = Tfidf_vect.transform(Test_X)
    # Classifier - Algorithm - SVM
    # fit the training dataset on the classifier
    SVM = svm.LinearSVC(C=1.0)
    SVM.fit(Train_X_Tfidf, Train_Y) # predict the labels on validation dataset
    predictions_SVM = SVM.predict(Test_X_Tfidf) # Use accuracy_score function to get the accuracy
    print(f"{sem_type} -> \n", classification_report(Test_Y, predictions_SVM, target_names=Encoder.classes_))

Pathos        239
Not Pathos    176
Name: pathos, dtype: int64
Logos        352
Not Logos     63
Name: logos, dtype: int64
Not Ethos    386
Ethos         29
Name: ethos, dtype: int64


In [9]:
for sem_type in ['logos', 'ethos', 'pathos']:
    train_svm(sem_type=sem_type)

logos -> 
               precision    recall  f1-score   support

       Logos       0.89      0.99      0.94       352
   Not Logos       0.83      0.32      0.46        63

    accuracy                           0.89       415
   macro avg       0.86      0.65      0.70       415
weighted avg       0.88      0.89      0.86       415

ethos -> 
               precision    recall  f1-score   support

       Ethos       0.90      0.31      0.46        29
   Not Ethos       0.95      1.00      0.97       386

    accuracy                           0.95       415
   macro avg       0.93      0.65      0.72       415
weighted avg       0.95      0.95      0.94       415

pathos -> 
               precision    recall  f1-score   support

  Not Pathos       0.70      0.55      0.61       176
      Pathos       0.71      0.83      0.77       239

    accuracy                           0.71       415
   macro avg       0.71      0.69      0.69       415
weighted avg       0.71      0.71      0

In [10]:
df

Unnamed: 0,document,file_path,split,premise,type,logos,pathos,ethos,text
0,88,v2.0/positive/88.xml,positive,"[[, 47, %, of, all, jobs, are, at, risk, of, b...",logos,Logos,Not Pathos,Not Ethos,[ 47 % of all jobs are at risk of being automa...
1,88,v2.0/positive/88.xml,positive,"[this, number, will, grow, grow, until, the, v...",logos_pathos,Logos,Pathos,Not Ethos,this number will grow grow until the vast majo...
2,88,v2.0/positive/88.xml,positive,"[since, some, of, this, automation, will, inev...",logos_pathos,Logos,Pathos,Not Ethos,since some of this automation will inevitably ...
3,88,v2.0/positive/88.xml,positive,"[by, fortifying, themselves, in, their, gated,...",logos_pathos,Logos,Pathos,Not Ethos,by fortifying themselves in their gated commun...
4,88,v2.0/positive/88.xml,positive,"[once, everyone, is, dead, ,, they, can, simpl...",logos_pathos,Logos,Pathos,Not Ethos,"once everyone is dead , they can simply be wip..."
...,...,...,...,...,...,...,...,...,...
2067,85,v2.0/negative/85.xml,negative,"[the, some, few, people, that, still, know, ho...",logos_pathos,Logos,Pathos,Not Ethos,the some few people that still know how to hun...
2068,85,v2.0/negative/85.xml,negative,"[machines, do, n't, have, money, either, so, t...",logos_pathos,Logos,Pathos,Not Ethos,machines do n't have money either so they wo n...
2069,85,v2.0/negative/85.xml,negative,"[cryptocurrency, may, help, a, little, because...",logos,Logos,Not Pathos,Not Ethos,cryptocurrency may help a little because it wo...
2070,85,v2.0/negative/85.xml,negative,"[once, they, can, administrate, their, own, sy...",logos,Logos,Not Pathos,Not Ethos,once they can administrate their own systems w...


In [14]:
train_premises = df.sample(5)

In [None]:
train_premises