## Model Tuning - Test

In [1]:
!pip install imblearn



In [22]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
import pandas as pd
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import f1_score

### Import Prepped Features

In [83]:
prepped_df = pd.read_pickle("final_data.pkl")

In [73]:
#prepped_df = pd.read_csv("all_features.csv")

In [84]:
prepped_df.head()

Unnamed: 0,sentences,article,N_sentence,is_propaganda,automated_readability_index,coleman_liau_index,dale_chall_readability_score,difficult_words,flesch_kincaid_grade,flesch_reading_ease,...,590,591,592,593,594,595,596,597,598,599
0,US bloggers banned from entering UK\n,111111112,1,non-propaganda,5.9,9.15,9.2,2,4.5,73.85,...,-0.011961,-0.006269,-0.019708,0.020877,-0.004407,0.020958,-0.020239,0.001426,0.020654,-0.002244
1,Two prominent US bloggers have been banned fro...,111111112,3,non-propaganda,8.9,9.28,7.39,3,7.2,72.16,...,-0.017353,-0.000727,-0.016628,0.026902,-0.006105,0.028831,-0.026416,-0.005258,0.021545,-0.006479
2,Pamela Geller and Robert Spencer co-founded an...,111111112,5,propaganda,16.4,19.53,9.5,4,9.2,50.84,...,0.008488,0.004523,0.006107,0.000283,0.00985,-0.00484,0.006709,0.015659,-0.015211,-0.014725
3,They were due to speak at an English Defence L...,111111112,7,non-propaganda,10.1,9.28,7.9,4,7.2,77.57,...,-0.002984,-0.017503,0.003942,-0.010038,-0.019114,0.007487,0.001308,0.007575,-0.003602,-0.01424
4,A government spokesman said individuals whose ...,111111112,9,non-propaganda,14.6,13.4,8.44,5,11.5,50.16,...,-0.001977,-0.003367,-0.005935,0.002272,0.004195,-0.004013,-0.015887,-0.00142,0.003748,0.00392


In [85]:
X = prepped_df.iloc[:, 4:].values
y = prepped_df['is_propaganda'].map({'non-propaganda':0, 'propaganda':1})

In [86]:
y.value_counts()

0    10325
1     3938
Name: is_propaganda, dtype: int64

In [100]:
# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 0)

In [101]:
# Feature Scaling
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [106]:
sm = SMOTE(random_state=42)
X_train_res, y_train_res = sm.fit_resample(X_train, y_train)

In [107]:
classifier_rbf = SVC(kernel = 'rbf', random_state = 1145522)
classifier_rbf.fit(X_train_res, y_train_res)



SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto_deprecated',
  kernel='rbf', max_iter=-1, probability=False, random_state=1145522,
  shrinking=True, tol=0.001, verbose=False)

In [108]:
# Predicting the total set results
y_pred_rbf = classifier_rbf.predict(X_test)

In [111]:
X_scaled = sc.transform(X)
y_pred_rbf_tot = classifier_rbf.predict(X_scaled)

### Linear

In [109]:
# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm1 = confusion_matrix(y_test, y_pred_rbf)
cm1

array([[2551,  572],
       [ 597,  559]])

In [112]:
f1_rbf = f1_score(y, y_pred_rbf_tot)
print("Kernal SVM F1 result: " + str(f1_rbf))

Kernal SVM F1 result: 0.7349788082772375


In [114]:
import pickle

with open('new_model.pkl', 'wb') as f:
    pickle.dump(y_pred_rbf_tot, f)

In [121]:
export_pred = pd.concat([prepped_df[['article', 'N_sentence']], pd.Series(y_pred_rbf_tot)], axis = 1)

In [122]:
export_pred.columns = ['article', 'N_sentence', 'label']

In [124]:
export_pred.to_csv('export_pred_task2.csv')

In [None]:
###################################################

In [None]:
prepped_df_test = pd.read_pickle("final_data_test.pkl")

In [None]:
X_test = prepped_df.iloc[:, 4:].values
y_test = prepped_df_test['is_propaganda'].map({'non-propaganda':0, 'propaganda':1})

In [None]:
X_scaled_test = sc.transform(X_test)
y_pred_rbf_test = classifier_rbf.predict(X_scaled_test)

In [None]:
export_pred = pd.concat([prepped_df[['article', 'N_sentence']], pd.Series(y_pred_rbf_test)], axis = 1)

In [None]:
export_pred.columns = ['article', 'N_sentence', 'label']

In [None]:
export_pred.to_csv('export_pred_task2_test.csv')