In [None]:
from google.colab import drive

import os
import glob

import numpy as np
import pandas as pd

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import HashingVectorizer, TfidfVectorizer, CountVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier, LogisticRegression
from sklearn import metrics
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.svm import LinearSVC, SVC
from imblearn.over_sampling import SMOTE
from xgboost import XGBClassifier

import tensorflow as tf

from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Embedding,Dense,GRU, Bidirectional, LSTM, Flatten, GlobalAveragePooling1D, Dropout, InputLayer, Input



In [None]:
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Reading data from csv file
data = pd.read_csv('./data/final_data_v2.csv')

In [None]:
data.columns

Index(['title', 'type'], dtype='object')

In [None]:
# Defining classes
classes = {
    'fake' : 0,
    'hate' : 1,
    'safe' : 2
}

# Convert lable to integer (0 or 1)
data['type'] = data['type'].apply(lambda x: classes[x])

# drop missing values rows
data = data.dropna()

# Shuffling data 
final_data = shuffle(data)

In [None]:
# Defining X and y, X is being title of the news and type being label
x = final_data['title']
y = final_data['type']

# Class counts
y.value_counts()

1    76495
0    76495
2    21417
Name: type, dtype: int64

In [None]:
x_train, x_test , y_train, y_test = train_test_split(x, y , test_size = 0.20)

In [None]:
# Convert News titles into Vectors
hashingVectorizer = HashingVectorizer(stop_words='english', alternate_sign=False)
hashingVectorizer.fit(x_train)
x_train_new = hashingVectorizer.transform(x_train)
x_test_new = hashingVectorizer.transform(x_test)

In [None]:
smote = SMOTE()
ov_train_x, ov_train_y = smote.fit_resample(x_train_new, y_train)



In [None]:
ov_test_x, ov_test_y = smote.fit_resample(x_test_new, y_test)



In [None]:
# Linear SVM
linearSVM = LinearSVC()
linearSVM.fit(ov_train_x, ov_train_y)
y_pred_lsvm = linearSVM.predict(ov_test_x)

print('Classification Report for Linear SVM: \n', metrics.classification_report(ov_test_y, y_pred_lsvm))

Classification Report for Linear SVM: 
               precision    recall  f1-score   support

           0       0.82      0.85      0.84     15335
           1       0.85      0.83      0.84     15335
           2       0.92      0.92      0.92     15335

    accuracy                           0.86     46005
   macro avg       0.87      0.86      0.86     46005
weighted avg       0.87      0.86      0.86     46005



In [None]:
# Multinomial Naive Bayes
multinomialNB = MultinomialNB(alpha=0.01)
multinomialNB.fit(ov_train_x, ov_train_y)
y_pred_nb = multinomialNB.predict(ov_test_x)

print('Classification Report for Multinomial Naive Bayes: \n', metrics.classification_report(ov_test_y, y_pred_nb))

Classification Report for Multinomial Naive Bayes: 
               precision    recall  f1-score   support

           0       0.84      0.77      0.80     15335
           1       0.80      0.84      0.82     15335
           2       0.88      0.90      0.89     15335

    accuracy                           0.84     46005
   macro avg       0.84      0.84      0.84     46005
weighted avg       0.84      0.84      0.84     46005



In [None]:
# Logistic Regression
logisticRegression = LogisticRegression()
logisticRegression.fit(ov_train_x, ov_train_y)
y_pred_lr = logisticRegression.predict(ov_test_x)

print('Classification Report for Logistic Regression: \n', metrics.classification_report(ov_test_y, y_pred_lr))

Classification Report for Logistic Regression: 
               precision    recall  f1-score   support

           0       0.81      0.86      0.83     15335
           1       0.86      0.81      0.83     15335
           2       0.92      0.91      0.91     15335

    accuracy                           0.86     46005
   macro avg       0.86      0.86      0.86     46005
weighted avg       0.86      0.86      0.86     46005



STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression


In [None]:
# Passive Aggressive Classifier
passiveAggressiveClassifier = PassiveAggressiveClassifier(max_iter=50, random_state=7)
passiveAggressiveClassifier.fit(ov_train_x, ov_train_y)
y_pred_pa = passiveAggressiveClassifier.predict(ov_test_x)

print('Classification Report for Passive Aggressive Classifier: \n', metrics.classification_report(ov_test_y, y_pred_pa))

Classification Report for Passive Aggressive Classifier: 
               precision    recall  f1-score   support

           0       0.80      0.81      0.80     15335
           1       0.81      0.83      0.82     15335
           2       0.92      0.89      0.90     15335

    accuracy                           0.84     46005
   macro avg       0.84      0.84      0.84     46005
weighted avg       0.84      0.84      0.84     46005



In [None]:
# XGBoost
xgBoostClassifier = XGBClassifier(objective='multi:softmax',missing=None)
xgBoostClassifier.fit(ov_train_x, ov_train_y)
y_pred_xgb = xgBoostClassifier.predict(ov_test_x)

print('Classification Report for XGBoost Classifier: \n', metrics.classification_report(ov_test_y, y_pred_xgb))

Classification Report for XGBoost Classifier: 
               precision    recall  f1-score   support

           0       0.58      0.91      0.71     15335
           1       0.85      0.51      0.64     15335
           2       0.89      0.74      0.81     15335

    accuracy                           0.72     46005
   macro avg       0.78      0.72      0.72     46005
weighted avg       0.78      0.72      0.72     46005



In [None]:
# SVM
svm = SVC()
svm.fit(ov_train_x, ov_train_y)
y_pred = svm.predict(ov_test_x)

print('Classification Report for SVM: \n', metrics.classification_report(ov_test_y, y_pred))

Classification Report for SVM: 
               precision    recall  f1-score   support

           0       0.82      0.89      0.85     15352
           1       0.88      0.83      0.86     15352
           2       0.95      0.92      0.94     15352

    accuracy                           0.88     46056
   macro avg       0.88      0.88      0.88     46056
weighted avg       0.88      0.88      0.88     46056

