In [1]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE(review): the CSV read later in this notebook uses a relative path,
# not a /content/drive path — confirm whether this mount is actually needed.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
import pandas as pd

In [3]:
# Load the fake/real news dataset.
# NOTE(review): the path is relative, so the CSV must sit in the kernel's
# working directory for a fresh run to succeed.
df = pd.read_csv("fake_and_real_news.csv")

In [4]:
# Peek at the first rows (head() shows five rows by default).
df.head()

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake
1,U.S. conservative leader optimistic of common ...,Real
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real
3,Court Forces Ohio To Allow Millions Of Illega...,Fake
4,Democrats say Trump agrees to work on immigrat...,Real


In [5]:
# Dataset dimensions as (rows, columns).
print(df.shape)

(9900, 2)


In [6]:
# Check class balance: count how many rows carry each label.
df['label'].value_counts()

Fake    5000
Real    4900
Name: label, dtype: int64

In [7]:
# Encode the string labels as integers for the classifier: Fake -> 0, Real -> 1.
label_to_num = {'Fake': 0, 'Real': 1}

# Series.map silently turns any label missing from the mapping into NaN,
# so fail loudly if the data contains a label (or a missing value) that
# the mapping does not cover.
unexpected_labels = set(df['label'].unique()) - set(label_to_num)
if unexpected_labels:
    raise ValueError(f"Unmapped label values: {unexpected_labels}")

df['label_num'] = df['label'].map(label_to_num)

In [8]:
# Show the first ten rows to eyeball the label -> label_num encoding.
print(df.head(10))

                                                Text label  label_num
0   Top Trump Surrogate BRUTALLY Stabs Him In The...  Fake          0
1  U.S. conservative leader optimistic of common ...  Real          1
2  Trump proposes U.S. tax overhaul, stirs concer...  Real          1
3   Court Forces Ohio To Allow Millions Of Illega...  Fake          0
4  Democrats say Trump agrees to work on immigrat...  Real          1
5  France says pressure needed to stop North Kore...  Real          1
6  Trump on Twitter (August 8): Opioid crisis, No...  Real          1
7   BUSTED: Trump Supporter Used Poll Watcher Cre...  Fake          0
8  Fatal Niger operation sparks calls for public ...  Real          1
9  Trump says he has 'great heart' for immigrant ...  Real          1


In [9]:
import spacy
!python -m spacy download en_core_web_lg
nlp = spacy.load("en_core_web_lg")

2023-12-26 15:21:39.892956: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-26 15:21:39.893018: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-26 15:21:39.894402: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
Collecting en-core-web-lg==3.6.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_lg-3.6.0/en_core_web_lg-3.6.0-py3-none-any.whl (587.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m587.7/587.7 MB[0m [31m1.4 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: en-core-web-lg
Successfully install

In [10]:
# Convert each article's text to its spaCy document vector.
# nlp.pipe processes the texts as a batched stream, which is considerably
# faster than calling nlp(text) once per row; documents are yielded in
# input order, so the resulting list lines up with the DataFrame rows.
df['vector'] = [doc.vector for doc in nlp.pipe(df['Text'])]

In [11]:
# Number of rows in the DataFrame.
len(df)

9900

In [12]:
# Preview the first rows, including the per-row 'vector' column.
print(df.head())

                                                Text label  label_num  \
0   Top Trump Surrogate BRUTALLY Stabs Him In The...  Fake          0   
1  U.S. conservative leader optimistic of common ...  Real          1   
2  Trump proposes U.S. tax overhaul, stirs concer...  Real          1   
3   Court Forces Ohio To Allow Millions Of Illega...  Fake          0   
4  Democrats say Trump agrees to work on immigrat...  Real          1   

                                              vector  
0  [-0.6759837, 1.4263071, -2.318466, -0.451093, ...  
1  [-1.8355803, 1.3101058, -2.4919677, 1.0268308,...  
2  [-1.9851209, 0.14389805, -2.4221718, 0.9133005...  
3  [-2.7812982, -0.16120885, -1.609772, 1.3624227...  
4  [-2.2010763, 0.9961637, -2.4088492, 1.128273, ...  


In [13]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
features = df['vector'].values
targets = df['label_num']
X_train, X_test, y_train, y_test = train_test_split(
    features, targets, test_size=0.2, random_state=42
)

In [14]:
# Shape of the training feature array (one vector object per row at this point).
X_train.shape

(7920,)

In [15]:
# Shape of the held-out feature array (one vector object per row at this point).
X_test.shape

(1980,)

In [16]:
import numpy as np

# Stack the per-row vectors of each split into a single 2-D matrix
# (rows x embedding dimensions).
X_train_stack, X_test_stack = (np.stack(split) for split in (X_train, X_test))


In [17]:
# Confirm both stacked matrices are 2-D with the same number of columns.
for stacked in (X_train_stack, X_test_stack):
    print(stacked.shape)

(7920, 300)
(1980, 300)


In [18]:
from sklearn.naive_bayes import MultinomialNB
from sklearn.preprocessing import MinMaxScaler

# MultinomialNB requires non-negative features, while the document vectors
# contain negative components, so rescale every dimension into [0, 1].
# clip=True keeps held-out values that fall outside the range seen during
# fitting inside [0, 1]; without it the scaled test matrix can still contain
# negative entries, which the model is not designed for.
scaler = MinMaxScaler(clip=True)
X_train_stack_sc = scaler.fit_transform(X_train_stack)  # learn min/max on training data only
X_test_stack_sc = scaler.transform(X_test_stack)        # reuse the training min/max

model = MultinomialNB()
model.fit(X_train_stack_sc, y_train)

In [19]:
# Predict numeric labels (0 = Fake, 1 = Real) for the scaled held-out matrix.
y_pred = model.predict(X_test_stack_sc)
y_pred

array([0, 1, 1, ..., 0, 1, 1])

In [20]:
# Ground-truth numeric labels of the held-out rows, for comparison with y_pred.
y_test

8432    0
5680    1
4767    1
9218    1
621     0
       ..
9500    1
5858    1
7442    0
2846    1
1468    1
Name: label_num, Length: 1980, dtype: int64

In [21]:
from sklearn.metrics import accuracy_score
# Fraction of held-out rows whose predicted label equals the true label.
accuracy_score(y_test, y_pred)

0.9474747474747475

In [22]:
from sklearn.metrics import classification_report
# Per-class precision, recall and F1 on the held-out rows.
report = classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       0.95      0.94      0.95       973
           1       0.94      0.96      0.95      1007

    accuracy                           0.95      1980
   macro avg       0.95      0.95      0.95      1980
weighted avg       0.95      0.95      0.95      1980



In [23]:
from sklearn.metrics import confusion_matrix
# Rows are true classes, columns are predicted classes (0 = Fake, 1 = Real).
confusion_matrix(y_test, y_pred)



array([[913,  60],
       [ 44, 963]])