## Importing Libraries

In [2]:
import numpy as np              # Numerical operations and arrays
import pandas as pd             # Data loading and manipulation
import matplotlib.pyplot as plt # Basic data visualization
import seaborn as sns           # Advanced statistical visualizations

from sklearn.model_selection import train_test_split  # Split data into train and test sets
from sklearn.metrics import accuracy_score             # Measure classification accuracy
from sklearn.metrics import classification_report      # Precision, recall, F1-score summary

import re                       # Text pattern matching and cleaning
import string                   # Predefined string constants (punctuation, letters)


In [3]:
df_fake = pd.read_csv("Fake.csv")
df_true = pd.read_csv("True.csv")

In [4]:
df_fake.head()

Unnamed: 0,title,text,subject,date
0,Donald Trump Sends Out Embarrassing New Year’...,Donald Trump just couldn t wish all Americans ...,News,"December 31, 2017"
1,Drunk Bragging Trump Staffer Started Russian ...,House Intelligence Committee Chairman Devin Nu...,News,"December 31, 2017"
2,Sheriff David Clarke Becomes An Internet Joke...,"On Friday, it was revealed that former Milwauk...",News,"December 30, 2017"
3,Trump Is So Obsessed He Even Has Obama’s Name...,"On Christmas day, Donald Trump announced that ...",News,"December 29, 2017"
4,Pope Francis Just Called Out Donald Trump Dur...,Pope Francis used his annual Christmas Day mes...,News,"December 25, 2017"


In [5]:
df_true.head()

Unnamed: 0,title,text,subject,date
0,"As U.S. budget fight looms, Republicans flip t...",WASHINGTON (Reuters) - The head of a conservat...,politicsNews,"December 31, 2017"
1,U.S. military to accept transgender recruits o...,WASHINGTON (Reuters) - Transgender people will...,politicsNews,"December 29, 2017"
2,Senior U.S. Republican senator: 'Let Mr. Muell...,WASHINGTON (Reuters) - The special counsel inv...,politicsNews,"December 31, 2017"
3,FBI Russia probe helped by Australian diplomat...,WASHINGTON (Reuters) - Trump campaign adviser ...,politicsNews,"December 30, 2017"
4,Trump wants Postal Service to charge 'much mor...,SEATTLE/WASHINGTON (Reuters) - President Donal...,politicsNews,"December 29, 2017"


##  Assigning Classes to the Dataset

In [7]:
df_fake["class"] = 0
df_true["class"] = 1

## Checking Number of Rows and Columns in the Dataset

In [9]:
df_fake.shape , df_true.shape

((23481, 5), (21417, 5))

## Manual Testing for Both the Dataset

In [11]:
data_fake_manual_testing = df_fake.tail(10)
for i in range(23480,23470, -1):
    df_fake.drop([i], axis = 0, inplace = True)

data_true_manual_testing = df_true.tail(10)
for i in range(21416,21406,-1):
    df_true.drop([i], axis = 0, inplace = True)

## Assigning Classes to the Dataset

In [13]:
data_fake_manual_testing["class"] = 0
data_true_manual_testing["class"] = 1

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_fake_manual_testing["class"] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data_true_manual_testing["class"] = 1


## Merging Both the Dataset

In [15]:
data_merge = pd.concat([df_fake, df_true] , axis=0)
data_merge.sample(10)

Unnamed: 0,title,text,subject,date,class
2765,For women at the U.S. Congress: the right to b...,WASHINGTON (Reuters) - After an internet-fuele...,politicsNews,"July 13, 2017",1
3501,White House declines to say if Trump has made ...,WASHINGTON (Reuters) - White House spokesman S...,politicsNews,"May 31, 2017",1
8847,Gabby Giffords Has Made Her Choice For Who Sh...,The adversity that former Congresswoman Gabby ...,News,"January 10, 2016",0
17610,ANOTHER CLINTON CASUALTY? Sister Of Woman Who ...,As more and more women line up to tell their s...,left-news,"Nov 25, 2017",0
25,Despicable Trump Suggests Female Senator Woul...,"Donald Trump is afraid of strong, powerful wom...",News,"December 12, 2017",0
21341,KARMA: Manufactured Race War Backfires…Missou ...,"kar ma k rm /Submit noun destiny or fate, fol...",left-news,"Nov 13, 2015",0
948,Mark Hamill Has The BEST Response To Trump’s ...,Donald Trump is going to lose his shit if he r...,News,"July 2, 2017",0
7454,"Scottish leader says Trump win brings ""real se...",EDINBURGH (Reuters) - Scotland’s First Ministe...,politicsNews,"November 9, 2016",1
11631,Hariri says Gulf states not planning measures ...,BEIRUT (Reuters) - Lebanese Prime Minister Saa...,worldnews,"December 21, 2017",1
20737,Spanish PM Rajoy to ask court to revoke Catala...,MADRID (Reuters) - Spanish Prime Minister Mari...,worldnews,"September 7, 2017",1


## Dropping Unwanted Columns

In [17]:
data = data_merge.drop(["title", "subject", "date"] , axis = 1)

## Create a Function to Clean Text


###### re.sub(pattern, replacement, text)


In [20]:
### re.sub(pattern, replacement, text)

def wordopt(text):
    text = text.lower()
    text = re.sub('\[ .*? \]', '', text)
    text = re.sub("\\W", " ", text)
    text = re.sub('https ?: //\S+|www\.\S+', '', text)
    text = re.sub(' <.*? >+', '', text)
    text = re.sub(' [%s]' % re.escape(string.punctuation), '', text)
    text = re.sub('\n', '', text)
    text = re.sub('\w*\d\w*', '', text)
    return text

  text = re.sub('\[ .*? \]', '', text)
  text = re.sub('https ?: //\S+|www\.\S+', '', text)
  text = re.sub('\w*\d\w*', '', text)


## Applying Function to Text Column and Assigning X and Y

In [22]:
data['text'] = data['text'].apply(wordopt)

x = data['text']
y = data['class' ]

## Defining Training and Testing Data

In [24]:
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size= 0.25)

## Converting Raw Data Into Matrix for Further Process.

In [26]:
from sklearn.feature_extraction.text import TfidfVectorizer  # Convert text into TF-IDF numerical features

vectorization = TfidfVectorizer()      # Initialize TF-IDF vectorizer
xv_train = vectorization.fit_transform(x_train)  # Learn vocabulary + IDF, then transform training text
xv_test = vectorization.transform(x_test)        # Transform test text using SAME vocabulary (no refit)
 

## Creating First Model.

In [28]:
from sklearn.linear_model import LogisticRegression

LR = LogisticRegression()
LR.fit(xv_train, y_train)

In [29]:
## Checking the Model Accuracy and Classification Report
pred_lr = LR.predict(xv_test)

In [30]:
LR.score(xv_test, y_test)

0.9869875222816399

In [31]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5976
           1       0.98      0.99      0.99      5244

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



## Creating a Second Model

In [33]:
from sklearn.tree import DecisionTreeClassifier

DT = DecisionTreeClassifier()
DT.fit(xv_train, y_train)

In [34]:
## Checking the Model Accuracy and Classification Report
pred_dt = DT.predict(xv_test)

In [35]:
DT.score(xv_test, y_test)

0.9961675579322639

In [36]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00      5976
           1       1.00      0.99      1.00      5244

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



## Creating Third Model 

In [38]:
from sklearn.ensemble import GradientBoostingClassifier

In [39]:
GB = GradientBoostingClassifier(random_state = 0)
GB.fit(xv_train, y_train)

In [40]:
## Checking the Model Accuracy and Classification Report
pred_gb = GB.predict(xv_test)

In [41]:
GB.score(xv_test, y_test)

0.996078431372549

In [42]:
print(classification_report(y_test, pred_gb))

              precision    recall  f1-score   support

           0       1.00      0.99      1.00      5976
           1       0.99      1.00      1.00      5244

    accuracy                           1.00     11220
   macro avg       1.00      1.00      1.00     11220
weighted avg       1.00      1.00      1.00     11220



## Creating Fourth Model

In [44]:
from sklearn.ensemble import RandomForestClassifier

In [45]:
RF = RandomForestClassifier(random_state = 0)
RF.fit(xv_train, y_train)

In [47]:
## Checking the Model Accuracy and Classification Report
pred_rf = RF.predict(xv_test)

In [49]:
RF.score(xv_test, y_test)

0.9901069518716578

In [50]:
print(classification_report(y_test, pred_rf))

              precision    recall  f1-score   support

           0       0.99      0.99      0.99      5976
           1       0.99      0.99      0.99      5244

    accuracy                           0.99     11220
   macro avg       0.99      0.99      0.99     11220
weighted avg       0.99      0.99      0.99     11220



## Checking Fake News

In [53]:
def output_label(n):
    if n == 0:
        return "Fake News"
    elif n == 1:
        return "Not A Fake News"


def manual_testing(news):
    testing_news = {"text": [news]}
    new_def_test = pd.DataFrame(testing_news)

    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)

    models = {
        "Logistic Regression": LR,
        "Decision Tree": DT,
        "Random Forest": RF,
        "Gradient Boosting": GB
    }

    print()
    for name, model in models.items():
        prediction = model.predict(new_xv_test)[0]
        print(f"{name} Prediction: {output_label(prediction)}")


In [61]:
news = str(input())
manual_testing(news)

 Pro-Russian users have often repeated the Kremlin's original position that the invasion of Wkraine is a "special military operation" to "denazify" and "demilitarise" a "Neo-Nazi state". Many have downpl ayed allegations of Russian war crimes or even claimed that the war is a supposed "hoax". In one wid ely shared video, a news reporter could be seen standing in front of lines of body bags, one of which was moving. However, the footage did not show invented war casualties in Ukraine, but a "Fridays for Future" climate change protest in Vienna in February, three weeks before the invasion began. Days la ter, another viral video of a mannequin claimed to be proof that Ukrainian authorities had "staged" t he mass killing of civilians in the town of Bucha. The misleading clip showed a prosthetic doll bein g dressed and prepared by two men. Nadezhda, an assistant director for a Russian television programm e, confirmed to Euronews that the video showed their film set near St. Petersburg and n


Logistic Regression: Fake News
Decision Tree: Fake News
Random Forest: Not A Fake News
Gradient Boosting: Fake News
