# Fake Disaster Tweet Detection

### Importing the required libraries
Here I import the libraries required up front; any additional library that turns out to be needed will be imported later on.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import re
import string
import os

### Loading the fake and real tweet dataset

In [None]:
# Load the labelled tweets from train.csv (relative path, so the file must sit in
# the working directory). The preview below shows the columns id, keyword,
# location, text and target.
df = pd.read_csv("train.csv")
df.head(10)

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1
5,8,,,#RockyFire Update => California Hwy. 20 closed...,1
6,10,,,#flood #disaster Heavy rain causes flash flood...,1
7,13,,,I'm on top of the hill and I can see a fire in...,1
8,14,,,There's an emergency evacuation happening now ...,1
9,15,,,I'm afraid that the tornado is coming to our a...,1


In [None]:
# Keep only the tweet text and its label; the other columns are not used as features.
df = df.drop(columns=["id", "keyword", "location"])

In [None]:
df.isnull().sum()

text      0
target    0
dtype: int64

In [None]:
df.head(10)

Unnamed: 0,text,target
0,Our Deeds are the Reason of this #earthquake M...,1
1,Forest fire near La Ronge Sask. Canada,1
2,All residents asked to 'shelter in place' are ...,1
3,"13,000 people receive #wildfires evacuation or...",1
4,Just got sent this photo from Ruby #Alaska as ...,1
5,#RockyFire Update => California Hwy. 20 closed...,1
6,#flood #disaster Heavy rain causes flash flood...,1
7,I'm on top of the hill and I can see a fire in...,1
8,There's an emergency evacuation happening now ...,1
9,I'm afraid that the tornado is coming to our a...,1


In [None]:
df['text']

0       Our Deeds are the Reason of this #earthquake M...
1                  Forest fire near La Ronge Sask. Canada
2       All residents asked to 'shelter in place' are ...
3       13,000 people receive #wildfires evacuation or...
4       Just got sent this photo from Ruby #Alaska as ...
                              ...                        
7608    Two giant cranes holding a bridge collapse int...
7609    @aria_ahrary @TheTawniest The out of control w...
7610    M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...
7611    Police investigating after an e-bike collided ...
7612    The Latest: More Homes Razed by Northern Calif...
Name: text, Length: 7613, dtype: object

#### Creating a function to convert the text to lowercase and remove extra spaces, special characters, URLs and links.

In [None]:
def wordopt(text):
    """Normalise a raw tweet into lowercase text ready for vectorisation.

    Steps, in order:
      1. lowercase the text;
      2. drop ``[...]`` bracketed spans;
      3. drop URLs (``http://...``, ``https://...`` and ``www....``);
      4. drop ``<...>`` tags;
      5. turn every remaining non-word character into a space;
      6. drop underscores (the only punctuation left after step 5);
      7. drop every token that contains a digit.

    URLs and tags are removed *before* the non-word substitution. When that
    substitution ran first it replaced ``://``, ``.``, ``<`` and ``>`` with
    spaces, so the URL and tag patterns could never match and URL fragments
    such as ``http``, ``t`` and ``co`` ended up in the cleaned text.

    Args:
        text: the raw tweet as a ``str``.

    Returns:
        The cleaned string. Runs of spaces are left as they are; they are not
        collapsed.
    """
    text = text.lower()
    text = re.sub(r'\[.*?\]', '', text)
    # Must run while '://', '.', '<' and '>' are still present in the text.
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    # Newlines are non-word characters too, so they become spaces here and no
    # separate newline-stripping step is needed afterwards.
    text = re.sub(r'\W', ' ', text)
    # After the previous step only '_' can still match (it counts as a word character).
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text

In [None]:
# Clean every tweet element-wise with wordopt and store the result back in the text column.
df["text"] = df["text"].map(wordopt)

#### Defining the independent and dependent variables as x and y

In [None]:
# x holds the cleaned tweet text (feature), y the 0/1 target label.
x, y = df["text"], df["target"]

#### Splitting the dataset into training set and testing set.

In [None]:
# Fix the seed so that re-running this cell yields the same partition. Without it
# every run reshuffles the data, and models fitted or evaluated after a re-run are
# scored on a different test set than the ones before it.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=0)

#### Convert text to vectors

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer

In [None]:
# Fit the vectorizer on the training tweets only; the test tweets are transformed
# with that already-fitted vectorizer, so nothing is learned from the test split.
vectorization = TfidfVectorizer()
xv_train = vectorization.fit_transform(x_train)
xv_test = vectorization.transform(x_test)

### 1. Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression

In [None]:
LR = LogisticRegression()
LR.fit(xv_train,y_train)

LogisticRegression()

In [None]:
pred_lr=LR.predict(xv_test)

In [None]:
LR.score(xv_test, y_test)

0.8077731092436975

In [None]:
print(classification_report(y_test, pred_lr))

              precision    recall  f1-score   support

           0       0.80      0.88      0.84      1111
           1       0.81      0.70      0.75       793

    accuracy                           0.81      1904
   macro avg       0.81      0.79      0.80      1904
weighted avg       0.81      0.81      0.80      1904



### 2. Decision Tree Classification

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Seed the tree so its fit (and therefore its score) is repeatable between runs,
# in line with the seeded RandomForestClassifier(random_state=0) used in this notebook.
DT = DecisionTreeClassifier(random_state=0)
DT.fit(xv_train, y_train)

DecisionTreeClassifier()

In [None]:
pred_dt = DT.predict(xv_test)

In [None]:
DT.score(xv_test, y_test)

0.7064075630252101

In [None]:
print(classification_report(y_test, pred_dt))

              precision    recall  f1-score   support

           0       0.74      0.74      0.74      1073
           1       0.66      0.67      0.66       831

    accuracy                           0.71      1904
   macro avg       0.70      0.70      0.70      1904
weighted avg       0.71      0.71      0.71      1904



### 3. Random Forest Classifier

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
RFC = RandomForestClassifier(random_state=0)
RFC.fit(xv_train, y_train)

RandomForestClassifier(random_state=0)

In [None]:
pred_rfc = RFC.predict(xv_test)

In [None]:
RFC.score(xv_test, y_test)

0.7872899159663865

In [None]:
print(classification_report(y_test, pred_rfc))

              precision    recall  f1-score   support

           0       0.76      0.91      0.83      1073
           1       0.85      0.63      0.72       831

    accuracy                           0.79      1904
   macro avg       0.80      0.77      0.77      1904
weighted avg       0.80      0.79      0.78      1904



# Model Testing With Manual Entry

### Tweet

In [None]:
def output_lable(n):
    """Translate a predicted class value into its display label.

    Args:
        n: predicted class; compared with ``==`` against 0 and 1.

    Returns:
        "Fake Disaster Tweet" when ``n == 0``, "Not a fake Disaster Tweet"
        when ``n == 1``, and ``None`` for any other value.
    """
    if n == 1:
        return "Not a fake Disaster Tweet"
    if n == 0:
        return "Fake Disaster Tweet"
    return None

def manual_testing(tweet):
    """Classify a single tweet with the three fitted models and print the labels.

    The tweet is cleaned with ``wordopt``, vectorised with the module-level
    ``vectorization`` object, and passed to the module-level ``LR``, ``DT`` and
    ``RFC`` models. Those names must already be defined and fitted before this
    function is called.

    Args:
        tweet: the raw tweet text.

    Returns:
        None. The three predictions are printed, one line per model.
    """
    new_def_test = pd.DataFrame({"text": [tweet]})
    new_def_test["text"] = new_def_test["text"].apply(wordopt)
    new_x_test = new_def_test["text"]
    new_xv_test = vectorization.transform(new_x_test)

    # Each predict() call returns an array with one entry (one input row); take
    # element 0 so output_lable always receives a scalar class value, the same
    # way for all three models.
    pred_LR = LR.predict(new_xv_test)[0]
    pred_DT = DT.predict(new_xv_test)[0]
    pred_RFC = RFC.predict(new_xv_test)[0]

    print("\n\nLR Prediction: {} \nDT Prediction: {} \nRFC Prediction: {} \n".format(
        output_lable(pred_LR), output_lable(pred_DT), output_lable(pred_RFC)))

In [None]:
# input() already returns a str, so the value is passed on unchanged.
tweet = input()
manual_testing(tweet)

tornado is superb!


LR Prediction: Not a fake Disaster Tweet 
DT Prediction: Fake Disaster Tweet 
RFC Prediction: Fake Disaster Tweet 

