### Importing Libraries

In [47]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import nltk
from nltk.sentiment.vader import SentimentIntensityAnalyzer as SIA_vad
import re
# Download
nltk.download('vader_lexicon')
nltk.download('stopwords')
nltk.download('punkt')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\garge\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\garge\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\garge\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

#### Reading the data csvs

In [48]:
df_fake = pd.read_csv('dataset/DataSet_Misinfo_FAKE.csv')
df_true = pd.read_csv('dataset/DataSet_Misinfo_TRUE.csv')

In [49]:
df_fake.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43642 entries, 0 to 43641
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   Unnamed: 0  43642 non-null  int64 
 1   text        43642 non-null  object
dtypes: int64(1), object(1)
memory usage: 682.0+ KB


In [50]:
df_fake.head()

Unnamed: 0.1,Unnamed: 0,text
0,0,Donald Trump just couldn t wish all Americans ...
1,1,House Intelligence Committee Chairman Devin Nu...
2,2,"On Friday, it was revealed that former Milwauk..."
3,3,"On Christmas day, Donald Trump announced that ..."
4,4,Pope Francis used his annual Christmas Day mes...


In [51]:
df_true.head()

Unnamed: 0.1,Unnamed: 0,text
0,0,The head of a conservative Republican faction ...
1,1,Transgender people will be allowed for the fir...
2,2,The special counsel investigation of links bet...
3,3,Trump campaign adviser George Papadopoulos tol...
4,4,President Donald Trump called on the U.S. Post...


#### Data Cleaning

In [52]:
df_true = df_true.dropna()

In [53]:
# Adding the Label Column to the data
df_true['label'] = 0
df_true = df_true.drop('Unnamed: 0', axis = 1)

df_fake['label'] = 1
df_fake = df_fake.drop('Unnamed: 0', axis = 1)

# Merging both the dataset
df = pd.concat([df_true, df_fake])

In [54]:
df[df['label'] == 1]

Unnamed: 0,text,label
0,Donald Trump just couldn t wish all Americans ...,1
1,House Intelligence Committee Chairman Devin Nu...,1
2,"On Friday, it was revealed that former Milwauk...",1
3,"On Christmas day, Donald Trump announced that ...",1
4,Pope Francis used his annual Christmas Day mes...,1
...,...,...
43637,The USA wants to divide Syria.\r\n\r\nGreat Br...,1
43638,The Ukrainian coup d'etat cost the US nothing ...,1
43639,The European Parliament falsifies history by d...,1
43640,The European Parliament falsifies history by d...,1


In [55]:
df['text'] = df['text'].astype(str)

In [56]:
df['text'] = [re.sub('\t', '',x) for x in df['text']]
df['text'] = [re.sub('\n', '',x) for x in df['text']]
df['text'] = [re.sub('\r', '',x) for x in df['text']]

#### Sentimental Analysis of the all the text

In [57]:
sid = SIA_vad()

In [58]:
def vader_compound(text):
    scores = sid.polarity_scores(text)
    compound = scores['compound']
    return compound

In [59]:
df['vader_score'] = df['text'].apply(lambda x : vader_compound(x))

KeyboardInterrupt: 

#### Calculating the mean vader score for fake and true news

In [None]:
new_df2 = df[['label', 'vader_score']]
grouped_df2 = new_df2.groupby('label').mean().reset_index()

In [None]:
grouped_df2.head()

Unnamed: 0,label,vader_score
0,0,0.097723
1,1,-0.078427


In [None]:
df['sentiment'] = np.where(df['vader_score'] > 0,1,0)

In [None]:
df.to_csv('data_feature.csv')