## Importing the NLTK library

In [None]:
import nltk
# NOTE(review): called with no arguments, so no specific resource is requested here;
# presumably this launches NLTK's interactive downloader — confirm, and consider
# requesting only the resources the later cells rely on.
nltk.download()

## Importing other required modules

In [None]:
import pandas as pd

In [None]:
# Load the two source CSV files, in order, into the `fake` and `genuine` frames.
fake, genuine = (pd.read_csv(path) for path in ("Fake.csv", "True.csv"))

### Printing Info

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would add a stray "None".
fake.info()

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() would add a stray "None".
genuine.info()

In [None]:
# Label every row with its class: 0 for rows of `fake`, 1 for rows of `genuine`.
fake['target'], genuine['target'] = 0, 1

In [None]:
# Preview the leading rows of `genuine`, which now carries the `target` column.
genuine.head()

In [None]:
# Preview the leading rows of `fake`, which now carries the `target` column.
fake.head()

## Concatenating both datasets

In [None]:
# Stack the fake rows on top of the genuine rows, then discard the two
# original indexes in favour of a fresh one for the combined frame.
df = pd.concat([fake, genuine], axis=0).reset_index(drop=True)

Dropping unnecessary columns

In [None]:
# Remove the 'subject', 'date' and 'title' columns, then show what is left.
df = df.drop(columns=['subject', 'date', 'title'])
df.columns

### Preprocessing data

1. Tokenizing words

In [None]:
# `word_tokenize` is the function applied below, so it is the name that must
# be imported from nltk.tokenize.
from nltk.tokenize import word_tokenize

# Replace each row's text with the list of tokens produced by word_tokenize.
df['text'] = df['text'].apply(word_tokenize)
df

2. Stemming Words

In [None]:
# The Snowball stemmer class exported by nltk.stem.snowball is `SnowballStemmer`.
from nltk.stem.snowball import SnowballStemmer

# One shared English stemmer instance, used by the stemming helper cell.
ss = SnowballStemmer("english")

In [None]:
def stem_word(text):
    """Return a list holding ``ss.stem(token)`` for every token in ``text``.

    ``text`` is iterated once, in order; the module-level stemmer ``ss`` is
    only consulted when there is at least one token to process.
    """
    stems = []
    for token in text:
        stems.append(ss.stem(token))
    return stems

In [None]:
# Run stem_word over every row's token list and store the results back
# in the 'text' column, then display the frame.
df['text'] = [stem_word(tokens) for tokens in df['text']]
df

### Stopword Removal

In [None]:
from nltk.corpus import stopwords

In [None]:
def rem_stopwords(text, stop_words=None):
    """Drop stopwords and very short tokens from a token list.

    Args:
        text: Iterable of string tokens.
        stop_words: Optional collection of lower-case words to discard.
            When omitted, NLTK's English stopword list is used; it is
            loaded on first use and cached on the function, and requires
            the NLTK ``stopwords`` corpus to be available.

    Returns:
        A list of the tokens that are longer than two characters and whose
        lower-cased form is not in ``stop_words``, in their original order.
    """
    if stop_words is None:
        stop_words = getattr(rem_stopwords, "_english", None)
        if stop_words is None:
            # Build the lookup set once so repeated calls do not re-read
            # the corpus; a frozenset gives O(1) membership tests.
            stop_words = frozenset(stopwords.words("english"))
            rem_stopwords._english = stop_words
    return [
        word for word in text
        if len(word) > 2 and word.lower() not in stop_words
    ]

In [None]:
# Pass every row's token list through rem_stopwords and store the filtered
# lists back in the 'text' column, then display the frame.
df['text'] = [rem_stopwords(tokens) for tokens in df['text']]
df

### Join text

In [None]:
# Collapse each row's token list back into one space-separated string.
df['text'] = df['text'].apply(lambda tokens: ' '.join(tokens))

## Splitting into train and test data

In [None]:
from sklearn.model_selection import train_test_split
# Split the text column (features) and the target column (labels) into
# train and test portions using train_test_split's default settings.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['target'],
)

## Vectorization

In [None]:
# The TF-IDF vectorizer class in sklearn.feature_extraction.text is
# `TfidfVectorizer`.
from sklearn.feature_extraction.text import TfidfVectorizer

# max_df=0.7 is passed so that terms found in more than 70% of the
# documents are left out of the vocabulary.
Iv = TfidfVectorizer(max_df=0.7)

In [None]:
# Fit the vectorizer `Iv` on the training split and transform it in one step;
# the test split is only transformed, reusing what was fitted on the training data.
# Both names are rebound from text to the vectorizer's output.
X_train = Iv.fit_transform(X_train)
X_test=Iv.transform(X_test)

In [None]:
# X_train is the vectorizer's output (a SciPy sparse matrix), which has no
# DataFrame-style .head(); slice the first five rows instead to preview it.
X_train[:5]

In [None]:
# X_test is the vectorizer's output (a SciPy sparse matrix), which has no
# DataFrame-style .head(); slice the first five rows instead to preview it.
X_test[:5]

## Training model with Logistic Regression

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

In [None]:
# Train a logistic-regression classifier (capped at 50 solver iterations)
# on the vectorized training split.
m = LogisticRegression(max_iter=50)
m.fit(X_train, y_train)

# Predict labels for the held-out split and report accuracy as a percentage.
prediction = m.predict(X_test)
print(accuracy_score(y_test, prediction) * 100)