In [38]:
import pandas as pd
import pickle 
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier 
from sklearn.metrics import accuracy_score,confusion_matrix

In [39]:
df=pd.read_csv('fake_and_real_news.csv')
df.head()

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,Fake
1,U.S. conservative leader optimistic of common ...,Real
2,"Trump proposes U.S. tax overhaul, stirs concer...",Real
3,Court Forces Ohio To Allow Millions Of Illega...,Fake
4,Democrats say Trump agrees to work on immigrat...,Real


In [40]:
df.describe

<bound method NDFrame.describe of                                                    Text label
0      Top Trump Surrogate BRUTALLY Stabs Him In The...  Fake
1     U.S. conservative leader optimistic of common ...  Real
2     Trump proposes U.S. tax overhaul, stirs concer...  Real
3      Court Forces Ohio To Allow Millions Of Illega...  Fake
4     Democrats say Trump agrees to work on immigrat...  Real
...                                                 ...   ...
9895   Wikileaks Admits To Screwing Up IMMENSELY Wit...  Fake
9896  Trump consults Republican senators on Fed chie...  Real
9897  Trump lawyers say judge lacks jurisdiction for...  Real
9898   WATCH: Right-Wing Pastor Falsely Credits Trum...  Fake
9899   Sean Spicer HILARIOUSLY Branded As Chickensh*...  Fake

[9900 rows x 2 columns]>

In [41]:
df.isna().sum()

Text     0
label    0
dtype: int64

### Encoding the Labels 

In [42]:
le = LabelEncoder()
df['label'] = le.fit_transform(df['label'])
df.head()

Unnamed: 0,Text,label
0,Top Trump Surrogate BRUTALLY Stabs Him In The...,0
1,U.S. conservative leader optimistic of common ...,1
2,"Trump proposes U.S. tax overhaul, stirs concer...",1
3,Court Forces Ohio To Allow Millions Of Illega...,0
4,Democrats say Trump agrees to work on immigrat...,1


### Spliting the data into training and testing phase

In [43]:
X = df['Text']
y = df['label']

In [44]:
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2, random_state=42)

### Vectorizing the Text

In [45]:
tfidf_vectorizer = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf_vectorizer.fit_transform(X_train)
X_test_tfidf = tfidf_vectorizer.transform(X_test)

X_train_tfidf.shape, X_test_tfidf.shape

((7920, 5000), (1980, 5000))

### Logistic_Regression

In [None]:
model = LogisticRegression(max_iter=1000, random_state=42)
model.fit(X_train_tfidf, y_train)




In [47]:
y_pred = model.predict(X_test_tfidf)
print(y_pred)


[0 1 1 ... 0 1 1]


In [48]:
accuracy = accuracy_score(y_test, y_pred)
print(accuracy)

0.9929292929292929


In [49]:
print(confusion_matrix(y_test,y_pred))

[[ 963   10]
 [   4 1003]]


### Decision_Tree_Classifier

In [50]:
model2 = DecisionTreeClassifier()
model2.fit(X_train_tfidf,y_train)

In [51]:
y_pred2 = model2.predict(X_test_tfidf)
print(y_pred2)

[0 1 1 ... 0 1 1]


In [52]:
print(accuracy_score(y_test, y_pred2))

0.998989898989899


In [53]:
print(confusion_matrix(y_test, y_pred2))

[[ 972    1]
 [   1 1006]]


### Random_Forest_CLassifier

In [54]:
model3 = RandomForestClassifier()
model3.fit(X_train_tfidf,y_train)

In [55]:
y_pred3 = model3.predict(X_test_tfidf)
print(y_pred3)

[0 1 1 ... 0 1 1]


In [56]:
print(accuracy_score(y_test, y_pred3))

0.9984848484848485


In [57]:
print(confusion_matrix(y_test, y_pred3))

[[ 971    2]
 [   1 1006]]


In [58]:
with open (r'C:\Users\OM yadav\OneDrive\Desktop\ML Project\news.pkl', 'wb') as f:
    pickle.dump(model,f)

In [59]:
with open(r'C:\Users\OM yadav\OneDrive\Desktop\ML Project\tfidf_vectorizer.pkl', 'wb') as f:
    pickle.dump(tfidf_vectorizer, f)