# Fake News Detection Using Artificial Intelligence

## Project Description

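This project classifies a piece of news text as real or fake using machine learning.
A TF-IDF + logistic regression model is trained on a labelled dataset of past news, with the aim of helping users identify false information. Note that in the run recorded below the held-out accuracy is only about 0.51 on two nearly balanced classes, which is close to chance, so the predictions should not be relied on yet.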

In [1]:
import pandas as pd
# Load the news dataset from a CSV file located in the working directory.
DATA_PATH = "fake_news_dataset.csv"
df = pd.read_csv(DATA_PATH)

In [2]:
# Keep only the two columns used downstream: the article text and its label.
df = df.loc[:, ['text', 'label']]
df.head()

Unnamed: 0,text,label
0,more tax development both store agreement lawy...,real
1,probably guess western behind likely next inve...,fake
2,them identify forward present success risk sev...,fake
3,phone which item yard Republican safe where po...,fake
4,wonder myself fact difficult course forget exa...,fake


In [3]:
# Encode the string labels as integers: fake -> 0, real -> 1.
# The lookup falls back to the value itself, so re-running this cell on
# already-encoded labels is a no-op. A plain dict .map() would instead turn
# every 0/1 into NaN on a second run, because those keys are not in the dict.
label_ids = {'fake': 0, 'real': 1}
# assign() builds a new frame rather than writing into the existing one.
df = df.assign(label=df['label'].map(lambda value: label_ids.get(value, value)))
# Fail loudly here if the column holds anything other than the two known classes
# (for example a missing or differently spelled label).
assert df['label'].isin([0, 1]).all(), "unexpected values in 'label' column"
df.head()

Unnamed: 0,text,label
0,more tax development both store agreement lawy...,1
1,probably guess western behind likely next inve...,0
2,them identify forward present success risk sev...,0
3,phone which item yard Republican safe where po...,0
4,wonder myself fact difficult course forget exa...,0


In [4]:
from sklearn.model_selection import train_test_split
# Features are the raw article text; the target is the label column.
X = df['text']
Y = df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=42,
)

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
# TF-IDF features with English stop words removed and max_df=0.7, which drops
# terms whose document frequency exceeds 70% of the fitted documents.
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)
# Fit on the training split only, then apply the same fitted vectorizer to the
# test split so the test documents do not influence the learned features.
X_train_tfidf = vectorizer.fit_transform(X_train)
X_test_tfidf = vectorizer.transform(X_test)

In [6]:
from sklearn.linear_model import LogisticRegression
# Baseline classifier: logistic regression with max_iter=1000, fitted on the
# TF-IDF training features. fit() returns the estimator itself, so `model` is
# the fitted estimator and displaying it shows the same object as before.
model = LogisticRegression(max_iter=1000).fit(X_train_tfidf, Y_train)
model

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [7]:
from sklearn.metrics import accuracy_score
# Score the held-out split: predict labels, then compare them with the true ones.
Y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_true=Y_test, y_pred=Y_pred)
accuracy

0.51125

In [8]:
# Count how many rows carry each label to check the class balance.
label_counts = df['label'].value_counts()
label_counts

label
0    10056
1     9944
Name: count, dtype: int64

In [9]:
from sklearn.linear_model import LogisticRegression
# Refit logistic regression with class_weight='balanced'. This rebinds `model`,
# so every later cell that uses `model` sees this estimator, not the earlier one.
model = LogisticRegression(
    max_iter=1000,
    class_weight='balanced',
).fit(X_train_tfidf, Y_train)
model

0,1,2
,penalty,'l2'
,dual,False
,tol,0.0001
,C,1.0
,fit_intercept,True
,intercept_scaling,1
,class_weight,'balanced'
,random_state,
,solver,'lbfgs'
,max_iter,1000


In [10]:
# Re-evaluate on the held-out split with the estimator currently bound to `model`.
Y_pred = model.predict(X_test_tfidf)
accuracy_score(y_true=Y_test, y_pred=Y_pred)

0.509

In [11]:
from sklearn.metrics import confusion_matrix, classification_report
# Per-class breakdown of the latest predictions: the confusion matrix first,
# then the per-label precision / recall / F1 report.
cm = confusion_matrix(Y_test, Y_pred)
print(cm)
report = classification_report(Y_test, Y_pred)
print(report)

[[1014 1015]
 [ 949 1022]]
              precision    recall  f1-score   support

           0       0.52      0.50      0.51      2029
           1       0.50      0.52      0.51      1971

    accuracy                           0.51      4000
   macro avg       0.51      0.51      0.51      4000
weighted avg       0.51      0.51      0.51      4000



In [12]:
def predict_news(text):
    """Classify one piece of news text as real or fake.

    The text is converted to TF-IDF features with the notebook-level
    ``vectorizer`` and passed to the notebook-level ``model``; both must
    already be fitted when this function is called.

    Parameters
    ----------
    text : str
        The headline or article text to classify.

    Returns
    -------
    str
        ``"Real News"`` when the model predicts label 1, otherwise
        ``"Fake News"``.
    """
    # transform() takes a collection of documents, so wrap the single text in a list.
    features = vectorizer.transform([text])
    predicted_label = model.predict(features)[0]
    if predicted_label == 1:
        return "Real News"
    return "Fake News"

In [26]:
# Spot check the classifier on a hand-written headline.
headline = "Government announces new education policy"
predict_news(headline)

'Fake News'

In [27]:
# Spot check the classifier on a hand-written clickbait-style headline.
headline = "Shocking secret cure doctors don't want you to know"
predict_news(headline)

'Fake News'

In [28]:
# Spot check the classifier on a hand-written school-admissions headline.
headline = "Delhi schools to release nursery and class 1 admission lists with points based criteria"
predict_news(headline)

'Real News'

In [29]:
# Spot check the classifier on a hand-written government-scheme headline.
headline = "The Pudhumai Penn scheme of Tamilnadu government is for girl students"
predict_news(headline)

'Real News'