In [None]:

# Colab-only step: google.colab is available inside Google Colab runtimes.
# files.upload() asks the user to upload file(s); presumably the dataset CSV
# read later in this cell is supplied here (verify when running elsewhere).
from google.colab import files
# The return value is kept in `uploaded` but is not referenced again in the
# visible cells.
uploaded = files.upload()
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import PassiveAggressiveClassifier
from sklearn.metrics import accuracy_score, confusion_matrix
# Load the dataset and print its first rows as a quick sanity check.
df = pd.read_csv('fake_or_real_news.csv')
print("Dataset Preview:\n", df.head())

# Features come from the 'text' column, targets from the 'label' column.
X, y = df['text'], df['label']

# Hold out 25% of the rows for evaluation; the seed keeps the split fixed.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# TF-IDF is fitted on the training text only; the test text is transformed
# with the already-fitted vocabulary and IDF weights.
vectorizer = TfidfVectorizer(
    stop_words='english',
    max_df=0.7,
)
tfidf_train = vectorizer.fit_transform(X_train)
tfidf_test = vectorizer.transform(X_test)
# Train the classifier on the TF-IDF training features. random_state pins the
# estimator's internal shuffling of the training data so the reported accuracy
# is the same on every run (the train/test split is already seeded with 42).
model = PassiveAggressiveClassifier(max_iter=50, random_state=42)
model.fit(tfidf_train, y_train)

# Score the model on the held-out test features.
y_pred = model.predict(tfidf_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"\n Model Accuracy:{round(accuracy*100, 2)}%")
# confusion_matrix orders its rows/columns by class label. Pass the fitted
# model's class order explicitly and reuse it for the tick labels, so the axis
# captions always match the matrix instead of relying on a hard-coded order.
cm_classes = list(model.classes_)
cm_tick_labels = [str(label).title() for label in cm_classes]

cm = confusion_matrix(y_test, y_pred, labels=cm_classes)
print("Confusion Matrix:\n", cm)

plt.figure(figsize=(6, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=cm_tick_labels,
    yticklabels=cm_tick_labels,
)
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title(' Fake News Detection - Confusion Matrix')
plt.show()
# Fix the bar order explicitly and derive the tick captions from the actual
# label values, so each bar is guaranteed to carry the caption of the class
# it counts (a hard-coded ['Real', 'Fake'] can silently mislabel the bars).
label_order = sorted(df['label'].dropna().unique())
label_captions = [str(label).title() for label in label_order]

plt.figure(figsize=(6, 4))
sns.countplot(x='label', data=df, order=label_order, palette='Set2')
plt.xticks(range(len(label_order)), label_captions)
plt.title('News Type Distribution in Dataset')
plt.xlabel('News Type')
plt.ylabel('Count')
plt.show()

In [None]:
# Step 1: Import libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Step 2: Load the CSV file (the file must exist under exactly this name)
df = pd.read_csv('fake_or_real_news.csv')

# Step 3: Inspect the data: the first rows, then the number of rows per label
# (how many real and how many fake news items there are)
for summary in (df.head(), df['label'].value_counts()):
    print(summary)

# Step 4: Separate the text (main news content) from the label (real or fake)
X, y = df['text'], df['label']

# Step 5: Train-test split on the raw text (80% training, 20% testing).
# The split happens BEFORE vectorizing so that no information from the test
# documents (vocabulary, document frequencies) leaks into the features the
# model is trained on.
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Step 6: Convert the text into numerical features (TF-IDF).
# Fit on the training text only, then apply the fitted transform to the test
# text. X_train / X_test hold the resulting sparse feature matrices.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train = vectorizer.fit_transform(X_train_text)
X_test = vectorizer.transform(X_test_text)

# Step 7: Train the logistic regression model (fit returns the estimator)
model = LogisticRegression().fit(X_train, y_train)

# Step 8: Check accuracy on the held-out test set
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Model Accuracy: {accuracy*100:.2f}%")

# Step 9: Try the trained model on one hand-written headline.
sample_news = [
    "The government has passed a new law on data privacy.",
]
# Reuse the fitted vectorizer so the sample gets the same feature columns
# the model was trained on.
sample_vector = vectorizer.transform(sample_news)
sample_prediction = model.predict(sample_vector)
print("Prediction:", sample_prediction[0])