# In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Step 1: Load the dataset
# 'Fake.csv' is read from the current working directory; change the path to
# point at your own copy of the dataset.
data = pd.read_csv('Fake.csv')

# Columns used below:
#   'title'   - the news article title
#   'text'    - the news article text
#   'subject' - the label the classifier learns to predict
# Title and text are joined into one document per article. Missing values are
# replaced with empty strings first: NaN + str yields NaN, and TfidfVectorizer
# rejects NaN documents.
X_title_text = data['title'].fillna("") + " " + data['text'].fillna("")
y = data['subject']

# Step 2: Split the raw documents into training and testing sets
# Splitting happens before vectorization so the TF-IDF vocabulary and IDF
# weights are learned from the training documents only; fitting on the full
# corpus would leak test-set statistics into the features.
text_train, text_test, y_train, y_test = train_test_split(
    X_title_text, y, test_size=0.2, random_state=42
)

# Step 3: Vectorize the text
# max_df=0.7 ignores terms that occur in more than 70% of the fitted documents.
vectorizer = TfidfVectorizer(stop_words='english', max_df=0.7)
X_train = vectorizer.fit_transform(text_train)
# The test documents are only transformed, never used for fitting.
X_test = vectorizer.transform(text_test)

# Step 4: Create and train the random forest classifier
# fit() returns the fitted estimator, so construction and training are chained.
rf_classifier = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Step 5: Evaluate the model's performance
# Predict the held-out test rows once and reuse the predictions for every metric.
y_pred = rf_classifier.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

# Each remaining metric is printed as a heading line followed by its output.
for heading, metric in (
    ("Classification Report:", classification_report),
    ("Confusion Matrix:", confusion_matrix),
):
    print()
    print(heading)
    print(metric(y_test, y_pred))

# Step 6: Predict the subject of new, unseen articles
# Each article is a dict with a 'title' and a 'text' entry.
new_data = [
    {"title": "Breaking News", "text": "This is a real news article."},
    {"title": "Aliens!", "text": "Breaking: Aliens land on Earth!"},
    {"title": "New Planet", "text": "Scientists discover a new planet in our solar system."},
]

# Build the documents the same way as the training data ("<title> <text>"),
# then reuse the already-fitted vectorizer so the feature columns match.
new_data_texts = [" ".join((item["title"], item["text"])) for item in new_data]
new_data_tfidf = vectorizer.transform(new_data_texts)
new_data_predictions = rf_classifier.predict(new_data_tfidf)

# Report one line per article alongside its predicted subject.
for article, prediction in zip(new_data, new_data_predictions):
    title, text = article["title"], article["text"]
    print(f"({title}) '{text}' belongs to the subject: {prediction}")
