In [1]:
import pandas as pd

# Load the prepared dataset from the working directory.
# Point the path below at your own CSV if it is named differently.
df = pd.read_csv(filepath_or_buffer='DataSetForModel.csv')

# Sanity check: show the first five rows so the loaded columns can be inspected.
print(df.head(n=5))


                                               title  \
0  u.s. budget fight looms, republican flip fisca...   
1  u.s. military accept transgender recruit monda...   
2  senior u.s. republican senator: 'let mr. muell...   
3  fbi russia probe helped australian diplomat ti...   
4  trump want postal service charge 'much more' a...   

                                                text       subject  \
0  washington (reuters) - head conservative repub...  politicsNews   
1  washington (reuters) - transgender people allo...  politicsNews   
2  washington (reuters) - special counsel investi...  politicsNews   
3  washington (reuters) - trump campaign adviser ...  politicsNews   
4  seattle washington (reuters) - president donal...  politicsNews   

         date  subject_encoded  label  \
0  2017-12-31                6      1   
1  2017-12-29                6      1   
2  2017-12-31                6      1   
3  2017-12-30                6      1   
4  2017-12-29                6      1

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
# Missing documents become empty strings so the vectorizer never receives NaN.
df['clean_text'] = df['clean_text'].fillna(value="")

# Features are the cleaned article text; the target is the label column.
X, y = df['clean_text'], df['label']

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# TF-IDF features: the vocabulary (capped at 5000 terms, English stop words
# removed) is learned from the training split only and then applied unchanged
# to the test split.
tfidf = TfidfVectorizer(max_features=5000, stop_words='english')
X_train_tfidf = tfidf.fit_transform(X_train)
X_test_tfidf = tfidf.transform(X_test)

# Fit the multinomial Naive Bayes classifier; fit() hands back the estimator itself.
model = MultinomialNB().fit(X_train_tfidf, y_train)

# Score the held-out split.
y_pred = model.predict(X_test_tfidf)
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
report = classification_report(y_test, y_pred)

# Emit each metric under its heading, in the same order as they were computed
# for display: accuracy, per-class report, confusion matrix.
for heading, metric in (
    ("Accuracy:", accuracy),
    ("\nClassification Report:\n", report),
    ("Confusion Matrix:\n", conf_matrix),
):
    print(heading, metric)

Accuracy: 0.9312891674127126

Classification Report:
               precision    recall  f1-score   support

           0       0.93      0.94      0.93      4687
           1       0.93      0.92      0.93      4249

    accuracy                           0.93      8936
   macro avg       0.93      0.93      0.93      8936
weighted avg       0.93      0.93      0.93      8936

Confusion Matrix:
 [[4411  276]
 [ 338 3911]]


In [3]:
# Exporting Evaluation Results to CSV
# Gather the metrics computed by the evaluation cell into a single record
# (one CSV row, one column per metric).
results = {
    "accuracy": accuracy,
    # Flatten row by row and convert to a plain Python list. Leaving a NumPy
    # array in the cell makes the CSV contain its printed form (space-separated,
    # and abbreviated with "..." once the array is large), which cannot be
    # parsed back reliably. A list is written as "[a, b, c, d]" instead.
    "confusion_matrix": conf_matrix.ravel().tolist(),
    # The report is already a text block, so it is stored as-is; it ends up as
    # one quoted multi-line field in the CSV.
    "classification_report": report,
}

# Wrap the record in a list so pandas builds a one-row DataFrame from it.
results_df = pd.DataFrame([results])

# Write the row next to the notebook; index=False drops the meaningless 0 index.
results_df.to_csv('naive_bayes_evaluation.csv', index=False)
