In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import joblib

# Step 1: Load the Data
# The CSV path is relative, so it is resolved against the kernel's current
# working directory. Later cells read the 'prompt' and 'label' columns.
# NOTE(review): the preview below shows an 'Unnamed: 0' column, which looks
# like a row index written out when the CSV was saved -- confirm, and consider
# passing index_col=0 if it is not needed.
DATA_PATH = 'newer_data.csv'
df = pd.read_csv(DATA_PATH)

In [8]:
# Show the first five rows as a quick check on the loaded columns and labels.
df.head(n=5)

Unnamed: 0,prompt,label
0,How does gravity affect planetary motion?,studies
1,What are the top 2024 fashion predictions?,fashion
2,What are the latest trends in home decor?,neither
3,What is the importance of the water cycle in n...,studies
4,What are the latest denim trends?,fashion


In [10]:
# Step 2: Split the data, vectorise the text, train, evaluate, and persist.
TEST_SIZE = 0.2     # fraction of rows held out for evaluation
RANDOM_STATE = 42   # fixed seed so the split is the same on every run

# Inputs are the raw prompt strings; targets are their category labels.
X, y = df['prompt'], df['label']

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_SIZE,
    random_state=RANDOM_STATE,
)

# The TF-IDF vocabulary and weights are learned from the training prompts
# only; the held-out prompts are transformed with that fitted vectorizer.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

# fit() returns the estimator itself, so construction and training are chained.
model = MultinomialNB().fit(X_train_vec, y_train)

y_pred = model.predict(X_test_vec)

# Report hold-out performance: overall accuracy, per-class metrics, and the
# confusion matrix.
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)
report = classification_report(y_test, y_pred)
print("Classification Report:\n", report)
matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", matrix)

# Persist both artefacts: predictions on new text need the fitted vectorizer
# as well as the classifier.
for artifact, filename in (
    (model, 'naive_bayes_model.pkl'),
    (vectorizer, 'tfidf_vectorizer.pkl'),
):
    joblib.dump(artifact, filename)

print("Model and vectorizer saved successfully.")


Accuracy: 0.9166666666666666
Classification Report:
               precision    recall  f1-score   support

     fashion       1.00      0.75      0.86         8
     fitness       1.00      1.00      1.00         6
     neither       0.50      1.00      0.67         2
     studies       1.00      1.00      1.00         8

    accuracy                           0.92        24
   macro avg       0.88      0.94      0.88        24
weighted avg       0.96      0.92      0.92        24

Confusion Matrix:
 [[6 0 2 0]
 [0 6 0 0]
 [0 0 2 0]
 [0 0 0 8]]
Model and vectorizer saved successfully.
