In [22]:
# STEP 1: Upload your dataset
from google.colab import files
uploaded = files.upload()

import pandas as pd
from io import StringIO

# STEP 2: Load CSV file
for file_name in uploaded.keys():
    df = pd.read_csv(StringIO(uploaded[file_name].decode('utf-8')))
    print(f"\n✅ Dataset '{file_name}' uploaded successfully.\n")
    break  # Only use the first file

# Show a preview
print("📄 First 5 rows of the dataset:")
display(df.head())

# STEP 3: Select the target column
print("\n🔍 Available columns:", df.columns.tolist())
target_col = input("\n🎯 Enter the name of the target (label) column (e.g., 'category'): ")

# Split into features and target
X = df.drop(columns=[target_col])
y = df[target_col]

# Handle categorical data (e.g., turning text into numbers)
X = pd.get_dummies(X)  # This converts categorical columns to numeric
if y.dtype == 'object':
    y = pd.factorize(y)[0]  # Convert categorical labels to numeric values

# STEP 4: Split the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# STEP 5: Train Random Forest
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)
rf_pred = rf_model.predict(X_test)

# STEP 6: Train Naive Bayes
from sklearn.naive_bayes import GaussianNB
nb_model = GaussianNB()
nb_model.fit(X_train, y_train)
nb_pred = nb_model.predict(X_test)

# STEP 7: Evaluate Models (Accuracy, Classification Report, Confusion Matrix)
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Accuracy scores
rf_acc = accuracy_score(y_test, rf_pred)
nb_acc = accuracy_score(y_test, nb_pred)

# Print accuracy results
print("\n📊 Accuracy Results:")
print(f"🌲 Random Forest Accuracy: {rf_acc:.2%}")
print(f"🧠 Naive Bayes Accuracy: {nb_acc:.2%}")

# Classification Report for Random Forest
print("\n🌲 Random Forest Classification Report:")
print(classification_report(y_test, rf_pred))

# Confusion Matrix for Random Forest
print("🌲 Random Forest Confusion Matrix:")
plt.figure(figsize=(5, 4))
sns.heatmap(confusion_matrix(y_test, rf_pred), annot=True, fmt='d', cmap='Blues')
plt.title("Random Forest Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()

# Classification Report for Naive Bayes
print("\n🧠 Naive Bayes Classification Report:")
print(classification_report(y_test, nb_pred))

# Confusion Matrix for Naive Bayes
print("🧠 Naive Bayes Confusion Matrix:")
plt.figure(figsize=(5, 4))
sns.heatmap(confusion_matrix(y_test, nb_pred), annot=True, fmt='d', cmap='Greens')
plt.title("Naive Bayes Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")


ModuleNotFoundError: No module named 'google.colab'