# In [11]:
# 1. Detailed EDA
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Load the dataset.
# The Windows path is written as a raw string: in a normal literal the
# backslash pairs "\S", "\M", "\D" and "\e" are invalid escape sequences,
# which recent Python versions report with a SyntaxWarning. The resulting
# path value is unchanged.
df = pd.read_csv(r"F:\SEM 6\Machine Learning\Dataset\ev_charging_patterns.csv")

def perform_eda(df):
    """Print a textual overview of ``df`` and plot a correlation heatmap.

    Prints, in order: the ``df.info()`` report, the first five rows, the
    ``df.describe()`` summary and the per-column null counts. When ``df``
    has more than one numeric column, a heatmap of the pairwise
    correlations between the numeric columns is shown.

    Args:
        df: The pandas DataFrame to inspect. It is not modified.

    Returns:
        None. All results are printed or plotted.
    """
    print("Dataset Information:")
    # info() writes its own report and returns None, so wrapping it in
    # print() would emit a stray "None" line after the report.
    df.info()
    print("\nFirst 5 Rows of Data:")
    print(df.head())
    print("\nSummary Statistics:")
    print(df.describe())
    print("\nNull Values:")
    print(df.isnull().sum())

    # 1. Correlation Matrix
    # Correlate only the numeric columns: calling corr() on a frame that
    # still holds text columns raises on pandas >= 2.0, where non-numeric
    # columns are no longer dropped silently.
    numeric_df = df.select_dtypes(include=['number'])
    if numeric_df.shape[1] > 1:
        plt.figure(figsize=(10, 8))
        sns.heatmap(numeric_df.corr(), annot=True, cmap='coolwarm')
        plt.title('Correlation Matrix')
        plt.show()

# 2. Train Gaussian Naive Bayes Classifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

def train_gaussian_nb(X_train, X_test, y_train, y_test):
    """Fit a Gaussian Naive Bayes classifier and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features to predict on.
        y_train: Training labels.
        y_test: True labels for ``X_test``.

    Returns:
        A ``(model, predictions, accuracy)`` tuple: the fitted
        ``GaussianNB``, its predictions for ``X_test``, and the accuracy
        of those predictions against ``y_test``.
    """
    # fit() returns the estimator itself, so construction and fitting chain.
    classifier = GaussianNB().fit(X_train, y_train)
    test_predictions = classifier.predict(X_test)
    test_accuracy = accuracy_score(y_test, test_predictions)
    return classifier, test_predictions, test_accuracy

# 3. Train Multinomial Naïve Bayes Classifier
from sklearn.naive_bayes import MultinomialNB

def train_multinomial_nb(X_train, X_test, y_train, y_test):
    """Fit a Multinomial Naive Bayes classifier and score it on the test split.

    Args:
        X_train: Training features.
        X_test: Test features to predict on.
        y_train: Training labels.
        y_test: True labels for ``X_test``.

    Returns:
        A ``(model, predictions, accuracy)`` tuple: the fitted
        ``MultinomialNB``, its predictions for ``X_test``, and the accuracy
        of those predictions against ``y_test``.
    """
    model = MultinomialNB()
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)
    return model, predicted, accuracy_score(y_test, predicted)

# 4. Check Accuracy Score and 5. Confusion Matrix with Cross Validation
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import cross_val_score
import numpy as np

def evaluate_model(model, X_train, X_test, y_train, y_test, y_pred, model_name):
    """Report test accuracy, confusion matrix and 5-fold CV accuracy.

    Prints the accuracy of ``y_pred`` against ``y_test``, prints and plots
    the confusion matrix as a heatmap, then runs 5-fold cross-validation of
    ``model`` on the training split and prints the fold scores and their
    mean.

    Args:
        model: Estimator passed to ``cross_val_score``.
        X_train: Training features used for cross-validation.
        X_test: Accepted for a uniform call signature; not used here.
        y_train: Training labels used for cross-validation.
        y_test: True labels of the test split.
        y_pred: Predictions for the test split.
        model_name: Label used in the printed lines and the plot title.
    """
    # Hold-out accuracy.
    test_accuracy = accuracy_score(y_test, y_pred)
    print(f"{model_name} Accuracy: {test_accuracy * 100:.2f}%")

    # Confusion matrix: raw counts first, then the heatmap.
    matrix = confusion_matrix(y_test, y_pred)
    print("Confusion Matrix:")
    print(matrix)
    sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues')
    plt.title(f'{model_name} Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('Actual')
    plt.show()

    # Cross-validated accuracy on the training split only.
    cv_scores = cross_val_score(
        model, X_train, y_train, cv=5, scoring='accuracy'
    )
    mean_cv_accuracy = np.mean(cv_scores)
    print(f"{model_name} Cross-Validation Scores: {cv_scores}")
    print(f"{model_name} Mean CV Accuracy: {mean_cv_accuracy * 100:.2f}%\n")

# 6. ROC
from sklearn.metrics import roc_curve, auc

def plot_roc_curve(model, X_test, y_test, model_name):
    """Plot the ROC curve of a binary classifier on the test split.

    The score used for the curve is column 1 of ``model.predict_proba``.
    Models without ``predict_proba`` are skipped with a printed notice.

    Args:
        model: Fitted classifier.
        X_test: Test features.
        y_test: True labels for ``X_test``.
        model_name: Label used in the legend, the title and the notice.

    Raises:
        ValueError: Propagated from ``roc_curve`` when the target is not
            binary (as in the original implementation).
    """
    if not hasattr(model, 'predict_proba'):
        # Make the skip visible instead of silently producing no plot.
        print(f"{model_name} has no predict_proba; skipping ROC curve.")
        return

    y_prob = model.predict_proba(X_test)[:, 1]

    # Column 1 of predict_proba is the probability of model.classes_[1].
    # Naming that class as pos_label lets roc_curve handle binary targets
    # whose labels are not 0/1 (e.g. strings), which it otherwise rejects.
    # For 0/1 labels this equals the default. With any other class count
    # pos_label stays None, so roc_curve behaves exactly as before.
    classes = getattr(model, 'classes_', None)
    pos_label = None
    if classes is not None and len(classes) == 2:
        pos_label = classes[1]

    fpr, tpr, _ = roc_curve(y_test, y_prob, pos_label=pos_label)
    roc_auc = auc(fpr, tpr)

    plt.figure(figsize=(10, 6))
    plt.plot(fpr, tpr, label=f'{model_name} (AUC = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title(f'{model_name} ROC Curve')
    plt.legend(loc='lower right')
    plt.show()

# Driver script: pick the target, encode the features, split, run EDA, then
# train and evaluate both Naive Bayes variants. The pipeline runs once; the
# original repeated the whole sequence a second time with identical inputs
# (same encoded X, same random_state), which only duplicated every output.
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Verify the column names in the dataset
print("Available columns in the dataset:", df.columns)

# Check and update the target column name
# NOTE(review): the error recorded at the end of this file suggests this
# column is not present in the CSV; confirm the real label column.
target_column = 'Charging_Status'  # Replace with the actual target column name

# Fail early with the list of available columns if the target is missing.
if target_column not in df.columns:
    raise KeyError(f"The target column '{target_column}' is not found in the dataset. Available columns are: {df.columns}")

X = df.drop(columns=[target_column])
y = df[target_column]

# Handle categorical features
X = pd.get_dummies(X, drop_first=True)

# Split the dataset (70% train / 30% test, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Perform EDA on the raw (un-encoded) frame
perform_eda(df)

# Train Gaussian Naive Bayes
gnb, y_pred_gnb, accuracy_gnb = train_gaussian_nb(X_train, X_test, y_train, y_test)
evaluate_model(gnb, X_train, X_test, y_train, y_test, y_pred_gnb, "GaussianNB")
plot_roc_curve(gnb, X_test, y_test, "GaussianNB")

# Train Multinomial Naive Bayes
# MultinomialNB rejects negative feature values. The scaler is fitted on the
# training split only, so a test value below the training minimum would be
# scaled below 0; clip=True keeps transformed values inside [0, 1].
scaler = MinMaxScaler(clip=True)
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

mnb, y_pred_mnb, accuracy_mnb = train_multinomial_nb(X_train_scaled, X_test_scaled, y_train, y_test)
evaluate_model(mnb, X_train_scaled, X_test_scaled, y_train, y_test, y_pred_mnb, "MultinomialNB")
plot_roc_curve(mnb, X_test_scaled, y_test, "MultinomialNB")


# Output of the recorded run:
# KeyError: "['Charging_Status'] not found in axis"