In [6]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset
data = pd.read_csv(r"C:\Users\ganes\Downloads\MLDaata.csv")

# Encode categorical data if necessary
le = LabelEncoder()
data['gender'] = le.fit_transform(data['gender'])

# Define features and target
X = data.drop(['Has_Disease', 'Length_of_Stay'], axis=1)
y_classification = data['Has Disease']
y_regression = data['Length of Stay']

# Train-test split for classification
X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X, y_classification, test_size=0.3, random_state=42)

# Train-test split for regression
X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X, y_regression, test_size=0.3, random_state=42)

import time
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report, confusion_matrix, mean_squared_error, r2_score

# Initialize models
knn = KNeighborsClassifier(n_neighbors=3)
svm = SVC()
dt = DecisionTreeClassifier()
nb = GaussianNB()
lr = LinearRegression()
kmeans = KMeans(n_clusters=2)

# Dictionaries to store results
results = {
    'k-NN': {},
    'SVM': {},
    'Decision Tree': {},
    'Naive Bayes': {},
    'Linear Regression': {},
    'K-Means Clustering': {}
}

# Function to evaluate and store results
def evaluate_model(name, model, X_train, X_test, y_train, y_test, classification=True):
    start_time = time.time()
    model.fit(X_train, y_train)
    training_time = time.time() - start_time
    
    start_time = time.time()
    predictions = model.predict(X_test)
    prediction_time = time.time() - start_time

    if classification:
        accuracy = accuracy_score(y_test, predictions)
        precision = precision_score(y_test, predictions, average='macro')
        recall = recall_score(y_test, predictions, average='macro')
        f1 = f1_score(y_test, predictions, average='macro')
        conf_matrix = confusion_matrix(y_test, predictions)
        class_report = classification_report(y_test, predictions)
        
        results[name] = {
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'training_time': training_time,
            'prediction_time': prediction_time,
            'confusion_matrix': conf_matrix,
            'classification_report': class_report
        }
    else:
        mse = mean_squared_error(y_test, predictions)
        r2 = r2_score(y_test, predictions)
        
        results[name] = {
            'mse': mse,
            'r2_score': r2,
            'training_time': training_time,
            'prediction_time': prediction_time
        }

# Evaluate classification models
evaluate_model('k-NN', knn, X_train_class, X_test_class, y_train_class, y_test_class)
evaluate_model('SVM', svm, X_train_class, X_test_class, y_train_class, y_test_class)
evaluate_model('Decision Tree', dt, X_train_class, X_test_class, y_train_class, y_test_class)
evaluate_model('Naive Bayes', nb, X_train_class, X_test_class, y_train_class, y_test_class)

# Evaluate regression model
evaluate_model('Linear Regression', lr, X_train_reg, X_test_reg, y_train_reg, y_test_reg, classification=False)

# Evaluate clustering model
start_time = time.time()
kmeans.fit(X_train_class)
training_time_kmeans = time.time() - start_time

start_time = time.time()
kmeans_predictions = kmeans.predict(X_test_class)
prediction_time_kmeans = time.time() - start_time

# Since k-means is unsupervised, we won't have accuracy, confusion matrix, etc.
results['K-Means Clustering'] = {
    'training_time': training_time_kmeans,
    'prediction_time': prediction_time_kmeans
}
for name, result in results.items():
    if 'accuracy' in result:
        print(f"\n{name} Classification Metrics:")
        print(f"Accuracy: {result['accuracy']:.2f}")
        print(f"Precision: {result['precision']:.2f}")
        print(f"Recall: {result['recall']:.2f}")
        print(f"F1-Score: {result['f1_score']:.2f}")
        print(f"Training Time: {result['training_time']:.4f} seconds")
        print(f"Prediction Time: {result['prediction_time']:.4f} seconds")
        print(f"Confusion Matrix:\n{result['confusion_matrix']}")
        print(f"Classification Report:\n{result['classification_report']}")
    elif 'mse' in result:
        print(f"\n{name} Regression Metrics:")
        print(f"MSE: {result['mse']:.2f}")
        print(f"R-squared: {result['r2_score']:.2f}")
        print(f"Training Time: {result['training_time']:.4f} seconds")
        print(f"Prediction Time: {result['prediction_time']:.4f} seconds")
    else:
        print(f"\n{name} Clustering Metrics:")
        print(f"Training Time: {result['training_time']:.4f} seconds")
        print(f"Prediction Time: {result['prediction_time']:.4f} seconds")



KeyError: 'gender'