<a href="https://colab.research.google.com/github/KarandeepSinghBedi/Applied_Machine_Learning/blob/main/Assingment_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assingment 2

Importing the libraries

In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, f1_score, precision_score, recall_score, confusion_matrix


Importing the Dataset

In [7]:
# Load Banknote Authentication dataset
banknote_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00267/data_banknote_authentication.txt'
banknote_columns = ['Variance', 'Skewness', 'Curtosis', 'Entropy', 'Class']
banknote_df = pd.read_csv(banknote_url, header=None, names=banknote_columns)


# Preprocessing for Banknote Authentication Dataset
X_banknote = banknote_df.drop('Class', axis=1)
y_banknote = banknote_df['Class']

# Split data into training and test sets (80-20 split)
X_train_banknote, X_test_banknote, y_train_banknote, y_test_banknote = train_test_split(X_banknote, y_banknote, test_size=0.2, random_state=42)

# Feature scaling (standardizing the features)
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train_banknote)
X_test_scaled = scaler.transform(X_test_banknote)

In [8]:
# Load Haberman’s Survival dataset
# Updated URL for Haberman's Survival dataset
haberman_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/haberman/haberman.data'
haberman_columns = ['Age', 'Year', 'Nodes', 'Survival']
haberman_df = pd.read_csv(haberman_url, header=None, names=haberman_columns)


# Preprocessing for Haberman’s Survival Dataset
X_haberman = haberman_df.drop('Survival', axis=1)
y_haberman = haberman_df['Survival']

# Split data into training and test sets (80-20 split)
X_train_haberman, X_test_haberman, y_train_haberman, y_test_haberman = train_test_split(X_haberman, y_haberman, test_size=0.2, random_state=42)

# Feature scaling (standardizing the features)
X_train_scaled_haberman = scaler.fit_transform(X_train_haberman)
X_test_scaled_haberman = scaler.transform(X_test_haberman)


In [9]:
# Initialize models
models = {
    'Naive Bayes': GaussianNB(),
    'SVM': SVC(),
    'Logistic Regression': LogisticRegression(),
    'Random Forest': RandomForestClassifier(random_state=42)
}


In [17]:
# Store results
results_banknote = {}

# Train and evaluate models
for model_name, model in models.items():
    model.fit(X_train_scaled, y_train_banknote)
    y_pred_banknote = model.predict(X_test_scaled)

    # Accuracy and other metrics
    accuracy = accuracy_score(y_test_banknote, y_pred_banknote,)
    precision, recall, fscore, _ = precision_recall_fscore_support(y_test_banknote, y_pred_banknote, average='binary')
    macroF1Score = f1_score(y_test_banknote, y_pred_banknote, average='macro'),
    macroPrecision = precision_score(y_test_banknote, y_pred_banknote, average='macro'),
    macroRecall = recall_score(y_test_banknote, y_pred_banknote, average='macro'),
    confusionMatrix = confusion_matrix(y_test_banknote, y_pred_banknote)

    results_banknote[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': fscore,
        'Macro F1 Score': macroF1Score,
        'Macro Precision': macroPrecision,
        'Macro Recall': macroRecall,
        'Confusion Matrix': confusionMatrix

    }

# Display results for Banknote Authentication
print("Banknote Authentication - Model Performance: \n")
for model_name, result in results_banknote.items():
    print(f"Model: {model_name}")
    print("-" * 40)
    for metric, value in result.items():
        if metric != 'Confusion Matrix':  # Skip printing the raw matrix
            print(f"{metric}: {value}")

    # Beautify Confusion Matrix
    cm = result['Confusion Matrix']
    cm_df = pd.DataFrame(cm, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
    print("Confusion Matrix:")
    display(cm_df)  # Use display for formatted output

    print("-" * 40)
    print("\n")

Banknote Authentication - Model Performance: 

Model: Naive Bayes
----------------------------------------
Accuracy: 0.8072727272727273
Precision: 0.8557692307692307
Recall: 0.7007874015748031
F1-Score: 0.7705627705627706
Macro F1 Score: (0.8022092849679057,)
Macro Precision: (0.8167735042735043,)
Macro Recall: (0.799718025111726,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,133,15
Actual 1,38,89


----------------------------------------


Model: SVM
----------------------------------------
Accuracy: 1.0
Precision: 1.0
Recall: 1.0
F1-Score: 1.0
Macro F1 Score: (1.0,)
Macro Precision: (1.0,)
Macro Recall: (1.0,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,148,0
Actual 1,0,127


----------------------------------------


Model: Logistic Regression
----------------------------------------
Accuracy: 0.9781818181818182
Precision: 0.9689922480620154
Recall: 0.984251968503937
F1-Score: 0.9765625
Macro F1 Score: (0.9780771683673469,)
Macro Precision: (0.9776468089625145,)
Macro Recall: (0.978612470738455,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,144,4
Actual 1,2,125


----------------------------------------


Model: Random Forest
----------------------------------------
Accuracy: 0.9927272727272727
Precision: 1.0
Recall: 0.984251968503937
F1-Score: 0.9920634920634921
Macro F1 Score: (0.9926760413337594,)
Macro Precision: (0.9933333333333334,)
Macro Recall: (0.9921259842519685,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,148,0
Actual 1,2,125


----------------------------------------




In [21]:
# Train and evaluate models for Haberman’s dataset
results_haberman = {}

for model_name, model in models.items():
    model.fit(X_train_scaled_haberman, y_train_haberman)
    y_pred_haberman = model.predict(X_test_scaled_haberman)

    # Accuracy and other metrics
    accuracy = accuracy_score(y_test_haberman, y_pred_haberman)
    precision, recall, fscore, _ = precision_recall_fscore_support(y_test_haberman, y_pred_haberman, average='binary')
    macroF1Score = f1_score(y_test_haberman, y_pred_haberman, average='macro'),
    macroPrecision = precision_score(y_test_haberman, y_pred_haberman, average='macro'),
    macroRecall = recall_score(y_test_haberman, y_pred_haberman, average='macro'),
    confusionMatrix = confusion_matrix(y_test_haberman, y_pred_haberman)

    results_haberman[model_name] = {
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': fscore,
        'Macro F1 Score': macroF1Score,
        'Macro Precision': macroPrecision,
        'Macro Recall': macroRecall,
        'Confusion Matrix': confusionMatrix

    }

# Display results for Haberman’s Survival
print("Haberman's Survival - Model Performance")
for model_name, result in results_haberman.items():
    print(f"Model: {model_name}")
    print("-" * 40)
    for metric, value in result.items():
        if metric != 'Confusion Matrix':  # Skip printing the raw matrix
            print(f"{metric}: {value}")

    # Beautify Confusion Matrix
    cm = result['Confusion Matrix']
    cm_df = pd.DataFrame(cm, index=['Actual 0', 'Actual 1'], columns=['Predicted 0', 'Predicted 1'])
    print("Confusion Matrix:")
    display(cm_df)  # Use display for formatted output

    print("-" * 40)
    print("\n")

Haberman's Survival - Model Performance
Model: Naive Bayes
----------------------------------------
Accuracy: 0.7096774193548387
Precision: 0.7407407407407407
Recall: 0.9090909090909091
F1-Score: 0.8163265306122449
Macro F1 Score: (0.5620094191522763,)
Macro Precision: (0.6203703703703703,)
Macro Recall: (0.5656565656565656,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,40,4
Actual 1,14,4


----------------------------------------


Model: SVM
----------------------------------------
Accuracy: 0.6774193548387096
Precision: 0.7307692307692307
Recall: 0.8636363636363636
F1-Score: 0.7916666666666666
Macro F1 Score: (0.5386904761904762,)
Macro Precision: (0.5653846153846154,)
Macro Recall: (0.5429292929292929,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,38,6
Actual 1,14,4


----------------------------------------


Model: Logistic Regression
----------------------------------------
Accuracy: 0.6935483870967742
Precision: 0.7272727272727273
Recall: 0.9090909090909091
F1-Score: 0.8080808080808081
Macro F1 Score: (0.5240404040404041,)
Macro Precision: (0.577922077922078,)
Macro Recall: (0.5378787878787878,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,40,4
Actual 1,15,3


----------------------------------------


Model: Random Forest
----------------------------------------
Accuracy: 0.7096774193548387
Precision: 0.7407407407407407
Recall: 0.9090909090909091
F1-Score: 0.8163265306122449
Macro F1 Score: (0.5620094191522763,)
Macro Precision: (0.6203703703703703,)
Macro Recall: (0.5656565656565656,)
Confusion Matrix:


Unnamed: 0,Predicted 0,Predicted 1
Actual 0,40,4
Actual 1,14,4


----------------------------------------


