<a href="https://colab.research.google.com/github/Subhasree5/Ml-Workshop-/blob/main/classiffication.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Libraries

In [15]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, classification_report
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

Load dataset

In [16]:
# Read the penguin dataset into a DataFrame. The path points into /content,
# so the CSV has to be present there before this cell is run.
penguins = pd.read_csv(filepath_or_buffer='/content/penguins_size.csv')

Drop missing values

In [23]:
# Discard every row that has at least one missing value; `penguins` itself is
# modified (inplace=True), so no new DataFrame is returned.
penguins.dropna(axis=0, how='any', inplace=True)

In [24]:
# Sanity check: number of missing entries left in each column
# (every count should be 0 after the rows with missing values were dropped).
penguins.isna().sum()

Unnamed: 0,0
species,0
island,0
culmen_length_mm,0
culmen_depth_mm,0
flipper_length_mm,0
body_mass_g,0
sex,0


In [26]:
# Replace the text categories with integer codes, keeping one fitted encoder
# per column so the codes can be mapped back to the original names later
# (evaluate_model reads label_encoders['species'].classes_ for its report).
#
# Rows with missing values were already removed in an earlier cell, so each
# column is encoded as-is. Calling .dropna() on the column here could only
# shorten it, and a shorter array cannot be assigned back to the DataFrame
# (pandas raises a length-mismatch error).
#
# NOTE: the codes follow the sorted order of the labels and carry no meaning;
# for the feature columns ('sex', 'island') this imposes an arbitrary ordering.
# NOTE: this cell is not safe to re-run on its own. A second run would encode
# the integer codes again and replace the stored class names with integers;
# reload the data before re-running it.
label_encoders = {}
for column in ['species', 'sex', 'island']:
    le = LabelEncoder()
    penguins[column] = le.fit_transform(penguins[column])
    label_encoders[column] = le

Split the dataset


In [31]:
# Columns that must not be used as features:
#   - 'species' is the prediction target.
#   - 'Unnamed: 0' is the name pandas gives to a header-less first column,
#     presumably a row index saved into the CSV. A row counter is not a
#     measurement of the bird, and if the file is ordered by species it gives
#     the label away (data leakage), which inflates every score below.
non_feature_columns = ['species', 'Unnamed: 0']
X = penguins.drop(columns=[col for col in non_feature_columns if col in penguins.columns])
y = penguins['species']

# Split the dataset into training (70%) and testing (30%) sets.
# stratify=y keeps the species proportions the same in both parts;
# random_state fixes the shuffle so the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)


Standardize numerical features

In [33]:
# Learn each feature's mean and standard deviation from the training split only,
# then apply that same transformation to both splits so that no statistics of
# the test data influence the preprocessing.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

Function to train and evaluate a model

In [34]:
def evaluate_model(model, X_train, X_test, y_train, y_test, target_names=None):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object
        Estimator exposing ``fit(X, y)`` and ``predict(X)``. It is fitted in
        place by this call.
    X_train, X_test : array-like
        Feature matrices for the training and test splits.
    y_train, y_test : array-like
        Class labels matching ``X_train`` and ``X_test``.
    target_names : sequence, optional
        Display names for the classes in the text report, in label order.
        When omitted, the names are taken from the module-level
        ``label_encoders['species']`` (the original behaviour), so that
        encoder must exist in that case.

    Returns
    -------
    tuple
        ``(metrics, report)`` where ``metrics`` is a dict with the keys
        'Accuracy', 'Precision', 'Recall' and 'F1-Score', and ``report`` is
        the text produced by ``classification_report``.
    """
    if target_names is None:
        # Backward-compatible default: class names of the encoded target column.
        target_names = label_encoders['species'].classes_
    # The report aligns columns using the length of each name, so make sure
    # every name is a string even if the caller passes numeric labels.
    target_names = [str(class_name) for class_name in target_names]

    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # average='weighted' averages the per-class scores weighted by the number
    # of true samples of each class.
    metrics = {
        'Accuracy': accuracy_score(y_test, y_pred),
        'Precision': precision_score(y_test, y_pred, average='weighted'),
        'Recall': recall_score(y_test, y_pred, average='weighted'),
        'F1-Score': f1_score(y_test, y_pred, average='weighted')
    }
    report = classification_report(y_test, y_pred, target_names=target_names)
    return metrics, report

Models

In [35]:
# Candidate classifiers, keyed by the display name used when results are printed.
# The insertion order is the order in which they are trained and reported.
models = dict([
    ("Logistic Regression", LogisticRegression(max_iter=200)),
    ("Naïve Bayes", GaussianNB()),
    ("KNN", KNeighborsClassifier(n_neighbors=5)),
    ("Decision Tree", DecisionTreeClassifier(random_state=42)),
    ("Support Vector Classifier", SVC(kernel='linear', random_state=42)),
])

Train and evaluate each model

In [36]:
# Fit every candidate on the same train/test split and keep its scores.
results = {}
for name, model in models.items():
    # evaluate_model returns the (metrics, report) pair, stored unchanged.
    results[name] = evaluate_model(model, X_train, X_test, y_train, y_test)

# Display results: the headline metrics rounded to two decimals, followed by
# the per-class report of each model.
for name in results:
    metrics, report = results[name]
    print(f"\n{name} Performance Metrics:")
    for metric in metrics:
        value = metrics[metric]
        print(f"{metric}: {value:.2f}")
    print("\nClassification Report:\n", report)


Logistic Regression Performance Metrics:
Accuracy: 1.00
Precision: 1.00
Recall: 1.00
F1-Score: 1.00

Classification Report:
               precision    recall  f1-score   support

      Adelie       1.00      1.00      1.00        44
   Chinstrap       1.00      1.00      1.00        21
      Gentoo       1.00      1.00      1.00        36

    accuracy                           1.00       101
   macro avg       1.00      1.00      1.00       101
weighted avg       1.00      1.00      1.00       101


Naïve Bayes Performance Metrics:
Accuracy: 0.85
Precision: 0.91
Recall: 0.85
F1-Score: 0.85

Classification Report:
               precision    recall  f1-score   support

      Adelie       1.00      0.66      0.79        44
   Chinstrap       0.60      1.00      0.75        21
      Gentoo       0.97      1.00      0.99        36

    accuracy                           0.85       101
   macro avg       0.86      0.89      0.84       101
weighted avg       0.91      0.85      0.85      