# Assignment 1: Machine Learning Implementation

This notebook covers 6 different machine learning tasks using the **Iris Dataset**.

## Setup: Loading the Dataset

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, mean_squared_error

# Fetch the Iris bunch, then unpack the feature matrix and the integer class labels
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print(f"Dataset loaded. Features: {iris.feature_names}")
print(f"Target classes: {iris.target_names}")

## Helper Function for Plotting
We use this simple function to plot the accuracy and confusion matrix for each model.

In [None]:
def plot_results(y_true, y_pred, model_name, accuracy=None, class_names=None):
    """Plot a model's accuracy bar and confusion-matrix heatmap side by side.

    Parameters
    ----------
    y_true : array-like of int
        Ground-truth class indices.
    y_pred : array-like of int
        Predicted class indices, same length as ``y_true``.
    model_name : str
        Label used in both subplot titles.
    accuracy : float, optional
        Pre-computed accuracy in [0, 1]. When omitted it is computed from
        ``y_true`` and ``y_pred`` with ``accuracy_score``.
    class_names : sequence of str, optional
        Display names for class indices ``0..k-1``. Defaults to the
        notebook-level ``iris.target_names``.

    Returns
    -------
    None
        The figure is shown with ``plt.show()``.
    """
    if accuracy is None:
        accuracy = accuracy_score(y_true, y_pred)
    if class_names is None:
        class_names = iris.target_names

    # Pin the label set so the matrix is always k x k. Without `labels`,
    # a class missing from both y_true and y_pred is dropped and the tick
    # labels below would no longer line up with the rows/columns.
    cm = confusion_matrix(y_true, y_pred, labels=np.arange(len(class_names)))

    fig, (ax_acc, ax_cm) = plt.subplots(1, 2, figsize=(12, 4))

    # Left side: accuracy as a single bar on a fixed 0..1 scale
    ax_acc.bar(['Accuracy'], [accuracy], color='skyblue')
    ax_acc.set_ylim(0, 1)
    ax_acc.set_title(f'{model_name} Accuracy: {accuracy:.2f}')

    # Right side: confusion matrix heatmap (rows = actual, columns = predicted)
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=class_names, yticklabels=class_names, ax=ax_cm)
    ax_cm.set_title(f'{model_name} Confusion Matrix')
    ax_cm.set_xlabel('Predicted')
    ax_cm.set_ylabel('Actual')

    fig.tight_layout()
    plt.show()

### 1. Decision Tree

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Create and train model.
# random_state is fixed because the tree permutes features at each split, so
# ties between equally good splits would otherwise be broken differently
# from run to run.
dt_model = DecisionTreeClassifier(random_state=42)
dt_model.fit(X_train, y_train)

# Predict on the held-out split and plot
dt_preds = dt_model.predict(X_test)
plot_results(y_test, dt_preds, "Decision Tree")

### 2. Support Vector Machine (SVM)

In [None]:
# SVC lives in the `sklearn.svm` module (there is no `sklearn.svc`)
from sklearn.svm import SVC

# Create and train a linear-kernel support vector classifier
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

# Predict on the held-out split and plot
svm_preds = svm_model.predict(X_test)
plot_results(y_test, svm_preds, "SVM")

### 3. Multilayer Perceptron (MLP)

In [None]:
from sklearn.neural_network import MLPClassifier

# Create and train model.
# random_state fixes the weight initialisation and batch shuffling so the
# reported accuracy is the same on every Restart & Run All.
mlp_model = MLPClassifier(max_iter=500, random_state=42)
mlp_model.fit(X_train, y_train)

# Predict on the held-out split and plot
mlp_preds = mlp_model.predict(X_test)
plot_results(y_test, mlp_preds, "MLP (Neural Network)")

### 4. Linear Regression
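Note: Linear Regression predicts continuous values rather than classes. To get a confusion matrix, we round each prediction to the nearest integer and clip it to the valid class range (0, 1, 2). This treats the class indices as ordered numbers, so the result is included for comparison only.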

In [None]:
from sklearn.linear_model import LinearRegression

# Fit an ordinary least-squares regressor directly on the integer class labels
lr_model = LinearRegression().fit(X_train, y_train)

# The regressor outputs continuous values: round each one to the nearest
# integer, then limit it to the valid class indices 0..2
lr_preds_raw = lr_model.predict(X_test)
lr_preds = np.round(lr_preds_raw).clip(0, 2).astype(int)

plot_results(y_test, lr_preds, "Linear Regression")

### 5. K-means Clustering
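K-means is unsupervised, so it never sees the labels while fitting. We cluster the full dataset into 3 groups, assign each cluster the most common true label among its members, and check how well the result matches the actual labels. Because this uses all samples rather than the held-out test split, the score is not directly comparable to the supervised models in this notebook.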

In [None]:
from sklearn.cluster import KMeans
from scipy.stats import mode

# One cluster per Iris species
n_clusters = 3

# Cluster the full dataset (labels are not used for fitting).
# n_init is set explicitly because its default differs between scikit-learn
# versions, which would otherwise change the clustering depending on the
# installed version.
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=42)
clusters = kmeans.fit_predict(X)

# Cluster ids are arbitrary, so map each cluster to the most common true
# class among its members before comparing against y.
labels = np.zeros_like(clusters)
for i in range(n_clusters):
    mask = (clusters == i)
    if not mask.any():
        # Nothing assigned to this cluster; there is no majority label to take
        continue
    # mode(...)[0] is a length-1 array on some SciPy versions and a scalar on
    # others; np.ravel normalises both to a 1-D array.
    labels[mask] = np.ravel(mode(y[mask])[0])[0]

plot_results(y, labels, "K-Means Clustering")

### 6. Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Create and train model.
# random_state fixes the bootstrap sampling and per-split feature sampling so
# the forest, and therefore the reported accuracy, is reproducible.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Predict on the held-out split and plot
rf_preds = rf_model.predict(X_test)
plot_results(y_test, rf_preds, "Random Forest")