# Homework4 - CSC 480
#### Name: Sreyas Gangji

In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

# Read the labelled training split and the test split from disk.
train_data = pd.read_csv("fashion_train.csv")
test_data = pd.read_csv("fashion_test.csv")

# Show the first rows of each frame to check the column layout.
print(train_data.head(), test_data.head(), sep="\n")

   label  pixel1  pixel2  pixel3  pixel4  pixel5  pixel6  pixel7  pixel8  \
0      0       0       0       0       1       2       0       0       0   
1      1       0       0       0       0       0       0       0       0   
2      0       0       0       0       0       1       0       0       0   
3      1       0       0       0       0       0       0       0       0   
4      0       0       0       0       0       0       0       0       0   

   pixel9  ...  pixel775  pixel776  pixel777  pixel778  pixel779  pixel780  \
0       0  ...         3         0         0         0         0         1   
1       0  ...       203       214       166         0         0         0   
2       0  ...       164       177       163         0         0         1   
3       0  ...         9        10         9         9         8         1   
4       0  ...         0         0         0         0         0         0   

   pixel781  pixel782  pixel783  pixel784  
0         0         0         

In [2]:
# Target vector: the "label" column of the training frame.
y = train_data["label"].to_numpy()

# Feature matrix: every remaining column, divided by 255.0.
X = train_data.drop(columns=["label"]).to_numpy() / 255.0

# Hold out 20% of the labelled rows for validation (fixed seed for a repeatable split).
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Apply the same 1/255 scaling to the test frame.
# NOTE(review): assumes test_data holds only pixel columns - confirm against the CSV.
X_test = test_data.to_numpy() / 255.0

print(
    f"Training set shape: {X_train.shape}",
    f"Validation set shape: {X_val.shape}",
    f"Test set shape: {X_test.shape}",
    sep="\n",
)


Training set shape: (9600, 784)
Validation set shape: (2400, 784)
Test set shape: (2000, 784)


In [4]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Attributes:
        W: Weight vector of shape (input_dim,), initialised to small random values.
        b: Scalar bias, initialised to 0.
        learning_rate: Step size used for every parameter update.
    """

    def __init__(self, input_dim, learning_rate=0.01):
        """Create a model for inputs with `input_dim` features."""
        self.W = np.random.randn(input_dim) * 0.01  # Small random weights
        self.b = 0.0  # Bias initialized to 0
        self.learning_rate = learning_rate

    def sigmoid(self, z):
        """Sigmoid activation function, evaluated without overflow.

        Uses the identity 1 / (1 + exp(-z)) == exp(-logaddexp(0, -z)), which
        avoids computing exp of a large positive number when z is very negative.
        """
        return np.exp(-np.logaddexp(0, -z))

    def predict_proba(self, X):
        """Compute probability predictions sigmoid(X @ W + b) for each row of X."""
        z = np.dot(X, self.W) + self.b
        return self.sigmoid(z)

    def predict(self, X):
        """Classify into 0 or 1 based on probability threshold 0.5."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def compute_loss(self, y_true, y_pred, eps=1e-15):
        """Binary Cross-Entropy Loss (mean over samples).

        Args:
            y_true: Array of 0/1 targets.
            y_pred: Array of predicted probabilities, same shape as `y_true`.
            eps: Predictions are clipped to [eps, 1 - eps] so that saturated
                probabilities (exactly 0 or 1) give a finite loss instead of
                inf/nan from log(0).

        Returns:
            The mean cross-entropy as a float.
        """
        y_pred = np.clip(y_pred, eps, 1 - eps)
        return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))

    def train(self, X_train, y_train, X_val, y_val, epochs=1000):
        """Train the model with full-batch gradient descent.

        Every epoch uses the whole training set for a single parameter update.
        Training and validation losses are printed every 100 epochs; the
        reported values are computed before that epoch's update is applied.

        Args:
            X_train: Training features, shape (m, input_dim).
            y_train: Training targets (0/1), shape (m,).
            X_val: Validation features, used only for loss reporting.
            y_val: Validation targets, used only for loss reporting.
            epochs: Number of gradient-descent updates to perform.
        """
        m = X_train.shape[0]
        for epoch in range(epochs):
            y_pred_train = self.predict_proba(X_train)
            error = y_pred_train - y_train

            # Losses are only needed for reporting, so skip the extra
            # validation forward pass on the epochs that print nothing.
            if epoch % 100 == 0:
                train_loss = self.compute_loss(y_train, y_pred_train)
                val_loss = self.compute_loss(y_val, self.predict_proba(X_val))
                print(f"Epoch {epoch}: Train Loss = {train_loss:.4f}, Validation Loss = {val_loss:.4f}")

            # Gradients of the mean cross-entropy w.r.t. W and b
            dW = np.dot(X_train.T, error) / m
            db = np.mean(error)

            # Update weights and bias
            self.W -= self.learning_rate * dW
            self.b -= self.learning_rate * db


In [5]:
# Baseline run: learning rate 0.01 for 1000 epochs.
model = LogisticRegression(
    input_dim=X_train.shape[1],
    learning_rate=0.01,
)
model.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


Epoch 0: Train Loss = 0.7030, Validation Loss = 0.7029
Epoch 100: Train Loss = 0.1648, Validation Loss = 0.1629
Epoch 200: Train Loss = 0.1283, Validation Loss = 0.1258
Epoch 300: Train Loss = 0.1142, Validation Loss = 0.1113
Epoch 400: Train Loss = 0.1065, Validation Loss = 0.1034
Epoch 500: Train Loss = 0.1016, Validation Loss = 0.0984
Epoch 600: Train Loss = 0.0982, Validation Loss = 0.0948
Epoch 700: Train Loss = 0.0956, Validation Loss = 0.0922
Epoch 800: Train Loss = 0.0936, Validation Loss = 0.0901
Epoch 900: Train Loss = 0.0920, Validation Loss = 0.0885


In [6]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Hard 0/1 predictions for the held-out validation split.
y_pred_val = model.predict(X_val)

# Score the predictions against the true validation labels.
accuracy, precision, recall, f1 = (
    accuracy_score(y_val, y_pred_val),
    precision_score(y_val, y_pred_val),
    recall_score(y_val, y_pred_val),
    f1_score(y_val, y_pred_val),
)

print(
    f"Accuracy: {accuracy:.4f}",
    f"Precision: {precision:.4f}",
    f"Recall: {recall:.4f}",
    f"F1-score: {f1:.4f}",
    sep="\n",
)


Accuracy: 0.9742
Precision: 0.9719
Recall: 0.9768
F1-score: 0.9743


In [30]:
# Probabilities from the baseline model for every test row.
y_test_pred_proba = model.predict_proba(X_test)

# Two-column submission table: a 0-based row id plus the raw probability.
# NOTE(review): the leading space in " class" looks intentional (expected header) - confirm.
submission_df = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_proba.shape[0]),
        " class": y_test_pred_proba,
    }
)

# Write the table without the pandas index column.
submission_df.to_csv("submission_baseline.csv", index=False)
print("Kaggle submission file saved as 'submission_baseline.csv'.")


Kaggle submission file saved as 'submission_baseline.csv'.


#### Training with more epochs (2000 epochs)

In [10]:
# Same configuration as the baseline, but run for 2000 epochs instead of 1000.
model_epochs2000 = LogisticRegression(
    input_dim=X_train.shape[1],
    learning_rate=0.01,
)
model_epochs2000.train(
    X_train, y_train,
    X_val, y_val,
    epochs=2000,
)


Epoch 0: Train Loss = 0.6917, Validation Loss = 0.6910
Epoch 100: Train Loss = 0.1638, Validation Loss = 0.1616
Epoch 200: Train Loss = 0.1278, Validation Loss = 0.1249
Epoch 300: Train Loss = 0.1139, Validation Loss = 0.1106
Epoch 400: Train Loss = 0.1063, Validation Loss = 0.1029
Epoch 500: Train Loss = 0.1014, Validation Loss = 0.0979
Epoch 600: Train Loss = 0.0980, Validation Loss = 0.0944
Epoch 700: Train Loss = 0.0955, Validation Loss = 0.0918
Epoch 800: Train Loss = 0.0935, Validation Loss = 0.0898
Epoch 900: Train Loss = 0.0919, Validation Loss = 0.0882
Epoch 1000: Train Loss = 0.0906, Validation Loss = 0.0868
Epoch 1100: Train Loss = 0.0894, Validation Loss = 0.0857
Epoch 1200: Train Loss = 0.0884, Validation Loss = 0.0847
Epoch 1300: Train Loss = 0.0875, Validation Loss = 0.0838
Epoch 1400: Train Loss = 0.0867, Validation Loss = 0.0830
Epoch 1500: Train Loss = 0.0860, Validation Loss = 0.0823
Epoch 1600: Train Loss = 0.0854, Validation Loss = 0.0817
Epoch 1700: Train Loss = 0

In [24]:
# Probabilities from the 2000-epoch model for every test row.
y_test_pred_epochs2000 = model_epochs2000.predict_proba(X_test)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_epochs2000 = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_epochs2000.shape[0]),
        " class": y_test_pred_epochs2000,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_epochs2000.to_csv(
    "submission_epochs2000.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_epochs2000.csv'.")


New Kaggle submission file saved as 'submission_epochs2000.csv'.


#### Training with a lower learning rate (0.001)

In [12]:
# Same as the baseline (1000 epochs) but with the learning rate reduced to 0.001.
model_lr0001 = LogisticRegression(
    input_dim=X_train.shape[1],
    learning_rate=0.001,
)
model_lr0001.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


Epoch 0: Train Loss = 0.6937, Validation Loss = 0.6936
Epoch 100: Train Loss = 0.4556, Validation Loss = 0.4550
Epoch 200: Train Loss = 0.3468, Validation Loss = 0.3459
Epoch 300: Train Loss = 0.2875, Validation Loss = 0.2863
Epoch 400: Train Loss = 0.2505, Validation Loss = 0.2491
Epoch 500: Train Loss = 0.2252, Validation Loss = 0.2237
Epoch 600: Train Loss = 0.2069, Validation Loss = 0.2052
Epoch 700: Train Loss = 0.1929, Validation Loss = 0.1911
Epoch 800: Train Loss = 0.1819, Validation Loss = 0.1799
Epoch 900: Train Loss = 0.1730, Validation Loss = 0.1709


In [25]:
# Probabilities from the low-learning-rate model for every test row.
y_test_pred_lr0001 = model_lr0001.predict_proba(X_test)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_lr0001 = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_lr0001.shape[0]),
        " class": y_test_pred_lr0001,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_lr0001.to_csv(
    "submission_lr0001.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_lr0001.csv'.")


New Kaggle submission file saved as 'submission_lr0001.csv'.


Reducing the learning rate too much caused the model to learn too slowly, leading to poorer performance.

#### Training with Mini-Batch Gradient Descent (Batch Size = 64)

1. Learning Rate = 0.01 (since 0.001 was too slow)
2. Batch Size = 64 (instead of full-batch training)
3. Epochs = 1000

In [16]:
class LogisticRegressionMiniBatch:
    """Binary logistic regression trained with mini-batch gradient descent.

    Attributes:
        W: Weight vector of shape (input_dim,), initialised to small random values.
        b: Scalar bias, initialised to 0.
        learning_rate: Step size used for every mini-batch update.
        batch_size: Maximum number of samples per mini-batch.
    """

    def __init__(self, input_dim, learning_rate=0.01, batch_size=64):
        """Create a model for inputs with `input_dim` features."""
        self.W = np.random.randn(input_dim) * 0.01
        self.b = 0.0
        self.learning_rate = learning_rate
        self.batch_size = batch_size

    def sigmoid(self, z):
        """Sigmoid activation, evaluated as exp(-logaddexp(0, -z)) to avoid overflow in exp."""
        return np.exp(-np.logaddexp(0, -z))

    def predict_proba(self, X):
        """Return sigmoid(X @ W + b) for each row of X."""
        return self.sigmoid(np.dot(X, self.W) + self.b)

    def predict(self, X):
        """Return 0/1 class predictions using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def train(self, X_train, y_train, X_val, y_val, epochs=1000):
        """Run `epochs` passes of shuffled mini-batch gradient descent.

        Args:
            X_train: Training features, shape (m, input_dim).
            y_train: Training targets (0/1), shape (m,).
            X_val: Accepted for signature parity with the other models; not used.
            y_val: Accepted for signature parity with the other models; not used.
            epochs: Number of full passes over the training data.
        """
        m = X_train.shape[0]
        for _ in range(epochs):
            # Reshuffle each epoch; fancy indexing makes copies, so the
            # caller's arrays are left untouched.
            indices = np.random.permutation(m)
            X_train, y_train = X_train[indices], y_train[indices]

            for start in range(0, m, self.batch_size):
                X_batch = X_train[start:start + self.batch_size]
                y_batch = y_train[start:start + self.batch_size]

                error = self.predict_proba(X_batch) - y_batch

                # Average over the samples actually in this batch: the final
                # batch is shorter when m is not a multiple of batch_size.
                dW = np.dot(X_batch.T, error) / X_batch.shape[0]
                db = np.mean(error)

                self.W -= self.learning_rate * dW
                self.b -= self.learning_rate * db

# Mini-batch run: batches of 64 samples, learning rate 0.01, 1000 epochs.
model_minibatch = LogisticRegressionMiniBatch(
    input_dim=X_train.shape[1],
    learning_rate=0.01,
    batch_size=64,
)
model_minibatch.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


In [26]:
# Probabilities for the test rows: sigmoid of the linear score X_test @ W + b.
y_test_pred_minibatch = model_minibatch.sigmoid(
    X_test @ model_minibatch.W + model_minibatch.b
)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_minibatch = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_minibatch.shape[0]),
        " class": y_test_pred_minibatch,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_minibatch.to_csv(
    "submission_minibatch.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_minibatch.csv'.")


New Kaggle submission file saved as 'submission_minibatch.csv'.


#### Training with L2 Regularization (λ = 0.1)

In [18]:
class LogisticRegressionL2:
    """Binary logistic regression with an L2 penalty on the weights.

    Trained with full-batch gradient descent; the bias is not regularised.

    Attributes:
        W: Weight vector of shape (input_dim,), initialised to small random values.
        b: Scalar bias, initialised to 0.
        learning_rate: Step size used for every parameter update.
        reg_lambda: Coefficient of the `reg_lambda * W` term added to the weight gradient.
    """

    def __init__(self, input_dim, learning_rate=0.01, reg_lambda=0.1):
        """Create a model for inputs with `input_dim` features."""
        self.W = np.random.randn(input_dim) * 0.01
        self.b = 0.0
        self.learning_rate = learning_rate
        self.reg_lambda = reg_lambda  # L2 Regularization term

    def sigmoid(self, z):
        """Sigmoid activation, evaluated as exp(-logaddexp(0, -z)) to avoid overflow in exp."""
        return np.exp(-np.logaddexp(0, -z))

    def predict_proba(self, X):
        """Return sigmoid(X @ W + b) for each row of X."""
        return self.sigmoid(np.dot(X, self.W) + self.b)

    def predict(self, X):
        """Return 0/1 class predictions using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def train(self, X_train, y_train, X_val, y_val, epochs=1000):
        """Run `epochs` full-batch gradient-descent updates with L2 weight decay.

        Args:
            X_train: Training features, shape (m, input_dim).
            y_train: Training targets (0/1), shape (m,).
            X_val: Accepted for signature parity with the other models; not used.
            y_val: Accepted for signature parity with the other models; not used.
            epochs: Number of updates to perform.
        """
        m = X_train.shape[0]
        for _ in range(epochs):
            error = self.predict_proba(X_train) - y_train

            # Data gradient plus the L2 term on the weights only
            dW = np.dot(X_train.T, error) / m + self.reg_lambda * self.W
            db = np.mean(error)

            self.W -= self.learning_rate * dW
            self.b -= self.learning_rate * db

# L2-regularised run: lambda 0.1, learning rate 0.01, 1000 epochs.
model_l2 = LogisticRegressionL2(
    input_dim=X_train.shape[1],
    learning_rate=0.01,
    reg_lambda=0.1,
)
model_l2.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


In [27]:
# Probabilities for the test rows: sigmoid of the linear score X_test @ W + b.
y_test_pred_l2 = model_l2.sigmoid(X_test @ model_l2.W + model_l2.b)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_l2 = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_l2.shape[0]),
        " class": y_test_pred_l2,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_l2.to_csv(
    "submission_l2.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_l2.csv'.")


New Kaggle submission file saved as 'submission_l2.csv'.


#### Training with Mini-Batch Gradient Descent (Batch Size = 32)

In [20]:
# Mini-batch run with batches of 32 samples, learning rate 0.01, 1000 epochs.
model_minibatch_32 = LogisticRegressionMiniBatch(
    input_dim=X_train.shape[1],
    learning_rate=0.01,
    batch_size=32,
)
model_minibatch_32.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


In [28]:
# Probabilities for the test rows: sigmoid of the linear score X_test @ W + b.
y_test_pred_minibatch_32 = model_minibatch_32.sigmoid(
    X_test @ model_minibatch_32.W + model_minibatch_32.b
)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_minibatch_32 = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_minibatch_32.shape[0]),
        " class": y_test_pred_minibatch_32,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_minibatch_32.to_csv(
    "submission_minibatch_32.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_minibatch_32.csv'.")


New Kaggle submission file saved as 'submission_minibatch_32.csv'.


#### Training with Learning Rate Decay

1. Initial LR: 0.01
2. Decay Rate: 0.99 (Each epoch, LR = LR * 0.99)
3. Epochs: 1000


In [22]:
class LogisticRegressionDecay:
    """Binary logistic regression with a geometrically decaying learning rate.

    Trained with full-batch gradient descent. The first update uses
    `initial_lr`; after every update the rate is multiplied by `decay_rate`.

    Attributes:
        W: Weight vector of shape (input_dim,), initialised to small random values.
        b: Scalar bias, initialised to 0.
        initial_lr: Learning rate applied to the first update.
        decay_rate: Factor the learning rate is multiplied by after each epoch.
    """

    def __init__(self, input_dim, initial_lr=0.01, decay_rate=0.99):
        """Create a model for inputs with `input_dim` features."""
        self.W = np.random.randn(input_dim) * 0.01
        self.b = 0.0
        self.initial_lr = initial_lr
        self.decay_rate = decay_rate

    def sigmoid(self, z):
        """Sigmoid activation, evaluated as exp(-logaddexp(0, -z)) to avoid overflow in exp."""
        return np.exp(-np.logaddexp(0, -z))

    def predict_proba(self, X):
        """Return sigmoid(X @ W + b) for each row of X."""
        return self.sigmoid(np.dot(X, self.W) + self.b)

    def predict(self, X):
        """Return 0/1 class predictions using a 0.5 probability threshold."""
        return (self.predict_proba(X) >= 0.5).astype(int)

    def train(self, X_train, y_train, X_val, y_val, epochs=1000):
        """Run `epochs` full-batch updates, shrinking the step size each epoch.

        Epoch k (0-based) uses a learning rate of initial_lr * decay_rate ** k.
        For decay_rate < 1 the sum of all step sizes is bounded by
        initial_lr / (1 - decay_rate), so late epochs change the parameters very little.

        Args:
            X_train: Training features, shape (m, input_dim).
            y_train: Training targets (0/1), shape (m,).
            X_val: Accepted for signature parity with the other models; not used.
            y_val: Accepted for signature parity with the other models; not used.
            epochs: Number of updates to perform.
        """
        m = X_train.shape[0]
        learning_rate = self.initial_lr  # Start with initial LR

        for _ in range(epochs):
            error = self.predict_proba(X_train) - y_train

            dW = np.dot(X_train.T, error) / m
            db = np.mean(error)

            self.W -= learning_rate * dW
            self.b -= learning_rate * db

            # Decay only after the update so the first step really uses initial_lr.
            learning_rate *= self.decay_rate

# Decaying-rate run: start at 0.01, multiply by 0.99 each epoch, 1000 epochs.
model_decay = LogisticRegressionDecay(
    input_dim=X_train.shape[1],
    initial_lr=0.01,
    decay_rate=0.99,
)
model_decay.train(
    X_train, y_train,
    X_val, y_val,
    epochs=1000,
)


In [29]:
# Probabilities for the test rows: sigmoid of the linear score X_test @ W + b.
y_test_pred_decay = model_decay.sigmoid(X_test @ model_decay.W + model_decay.b)

# Submission table: 0-based row ids alongside the predicted probabilities.
submission_decay = pd.DataFrame(
    {
        "id": np.arange(y_test_pred_decay.shape[0]),
        " class": y_test_pred_decay,
    }
)

# Write as UTF-8 CSV with six decimals and no index column.
submission_decay.to_csv(
    "submission_decay.csv",
    index=False,
    encoding="utf-8",
    float_format="%.6f",
)

print("New Kaggle submission file saved as 'submission_decay.csv'.")


New Kaggle submission file saved as 'submission_decay.csv'.
