<a href="https://colab.research.google.com/github/AbuTalhaGT/ML-Algorithms/blob/main/Experimenting_with_PCA_on_MNIST_Dataset.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### 1. Fit PCA and Transform Data


In [20]:
import numpy as np
import pandas as pd
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
import torchvision.transforms as transforms
import torchvision.datasets as datasets

# Load the MNIST training split. No torchvision transform is passed: the code
# below reads the raw uint8 pixels directly from `mnist.data`, and dataset
# transforms only run on item access, so a transform would never take effect.
mnist = datasets.MNIST(root='./data', train=True, download=True)

# Convert dataset to numpy arrays (one flattened 28*28 row per image)
images_np = mnist.data.numpy().reshape(-1, 28*28)
labels_np = np.array(mnist.targets)

# Split data into training and testing (70% / 30%, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(images_np, labels_np, test_size=0.3, random_state=42)

# Define PCA pipeline
pipeline = Pipeline([
    ('scaler', StandardScaler()),  # Standardize features
    # Keep the first 60 principal components. random_state pins the projection
    # on scikit-learn versions whose default solver policy picks the randomized
    # SVD for data of this size; deterministic solvers simply ignore it.
    ('pca', PCA(n_components=60, random_state=42))
])

# Fit scaler + PCA on the training rows only, then project both splits with
# the fitted pipeline so no test-set statistics leak into the transform
pipeline.fit(X_train)
X_train_pca = pipeline.transform(X_train)
X_test_pca = pipeline.transform(X_test)

# Create DataFrames from the PCA-transformed data (columns PC1..PCk)
df_train_pca = pd.DataFrame(X_train_pca, columns=[f'PC{i+1}' for i in range(X_train_pca.shape[1])])
df_test_pca = pd.DataFrame(X_test_pca, columns=[f'PC{i+1}' for i in range(X_test_pca.shape[1])])

# Add labels to DataFrames
df_train_pca['Label'] = y_train
df_test_pca['Label'] = y_test

# Optional: Save DataFrames to CSV files
df_train_pca.to_csv('train_pca_data.csv', index=False)
df_test_pca.to_csv('test_pca_data.csv', index=False)


### 2. Use DataFrames for Deep Learning
Once you have your PCA-transformed data in DataFrames, you can use them as inputs to your deep learning models. If you’re using a deep learning framework like PyTorch or TensorFlow, you’ll need to convert these DataFrames back to tensors or appropriate data structures.


In [21]:
import torch
from torch.utils.data import TensorDataset, DataLoader

# Feature tensors: every column except 'Label', stored as float32
X_train_tensor = torch.tensor(
    df_train_pca.drop(columns=['Label']).to_numpy(),
    dtype=torch.float32,
)
X_test_tensor = torch.tensor(
    df_test_pca.drop(columns=['Label']).to_numpy(),
    dtype=torch.float32,
)

# Target tensors: the 'Label' column, stored as int64 class indices
y_train_tensor = torch.tensor(df_train_pca['Label'].to_numpy(), dtype=torch.long)
y_test_tensor = torch.tensor(df_test_pca['Label'].to_numpy(), dtype=torch.long)

# Pair features with targets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Mini-batches of 64; only the training loader reshuffles between epochs
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

### Define the Deep Learning Model


In [22]:
import torch.nn as nn
import torch.optim as optim

class DeepNN(nn.Module):
    """Four-layer fully connected network with ReLU after the first three layers.

    ``forward`` returns the output of the last linear layer as-is. A
    ``softmax`` module is created in ``__init__`` but ``forward`` does not
    apply it.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim):
        """Create the four linear layers plus the activation modules.

        Args:
            input_dim: Number of features per input row.
            hidden_dim1: Width of the first hidden layer.
            hidden_dim2: Width of the second hidden layer.
            hidden_dim3: Width of the third hidden layer.
            output_dim: Number of values produced per input row.
        """
        super().__init__()
        self.fc1 = nn.Linear(in_features=input_dim, out_features=hidden_dim1)
        self.fc2 = nn.Linear(in_features=hidden_dim1, out_features=hidden_dim2)
        self.fc3 = nn.Linear(in_features=hidden_dim2, out_features=hidden_dim3)
        self.fc4 = nn.Linear(in_features=hidden_dim3, out_features=output_dim)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Run ``x`` through the three ReLU hidden layers and the output layer."""
        hidden = x
        for layer in (self.fc1, self.fc2, self.fc3):
            hidden = self.relu(layer(hidden))
        return self.fc4(hidden)

# Seed PyTorch's global RNG so the weight initialisation below (and later draws
# from the same default generator, such as DataLoader shuffling) repeats on a
# fresh Restart & Run All, matching the fixed seed already used for the split.
torch.manual_seed(42)

# Define model parameters
input_dim = X_train_tensor.shape[1]  # Number of PCA components
hidden_dim1 = 128
hidden_dim2 = 64
hidden_dim3 = 32
output_dim = 10  # Number of classes (digits 0-9)

model = DeepNN(input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim)

### Train the model

In [23]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` in place and print the mean batch loss after every epoch.

    Args:
        model: Module to optimise; put into training mode via ``model.train()``.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer stepping the parameters of ``model``.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        list[float]: The mean per-batch loss of each epoch, in epoch order, so
        the training curve can be inspected or plotted afterwards.

    Raises:
        ValueError: If ``train_loader`` contains no batches (the epoch mean
            would otherwise be a division by zero).
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError('train_loader is empty: there are no batches to train on')

    epoch_losses = []
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()  # clear gradients left over from the previous step
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Unweighted mean over batches (a smaller final batch counts as one batch)
        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}')
    return epoch_losses

# Hyperparameters for this training run
LEARNING_RATE = 0.001
NUM_EPOCHS = 25

# Cross-entropy objective, optimised with Adam over every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Run the optimisation loop
train_model(model, train_loader, criterion, optimizer, num_epochs=NUM_EPOCHS);

Epoch [1/25], Loss: 0.3972658769396251
Epoch [2/25], Loss: 0.1551111353705871
Epoch [3/25], Loss: 0.11958030009815183
Epoch [4/25], Loss: 0.10073584007720153
Epoch [5/25], Loss: 0.08107863050209332
Epoch [6/25], Loss: 0.06815734269952223
Epoch [7/25], Loss: 0.06569376868859239
Epoch [8/25], Loss: 0.05621995456067383
Epoch [9/25], Loss: 0.04880789893677903
Epoch [10/25], Loss: 0.04561415903466574
Epoch [11/25], Loss: 0.04474029924842434
Epoch [12/25], Loss: 0.038772126180002864
Epoch [13/25], Loss: 0.03296206616171064
Epoch [14/25], Loss: 0.03520423229498494
Epoch [15/25], Loss: 0.02885501035968447
Epoch [16/25], Loss: 0.02877518596677634
Epoch [17/25], Loss: 0.027589902391133098
Epoch [18/25], Loss: 0.026499253755203313
Epoch [19/25], Loss: 0.029269067574033455
Epoch [20/25], Loss: 0.021478017573826946
Epoch [21/25], Loss: 0.019192806706011003
Epoch [22/25], Loss: 0.01797615488713016
Epoch [23/25], Loss: 0.021453896987469353
Epoch [24/25], Loss: 0.020148259046390896
Epoch [25/25], Loss

### Evaluate the model

In [24]:
def evaluate_model(model, test_loader):
    """Return the top-1 accuracy of ``model`` over ``test_loader`` as a percentage.

    The model is switched to evaluation mode with ``model.eval()`` and is left
    in that mode on return. Gradients are disabled while predicting.

    Args:
        model: Module whose output has one score per class along dimension 1.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        float: ``100 * correct / total`` over every sample yielded by the loader.

    Raises:
        ValueError: If the loader yields no samples, since accuracy is then
            undefined.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # Predicted class = index of the largest score in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError('test_loader yielded no samples; accuracy is undefined')
    return 100 * correct / total

# Score the trained network on the held-out loader and report the percentage
accuracy = evaluate_model(model=model, test_loader=test_loader)
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 97.11%




---



### Applying a Simple Deep Learning Model to the MNIST Dataset

In [6]:
import torch
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader

# Per-image preprocessing: convert to a tensor, then rescale each pixel with
# (x - 0.5) / 0.5
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# MNIST train split first, then the test split, both stored under ./data
train_dataset, test_dataset = (
    datasets.MNIST(root='./data', train=use_train_split, download=True, transform=transform)
    for use_train_split in (True, False)
)

# Mini-batches of 64; only the training loader reshuffles between epochs
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=64, shuffle=False)

In [7]:
import torch.nn as nn
import torch.optim as optim

class DeepNN(nn.Module):
    """Fully connected classifier that flattens its input before four linear layers.

    ``forward`` returns the raw output of the last linear layer; the
    ``softmax`` module created in ``__init__`` is not applied there.
    """

    def __init__(self, input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim):
        """Create the four linear layers plus the activation modules.

        Args:
            input_dim: Number of features per sample after flattening.
            hidden_dim1: Width of the first hidden layer.
            hidden_dim2: Width of the second hidden layer.
            hidden_dim3: Width of the third hidden layer.
            output_dim: Number of values produced per sample.
        """
        super().__init__()
        self.fc1 = nn.Linear(input_dim, hidden_dim1)
        self.fc2 = nn.Linear(hidden_dim1, hidden_dim2)
        self.fc3 = nn.Linear(hidden_dim2, hidden_dim3)
        self.fc4 = nn.Linear(hidden_dim3, output_dim)
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=1)  # not used by forward(); retained as an attribute

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_dim`` features and return the final layer's output."""
        # Flatten to the width fc1 was built with instead of a hard-coded 28*28,
        # so the model also works when input_dim is not 784. reshape (unlike
        # view) additionally accepts non-contiguous input.
        x = x.reshape(-1, self.fc1.in_features)
        x = self.relu(self.fc1(x))
        x = self.relu(self.fc2(x))
        x = self.relu(self.fc3(x))
        x = self.fc4(x)
        return x

# Seed PyTorch's global RNG so the weight initialisation below (and later draws
# from the same default generator, such as DataLoader shuffling) repeats on a
# fresh Restart & Run All.
torch.manual_seed(42)

# Define model parameters
input_dim = 28 * 28  # Flattened image size
hidden_dim1 = 128
hidden_dim2 = 64
hidden_dim3 = 32
output_dim = 10  # Number of classes (digits 0-9)

model = DeepNN(input_dim, hidden_dim1, hidden_dim2, hidden_dim3, output_dim)

In [8]:
def train_model(model, train_loader, criterion, optimizer, num_epochs=10):
    """Train ``model`` in place and print the mean batch loss after every epoch.

    Args:
        model: Module to optimise; put into training mode via ``model.train()``.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable mapping ``(outputs, labels)`` to a scalar loss tensor.
        optimizer: Optimizer stepping the parameters of ``model``.
        num_epochs: Number of full passes over ``train_loader``.

    Returns:
        list[float]: The mean per-batch loss of each epoch, in epoch order, so
        the training curve can be inspected or plotted afterwards.

    Raises:
        ValueError: If ``train_loader`` contains no batches (the epoch mean
            would otherwise be a division by zero).
    """
    num_batches = len(train_loader)
    if num_batches == 0:
        raise ValueError('train_loader is empty: there are no batches to train on')

    epoch_losses = []
    model.train()
    for epoch in range(num_epochs):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()  # clear gradients left over from the previous step
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        # Unweighted mean over batches (a smaller final batch counts as one batch)
        epoch_loss = running_loss / num_batches
        epoch_losses.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss}')
    return epoch_losses

# Hyperparameters for this training run
LEARNING_RATE = 0.001
NUM_EPOCHS = 25

# Cross-entropy objective, optimised with Adam over every model parameter
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Run the optimisation loop
train_model(model, train_loader, criterion, optimizer, num_epochs=NUM_EPOCHS);

Epoch [1/25], Loss: 0.4627984726845201
Epoch [2/25], Loss: 0.21199782742564677
Epoch [3/25], Loss: 0.15271694261405958
Epoch [4/25], Loss: 0.12340187979862094
Epoch [5/25], Loss: 0.10611165773425339
Epoch [6/25], Loss: 0.0916575995680473
Epoch [7/25], Loss: 0.0841807268194056
Epoch [8/25], Loss: 0.0727729064880062
Epoch [9/25], Loss: 0.06491103728956132
Epoch [10/25], Loss: 0.061204470721741656
Epoch [11/25], Loss: 0.056300457707531296
Epoch [12/25], Loss: 0.051989550558368125
Epoch [13/25], Loss: 0.04763239871720219
Epoch [14/25], Loss: 0.047648696751241434
Epoch [15/25], Loss: 0.040439538541187876
Epoch [16/25], Loss: 0.03959544482490749
Epoch [17/25], Loss: 0.0374382527348442
Epoch [18/25], Loss: 0.03460833887356882
Epoch [19/25], Loss: 0.03507349035478751
Epoch [20/25], Loss: 0.03248244476463233
Epoch [21/25], Loss: 0.0307590881311393
Epoch [22/25], Loss: 0.03156687504432446
Epoch [23/25], Loss: 0.026763104485238776
Epoch [24/25], Loss: 0.027490515014730894
Epoch [25/25], Loss: 0.0

In [9]:
def evaluate_model(model, test_loader):
    """Return the top-1 accuracy of ``model`` over ``test_loader`` as a percentage.

    The model is switched to evaluation mode with ``model.eval()`` and is left
    in that mode on return. Gradients are disabled while predicting.

    Args:
        model: Module whose output has one score per class along dimension 1.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.

    Returns:
        float: ``100 * correct / total`` over every sample yielded by the loader.

    Raises:
        ValueError: If the loader yields no samples, since accuracy is then
            undefined.
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            outputs = model(inputs)
            # Predicted class = index of the largest score in each row
            predicted = outputs.argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    if total == 0:
        raise ValueError('test_loader yielded no samples; accuracy is undefined')
    return 100 * correct / total

# Score the trained network on the MNIST test loader and report the percentage
accuracy = evaluate_model(model=model, test_loader=test_loader)
print(f'Test Accuracy: {accuracy:.2f}%')

Test Accuracy: 97.32%
