<a href="https://colab.research.google.com/github/Gautam0507/ECE034-Assignments/blob/main/Assignment-3/Assignment_3_GPU.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Assignment 3

---
Name: N Gautam

Roll No: 2210110411

---
Name: Risshi Kamalesh K

Roll No: 2210110511

# Importing Libraries

In [None]:
import time
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
import cupy as cp
from cuml.svm import SVC

# Importing data

In [None]:
# Load the MNIST CSV from the Colab working directory; the cells below expect
# a 'label' column alongside the feature columns.
df = pd.read_csv('/content/mnist.csv')
print(df.shape)

(42000, 785)


# Handling Null values

In [None]:
# Discard every row that contains at least one missing value, then confirm
# how many rows are left.
df = df.dropna()
print(df.shape)

(42000, 785)


# Separate labels and input values

In [None]:
# Features are every column except 'label'; the target is the 'label' column.
X = df.drop(columns='label')
Y = df['label']

In [None]:
# Show the feature-matrix shape and the label-vector shape, one per line.
print(X.shape, Y.shape, sep='\n')

(42000, 784)
(42000,)


# Scaling the values

In [None]:
scaler = StandardScaler()
# fit_transform returns the standardized values as a new array and leaves its
# input untouched, so the result must be kept; otherwise every model below is
# trained on the raw, unscaled features.
# NOTE(review): the scaler is fitted on the full dataset before the train/test
# split, so test rows contribute to the scaling statistics - consider fitting
# on the training split only.
X = scaler.fit_transform(X)
X

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

# Split into train and test samples

In [None]:
# Hold out half of the samples for testing; the fixed seed keeps the split
# reproducible across runs.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    train_size=0.5,
    random_state=42,
)

In [None]:
# Report how many samples landed in each split.
print(f"Training data length: {len(X_train)}")
print(f"Testing data length: {len(X_test)}")

Training data length: 21000
Testing data length: 21000


# Moving data to GPU for faster training

In [None]:
# Copy the training features, training labels and test features into CuPy
# float32 arrays. Y_test is not converted: scoring is done with sklearn on the
# host copy.
X_train_gpu, Y_train_gpu, X_test_gpu = (
    cp.asarray(host_data, dtype=cp.float32)
    for host_data in (X_train, Y_train, X_test)
)

# Training SVM with Linear Kernel

In [None]:
# Section 2: Linear SVM Classification (Part a) - GPU Version
def linear_svm_classification(C_values):
  """Train and evaluate a cuML linear-kernel SVC for each value of C.

  Reads the module-level X_train_gpu, Y_train_gpu, X_test_gpu and Y_test.

  Args:
    C_values: iterable of regularization strengths; one model is fitted per
      value.

  Returns:
    dict mapping each C to {'accuracy': ..., 'computation_time': ...}, where
    computation_time is the wall-clock seconds spent on fit + predict +
    copying the predictions back to the host.
  """
  results = {}

  for C in C_values:
    start_time = time.time()

    # Linear SVM Classifier using cuML
    linear_svm = SVC(kernel='linear', C=C)
    linear_svm.fit(X_train_gpu, Y_train_gpu)

    # Predictions
    y_pred_gpu = linear_svm.predict(X_test_gpu)

    # Convert predictions back to CPU *before* stopping the clock, so the
    # measurement covers the same steps as the RBF and polynomial runs it is
    # later compared against (and the device work has been pulled to the host).
    y_pred = cp.asnumpy(y_pred_gpu)
    end_time = time.time()

    # Performance metrics
    accuracy = accuracy_score(Y_test, y_pred)

    results[C] = {
      'accuracy': accuracy,
      'computation_time': end_time - start_time
    }

  # Print results
  print("GPU Linear SVM Results:")
  for C, result in results.items():
    print(f"C = {C}:"
    f"  Accuracy: {result['accuracy']:.4f}"
    f"  Computation Time: {result['computation_time']:.4f} seconds")

  return results

In [None]:
# Sweep the regularization strength C over three orders of magnitude.
C_values = np.array([0.1, 1.0, 10.0])
linear_results = linear_svm_classification(C_values)

GPU Linear SVM Results:
C = 0.1:  Accuracy: 0.9109  Computation Time: 11.1521 seconds
C = 1.0:  Accuracy: 0.9109  Computation Time: 10.8095 seconds
C = 10.0:  Accuracy: 0.9109  Computation Time: 10.7819 seconds


# Training SVM with Gaussian Kernel (RBF)

In [None]:
def rbf_svm_classification(C_gamma_combinations):
  """Fit a cuML RBF-kernel SVC for every (C, gamma) pair and score it.

  Reads the module-level X_train_gpu, Y_train_gpu, X_test_gpu and Y_test.

  Args:
    C_gamma_combinations: iterable of two-element (C, gamma) pairs.

  Returns:
    dict keyed by (C, gamma) holding {'accuracy': ..., 'computation_time': ...};
    the time spans fit, predict and the copy of predictions back to the host.
  """
  results = {}

  for C, gamma in C_gamma_combinations:
    tic = time.time()

    classifier = SVC(kernel='rbf', C=C, gamma=gamma)
    classifier.fit(X_train_gpu, Y_train_gpu)

    # Predict on the device, then bring the labels to the host so sklearn can
    # score them; the clock stops only after that copy.
    host_predictions = cp.asnumpy(classifier.predict(X_test_gpu))
    elapsed = time.time() - tic

    results[(C, gamma)] = {
      'accuracy': accuracy_score(Y_test, host_predictions),
      'computation_time': elapsed
    }

  # One summary line per hyper-parameter pair.
  print(" RBF Kernel SVM Results Summary:")
  for (C, gamma), metrics in results.items():
    summary = (f"C = {C}, Gamma = {gamma}:"
               f"  Accuracy: {metrics['accuracy']:.4f}"
               f"  Computation Time: {metrics['computation_time']:.4f} seconds")
    print(summary)

  return results

In [None]:
# Evaluate the RBF kernel on two settings; each row is one (C, gamma) pair.
C_gamma_combinations = np.array([[0.1, 1.0], [1.0, 0.1]])
rbf_results = rbf_svm_classification(C_gamma_combinations)

 RBF Kernel SVM Results Summary:
C = 0.1, Gamma = 1.0:  Accuracy: 0.1110  Computation Time: 9.4210 seconds
C = 1.0, Gamma = 0.1:  Accuracy: 0.1110  Computation Time: 9.7232 seconds


# Train SVM with polynomial kernel

In [None]:
def polynomial_svm_classification(degrees):
  """Fit a cuML polynomial-kernel SVC for each degree and score it.

  Reads the module-level X_train_gpu, Y_train_gpu, X_test_gpu and Y_test.

  Args:
    degrees: iterable of polynomial degrees; one model is fitted per degree.

  Returns:
    dict keyed by degree holding {'accuracy': ..., 'computation_time': ...};
    the time spans fit, predict and the copy of predictions back to the host.
  """
  results = {}

  for degree in degrees:
    tic = time.time()

    # Polynomial-kernel SVM classifier from cuML
    poly_svm = SVC(kernel='poly', degree=degree)
    poly_svm.fit(X_train_gpu, Y_train_gpu)

    # Predict on the device, then bring the labels to the host so sklearn can
    # score them; the clock stops only after that copy.
    host_predictions = cp.asnumpy(poly_svm.predict(X_test_gpu))
    elapsed = time.time() - tic

    results[degree] = {
      'accuracy': accuracy_score(Y_test, host_predictions),
      'computation_time': elapsed
    }

  # One summary line per degree.
  print("Polynomial Kernel SVM Results:")
  for degree, metrics in results.items():
    summary = (f"Degree = {degree}:"
               f"  Accuracy: {metrics['accuracy']:.4f}"
               f"  Computation Time: {metrics['computation_time']:.4f} seconds")
    print(summary)

  return results

In [None]:
# Evaluate the polynomial kernel at degree 2 and degree 4.
degrees = np.array([2, 4])
poly_results = polynomial_svm_classification(degrees)

Polynomial Kernel SVM Results:
Degree = 2:  Accuracy: 0.9669  Computation Time: 4.8492 seconds
Degree = 4:  Accuracy: 0.9549  Computation Time: 5.2588 seconds


# Training Random Forest for comparison

In [None]:
def random_classification():
  """Train a 100-tree scikit-learn random forest on the CPU and score it.

  Reads the module-level X_train, Y_train, X_test and Y_test.

  Returns:
    dict with 'accuracy' (test-set accuracy) and 'computation_time'
    (wall-clock seconds covering construction, fit and predict).
  """
  tic = time.time()
  forest = RandomForestClassifier(n_estimators=100, random_state=42)
  forest.fit(X_train, Y_train)
  test_predictions = forest.predict(X_test)
  elapsed = time.time() - tic

  # Scoring happens after the clock has stopped, so it is not timed.
  rf_accuracy = accuracy_score(Y_test, test_predictions)

  print("Random Forest Classifier:")
  print(f"Accuracy: {rf_accuracy:.4f}"
        f"  Computation Time: {elapsed:.4f} seconds")

  return {
    'accuracy' : rf_accuracy,
    'computation_time' : elapsed
  }

In [None]:
# Run the CPU random-forest baseline; compare_classifiers() reads rf_results.
rf_results = random_classification()

Random Forest Classifier:
Accuracy: 0.9582  Computation Time: 13.8070 seconds


# Performance Analysis

In [None]:
def compare_classifiers():
  """Print the mean accuracy and mean computation time of every classifier.

  Reads the module-level linear_results, rbf_results, poly_results (dicts of
  {'accuracy', 'computation_time'} records keyed by hyper-parameter) and
  rf_results (a single such record). Returns nothing.
  """
  # Each SVM family is averaged over its own runs. Driving all three from one
  # table keeps accuracy and time tied to the same result set (the RBF time
  # was previously averaged over the polynomial runs by mistake).
  svm_runs = [
    ("SVM Linear Kernel", linear_results),
    ("\nSVM RBF Kernel", rbf_results),
    ("\nSVM Polynomial Kernel", poly_results),
  ]
  for title, runs in svm_runs:
    print(title)
    avg_accuracy = np.mean([run['accuracy'] for run in runs.values()])
    avg_time = np.mean([run['computation_time'] for run in runs.values()])
    print(f"Average Accuracy: {avg_accuracy:.4f}  Average Time Taken: {avg_time:.4f}")

  # The random forest was run once, so its single record is reported as-is.
  print("\nRandom Forest")
  print(f"Average Accuracy: {rf_results['accuracy']:.4f}  Average Time Taken: {rf_results['computation_time']:.4f}")

In [None]:
# Print the side-by-side summary; all four result sets above must exist first.
compare_classifiers()

SVM Linear Kernel
Average Accuracy: 0.9109  Average Time Taken: 10.9145

SVM RBF Kernel
Average Accuracy: 0.1110  Average Time Taken: 5.0540

SVM Polynomial Kernel
Average Accuracy: 0.9609  Average Time Taken: 5.0540

Random Forest
Average Accuracy: 0.9582  Average Time Taken: 13.8070


From the accuracy and the computation time, we can see that the polynomial kernel gives the best results on this dataset.