
Demo to Raghu

In [None]:
from tensorflow.keras.datasets import mnist

# Load MNIST through Keras: load_data() yields a (train, test) pair, each of
# which is an (images, labels) tuple.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Report the array shapes of every split, one per line.
print(
    f"x_train shape: {x_train.shape}",
    f"y_train shape: {y_train.shape}",
    f"x_test shape: {x_test.shape}",
    f"y_test shape: {y_test.shape}",
    sep="\n",
)

x_train shape: (60000, 28, 28)
y_train shape: (60000,)
x_test shape: (10000, 28, 28)
y_test shape: (10000,)


In [None]:
from sklearn.preprocessing import StandardScaler

# Flatten every image to a single feature vector (one row per sample).
x_train_reshaped = x_train.reshape(len(x_train), -1)
x_test_reshaped = x_test.reshape(len(x_test), -1)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(x_train_reshaped)
x_train_scaled = scaler.transform(x_train_reshaped)
x_test_scaled = scaler.transform(x_test_reshaped)

print(
    f"x_train_scaled shape: {x_train_scaled.shape}",
    f"x_test_scaled shape: {x_test_scaled.shape}",
    sep="\n",
)

x_train_scaled shape: (60000, 784)
x_test_scaled shape: (10000, 784)


In [None]:
import numpy as np
import matplotlib.pyplot as plt

def my_pca(X, D):
    """Compute a PCA basis for ``X`` and project ``X`` onto its top ``D`` components.

    The covariance matrix is estimated from mean-centred data, so the result
    is a valid PCA even when ``X`` has not been centred/standardised by the
    caller. Input is converted to float before any arithmetic, which avoids
    integer overflow for inputs such as raw uint8 pixel values.

    Parameters
    ----------
    X : array-like of shape (N, d)
        Data matrix with one sample per row.
    D : int
        Number of principal components to keep, ``1 <= D <= d``.

    Returns
    -------
    X_reduced : numpy.ndarray of shape (N, D)
        ``X @ W``. The mean is deliberately NOT subtracted here so that
        held-out data can be mapped into the same coordinates with
        ``np.dot(X_new, W)``; for zero-mean input this equals the usual
        PCA scores.
    W : numpy.ndarray of shape (d, D)
        Eigenvectors of the covariance matrix (as columns) belonging to the
        ``D`` largest eigenvalues, ordered by decreasing eigenvalue.
    eigenvalues : numpy.ndarray of shape (d,)
        All eigenvalues of the covariance matrix in decreasing order.

    Raises
    ------
    ValueError
        If ``X`` is not 2-D, has fewer than two rows, or ``D`` is outside
        ``[1, d]``.
    """
    X = np.asarray(X, dtype=float)
    if X.ndim != 2:
        raise ValueError(f"X must be 2-D (N, d); got array with ndim={X.ndim}")

    N, d = X.shape
    if N < 2:
        # The unbiased covariance estimate divides by (N - 1).
        raise ValueError("at least two samples are required to estimate covariance")
    if not 1 <= D <= d:
        raise ValueError(f"D must satisfy 1 <= D <= {d}; got {D}")

    # np.cov centres each column and normalises by (N - 1); atleast_2d keeps
    # the d == 1 case (where np.cov returns a 0-d array) usable by eigh.
    covariance_matrix = np.atleast_2d(np.cov(X, rowvar=False))

    # eigh is the solver for symmetric matrices; it returns eigenvalues in
    # ascending order, so reorder to put the largest-variance directions first.
    eigenvalues, eigenvectors = np.linalg.eigh(covariance_matrix)
    sorted_idx = np.argsort(eigenvalues)[::-1]
    eigenvalues = eigenvalues[sorted_idx]
    eigenvectors = eigenvectors[:, sorted_idx]

    # Keep the D leading eigenvectors and project onto them.
    W = eigenvectors[:, :D]   # shape (d, D)
    X_reduced = np.dot(X, W)  # shape (N, D)

    return X_reduced, W, eigenvalues


# Run the (expensive) eigen-decomposition once, for the larger basis.
X_train_reduced_20, W20, eigenvalues = my_pca(x_train_scaled, D=20)
X_test_reduced_20 = np.dot(x_test_scaled, W20)

# my_pca orders components by decreasing eigenvalue, so the 5-component basis
# is simply the first 5 columns of the 20-component one; the corresponding
# projections are the first 5 columns of the 20-component projections.
W5 = W20[:, :5].copy()
X_train_reduced_5 = X_train_reduced_20[:, :5].copy()
X_test_reduced_5 = X_test_reduced_20[:, :5].copy()

# Label every line with the variable it reports so train/test are distinguishable.
print("X_train_reduced_5 shape:", X_train_reduced_5.shape)
print("X_test_reduced_5 shape:", X_test_reduced_5.shape)
print("X_train_reduced_20 shape:", X_train_reduced_20.shape)
print("X_test_reduced_20 shape:", X_test_reduced_20.shape)


X_train_reduced_5 shape: (60000, 5)
X_test_reduced_5 shape: (10000, 5)
X_reduced_20 shape: (60000, 20)
X_reduced_20 shape: (10000, 20)


In [None]:
from sklearn.linear_model import LogisticRegression

# Single logistic-regression estimator that the following cells fit on the
# 5-component and then the 20-component PCA features.
model_lr = LogisticRegression(
    solver='lbfgs',
    penalty='l2',
    C=1.0,
    max_iter=1000,
    random_state=42,
)

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Fit the logistic-regression model on the 5-component PCA features and
# predict on the matching test projection (fit() returns the estimator).
y_pred = model_lr.fit(X_train_reduced_5, y_train).predict(X_test_reduced_5)
accuracy = accuracy_score(y_test, y_pred)

print(
    f"Accuracy: {accuracy:.4f}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

Accuracy: 0.6800
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.81      0.79       980
           1       0.88      0.95      0.91      1135
           2       0.66      0.60      0.63      1032
           3       0.62      0.74      0.67      1010
           4       0.65      0.63      0.64       982
           5       0.47      0.29      0.36       892
           6       0.80      0.83      0.81       958
           7       0.73      0.77      0.75      1028
           8       0.48      0.47      0.48       974
           9       0.59      0.63      0.61      1009

    accuracy                           0.68     10000
   macro avg       0.67      0.67      0.67     10000
weighted avg       0.67      0.68      0.67     10000



In [6]:
from sklearn.metrics import accuracy_score, classification_report

# Refit the same logistic-regression estimator on the 20-component PCA
# features and predict on the matching test projection.
y_pred = model_lr.fit(X_train_reduced_20, y_train).predict(X_test_reduced_20)
accuracy = accuracy_score(y_test, y_pred)

print(
    f"Accuracy: {accuracy:.4f}",
    "Classification Report:",
    classification_report(y_test, y_pred),
    sep="\n",
)

Accuracy: 0.8721
Classification Report:
              precision    recall  f1-score   support

           0       0.91      0.94      0.92       980
           1       0.95      0.97      0.96      1135
           2       0.89      0.84      0.86      1032
           3       0.84      0.86      0.85      1010
           4       0.87      0.89      0.88       982
           5       0.81      0.79      0.80       892
           6       0.90      0.91      0.91       958
           7       0.89      0.87      0.88      1028
           8       0.82      0.80      0.81       974
           9       0.82      0.84      0.83      1009

    accuracy                           0.87     10000
   macro avg       0.87      0.87      0.87     10000
weighted avg       0.87      0.87      0.87     10000



In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Depth-limited decision tree trained on the 5-component PCA features.
model = DecisionTreeClassifier(max_depth=10, random_state=42).fit(
    X_train_reduced_5, y_train
)
y_pred = model.predict(X_test_reduced_5)

# Score the predictions against the held-out labels.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.6947
Classification Report:
              precision    recall  f1-score   support

           0       0.78      0.84      0.81       980
           1       0.94      0.95      0.94      1135
           2       0.68      0.71      0.69      1032
           3       0.66      0.67      0.67      1010
           4       0.61      0.61      0.61       982
           5       0.53      0.39      0.45       892
           6       0.88      0.83      0.86       958
           7       0.74      0.77      0.75      1028
           8       0.45      0.50      0.47       974
           9       0.60      0.63      0.62      1009

    accuracy                           0.69     10000
   macro avg       0.69      0.69      0.69     10000
weighted avg       0.69      0.69      0.69     10000



In [None]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

# Depth-limited decision tree trained on the 20-component PCA features.
model = DecisionTreeClassifier(max_depth=10, random_state=42).fit(
    X_train_reduced_20, y_train
)
y_pred = model.predict(X_test_reduced_20)

# Score the predictions against the held-out labels.
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:", classification_report(y_test, y_pred), sep="\n")

Accuracy: 0.8027
Classification Report:
              precision    recall  f1-score   support

           0       0.88      0.86      0.87       980
           1       0.96      0.94      0.95      1135
           2       0.80      0.78      0.79      1032
           3       0.78      0.78      0.78      1010
           4       0.73      0.80      0.77       982
           5       0.79      0.68      0.73       892
           6       0.90      0.87      0.89       958
           7       0.87      0.79      0.83      1028
           8       0.60      0.75      0.67       974
           9       0.74      0.74      0.74      1009

    accuracy                           0.80     10000
   macro avg       0.81      0.80      0.80     10000
weighted avg       0.81      0.80      0.80     10000

