In [11]:
import numpy as np
import scipy.io
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression, Perceptron
from sklearn.metrics import accuracy_score
from sklearn.neural_network import MLPClassifier
import torch
import torch.nn as nn
import torch.optim as optim

In [2]:
MNIST_data = scipy.io.loadmat('C:\My Files\micromaster\Machine Learning\HW2\mnist.mat')
train_data = MNIST_data["train"]
test_data = MNIST_data["test"]
X_train = train_data[:, :784]
y_train = train_data[:, 784]
X_test = test_data[:, :784]
y_test = test_data[:, 784]
print(f"the shapes are:\n X_train: {X_train.shape}, y_train: {y_train.shape}\n X_test: {X_test.shape}, y_test: {y_test.shape}")

  MNIST_data = scipy.io.loadmat('C:\My Files\micromaster\Machine Learning\HW2\mnist.mat')


the shapes are:
 X_train: (8000, 784), y_train: (8000,)
 X_test: (2000, 784), y_test: (2000,)


In [8]:
lambda_values = np.array([0.01, 0.1, 1, 10, 100])
c_values = 1/lambda_values
param_grid = {'C': c_values}
log_reg = LogisticRegression(penalty='l2', max_iter=100, solver='liblinear')
grid_search = GridSearchCV(log_reg, param_grid, cv=10, scoring='accuracy')
grid_search.fit(X_train, y_train)
best_C = grid_search.best_params_['C']
print(f"the Best lambda is : {1/best_C}")
best_model = grid_search.best_estimator_
train_ypred = best_model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_ypred)
print(f"Train Accuracy is: {train_accuracy:.4f}")
test_ypred = best_model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_ypred)
print(f"Test Accuracy is: {test_accuracy:.4f}")

the Best lambda is : 1.0
Train Accuracy is: 0.9565
Test Accuracy is: 0.8955


In [12]:
train38_data = train_data[(train_data[:, -1] == 3) | (train_data[:, -1] == 8)]
X38_train = train38_data[:, :784]
y38_train = train38_data[:, 784]
train3_data = train_data[train_data[:, -1] == 3]
train8_data = train_data[train_data[:, -1] == 8]
print(f"in train data the count for 3: {train3_data.shape[0]}\nin train data the count for 8: {train8_data.shape[0]}")
test38_data = test_data[(test_data[:, -1] == 3) | (test_data[:, -1] == 8)]
X38_test = test38_data[:, :784]
y38_test = test38_data[:, 784]
test3_data = test_data[test_data[:, -1] == 3]
test8_data = test_data[test_data[:, -1] == 8]
print(f"in test data the count for 3: {test3_data.shape[0]}\nin test data the count for 8:  {test8_data.shape[0]}")
print(f"the shapes are:\n for X38_train: {X38_train.shape}, for y38_train:{y38_train.shape},\n for X38_test: {X38_test.shape}, for y38_test: {y38_test.shape}")

in train data the count for 3: 818
in train data the count for 8: 803
in test data the count for 3: 182
in test data the count for 8:  197
the shapes are:
 for X38_train: (1621, 784), for y38_train:(1621,),
 for X38_test: (379, 784), for y38_test: (379,)


In [19]:
X_train_tensor = torch.tensor(X38_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y38_train, dtype=torch.long)
X_test_tensor = torch.tensor(X38_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y38_test, dtype=torch.long)

class Perceptron(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(Perceptron, self).__init__()
        self.fc = nn.Linear(input_dim, output_dim)
    def forward(self, x):
        return self.fc(x)

input_dim = X_train_tensor.shape[1]
output_dim = len(set(y_train_tensor))
model = Perceptron(input_dim, output_dim)

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.001)

num_epochs = 1000
for epoch in range(num_epochs):
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
    if (epoch+1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

with torch.no_grad():
    train_ypred = model(X_train_tensor)
    _, train_predicted = torch.max(train_ypred, 1)
    train_accuracy = accuracy_score(y38_train, train_predicted.numpy())
    print(f'Train Accuracy: {train_accuracy:.4f}')
    test_ypred = model(X_test_tensor)
    _, test_predicted = torch.max(test_ypred, 1)
    test_accuracy = accuracy_score(y38_test, test_predicted.numpy())
    print(f'Test Accuracy: {test_accuracy:.4f}')

Epoch [100/1000], Loss: 4.7845
Epoch [200/1000], Loss: 2.5476
Epoch [300/1000], Loss: 1.4515
Epoch [400/1000], Loss: 1.0052
Epoch [500/1000], Loss: 0.7868
Epoch [600/1000], Loss: 0.6600
Epoch [700/1000], Loss: 0.5773
Epoch [800/1000], Loss: 0.5189
Epoch [900/1000], Loss: 0.4752
Epoch [1000/1000], Loss: 0.4411
Train Accuracy: 0.9204
Test Accuracy: 0.9103


In [13]:
perceptron = Perceptron(eta0=0.001, max_iter=100)
perceptron.fit(X38_train, y38_train)
y_train_pred = perceptron.predict(X38_train)
y_test_pred = perceptron.predict(X38_test)
train_accuracy = accuracy_score(y38_train, y_train_pred)
test_accuracy = accuracy_score(y38_test, y_test_pred)
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Train Accuracy: 0.9809
Test Accuracy: 0.9551


In [20]:
model = MLPClassifier(hidden_layer_sizes=(100,), learning_rate_init=0.001, max_iter=200)
model.fit(X_train, y_train)
train_y_pred = model.predict(X_train)
train_accuracy = accuracy_score(y_train, train_y_pred)
print(f'Train Accuracy: {train_accuracy:.4f}')
test_y_pred = model.predict(X_test)
test_accuracy = accuracy_score(y_test, test_y_pred)
print(f'Test Accuracy: {test_accuracy:.4f}')

Train Accuracy: 1.0000
Test Accuracy: 0.9465
