In [4]:
# Load the training and test data from CSV files (if not already loaded)
train_in = pd.read_csv('train_in.csv', header=None)
train_out = pd.read_csv('train_out.csv', header=None)
test_in = pd.read_csv('test_in.csv', header=None)
test_out = pd.read_csv('test_out.csv', header=None)

In [5]:
# Task 1
import numpy as np
import pandas as pd
from scipy.spatial import distance

# Calculate the centers for each digit class
centers = []
for digit in range(10):
    digit_images = train_in[train_out[0] == digit]
    center = digit_images.mean(axis=0)
    centers.append(center)
# Calculate distances between centers
distances = np.zeros((10, 10))
for i in range(10):
    for j in range(i+1, 10):
        dist = distance.euclidean(centers[i], centers[j])
        distances[i, j] = dist
        distances[j, i] = dist

# Function to classify a new image based on the nearest center
def classify_image(image):
    min_distance = float('inf')
    predicted_digit = None
    for digit in range(10):
        dist = distance.euclidean(image, centers[digit])
        if dist < min_distance:
            min_distance = dist
            predicted_digit = digit
    return predicted_digit

# Classify test images and calculate accuracy
correct_predictions = 0
total_images = test_in.shape[0]
for i in range(total_images):
    test_image = test_in.iloc[i, :].values
    true_label = test_out.iloc[i, 0]
    predicted_label = classify_image(test_image)
    if predicted_label == true_label:
        correct_predictions += 1

accuracy = correct_predictions / total_images
print(f'Accuracy: {accuracy * 100:.2f}%')


Accuracy: 80.40%


In [12]:
# Task 2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import umap

# Combine input and output data
mnist_data = pd.concat([train_in, train_out], axis=1)

# Extract labels and features
labels = mnist_data.iloc[:, -1]
features = mnist_data.iloc[:, :-1]

# Perform PCA dimensionality reduction
pca = PCA(n_components=2)
pca_result = pca.fit_transform(features)

# Perform t-SNE dimensionality reduction
tsne = TSNE(n_components=2, perplexity=30, n_iter=300)
tsne_result = tsne.fit_transform(features)

# UMAP
ump = umap.UMAP()

# Plot the results
plt.figure(figsize=(12, 6))
plt.subplot(131)
plt.scatter(pca_result[:, 0], pca_result[:, 1], c=labels, cmap=plt.colormaps['viridis'])
plt.title('PCA')

plt.subplot(132)
plt.scatter(tsne_result[:, 0], tsne_result[:, 1], c=labels, cmap=plt.colormaps['viridis'])
plt.title('t-SNE')

plt.subplot(133)


plt.tight_layout()
plt.show()


AttributeError: module 'umap' has no attribute 'UMAP'

In [7]:
# Task 3
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score

# Calculate the mean for each digit class in the training set
class_means = []

for digit in range(10):
    class_data = train_data[train_labels == digit].dropna()
    class_mean = class_data.mean(axis=0).values
    class_means.append(class_mean)

# Implement the Nearest Mean Classifier
def nearest_mean_classifier(sample):
    distances = [np.linalg.norm(sample - mean) for mean in class_means]
    return np.argmin(distances)

# Classify all points in the training set
train_predictions = [nearest_mean_classifier(sample) for _, sample in train_data.iterrows()]

# Calculate the accuracy on the training set
train_accuracy = accuracy_score(train_labels, train_predictions)

# Classify all points in the test set using training set centers
test_predictions = [nearest_mean_classifier(sample) for _, sample in test_data.iterrows()]

# Calculate the accuracy on the test set
test_accuracy = accuracy_score(test_labels, test_predictions)

print(f"Training Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")


Training Accuracy: 18.69%
Test Accuracy: 22.40%


In [11]:
# Task 4
from sklearn.neighbors import KNeighborsClassifier

neigh = KNeighborsClassifier(n_neighbors=3)
neigh.fit(X, y)

{'data': array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],
       [5.7, 3.8, 1.7, 0.3],
       [5.1, 3.8, 1.5, 0.3],
       [5.4, 3.4, 1.7, 0.2],
       [5.1, 3.7, 1.5, 0.4],
       [4.6, 3.6, 1. , 0.2],
       [5.1, 3.3, 1.7, 0.5],
       [4.8, 3.4, 1.9, 0.2],
       [5. , 3. , 1.6, 0.2],
       [5. , 3.4, 1.6, 0.4],
       [5.2, 3.5, 1.5, 0.2],
       [5.2, 3.4, 1.4, 0.2],
       [4.7, 3.2, 1.6, 0.2],
       [4.8, 3.1, 1.6, 0.2],
       [5.4, 3.4, 1.5, 0.4],
       [5.2, 4.1, 1.5, 0.1],
       [5.5, 4.2, 1.4, 0.2],
     