# ANN

In [1]:
#ANN
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.metrics import accuracy_score, f1_score

# --- Data: read the CSV and separate the target from the feature columns ---
df = pd.read_csv('train.csv')
y = df['label'].astype(int)               # class labels as integers
X = df.drop(columns=['label']) / 255.0    # every non-label column divided by 255
                                          # (assumes 8-bit pixel intensities -> [0, 1])

# --- Hold out 20% of the rows for testing; the seed fixes the shuffle ---
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Model: MLP with one hidden layer of 128 ReLU units, Adam optimiser,
#     capped at 20 iterations. fit() returns the estimator, so build and
#     train in a single expression. ---
model = MLPClassifier(
    hidden_layer_sizes=(128,),
    activation='relu',
    solver='adam',
    max_iter=20,
    random_state=42,
).fit(X_train, y_train)

# --- Predictions on both splits ---
y_train_pred, y_test_pred = model.predict(X_train), model.predict(X_test)

# --- Metrics: accuracy on each split, support-weighted F1 on the test split ---
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='weighted')

print(f"Training Accuracy: {train_accuracy:.4f}")
print(f"Testing Accuracy: {test_accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")




Training Accuracy: 0.9980
Testing Accuracy: 0.9698
F1 Score: 0.9698


In [2]:
import matplotlib.pyplot as plt

In [3]:
# Rebuild features/labels from `df` by column position and redo the 80/20 split.
# Column 0 is taken as the target and every remaining column as a feature
# (assumes the 'label' column is first in the CSV -- TODO confirm).
# Note: X here is taken straight from `df`, with no rescaling applied.
from sklearn.model_selection import train_test_split

y = df.iloc[:, 0]
X = df.iloc[:, 1:]
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# KNN

In [4]:
# Baseline k-NN fitted on the current training split with the library's
# default hyper-parameters.
from sklearn.neighbors import KNeighborsClassifier

# A bare expression that is not the last statement of a cell is evaluated and
# thrown away, so print the shape explicitly to actually show it.
print(X_train.shape)

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

Apply KNN without dimensionality reduction (no PCA)

In [5]:
import time
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, f1_score

# KNN on the full feature set (no dimensionality reduction).
# Reports test-set prediction latency, train/test accuracy and weighted F1.

# 1. Load dataset (example, MNIST)
df = pd.read_csv('train.csv')

# 2. Features and labels
X = df.drop('label', axis=1)
y = df['label']

# 3. Preprocessing: divide features by 255 and force integer labels
X = X / 255.0
y = y.astype(int)

# 4. Train-test split (80/20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 5. Standard scaling: statistics are learned on the training split only and
#    then applied to the test split, so no test information leaks into the fit.
#    After this step X_train / X_test are NumPy arrays, not DataFrames.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# 6. KNN model
knn = KNeighborsClassifier(n_neighbors=3)
knn.fit(X_train, y_train)

# 7. Predict on the test split and time it.
#    perf_counter() is a monotonic, high-resolution clock intended for
#    measuring elapsed time; time.time() follows the wall clock and can jump
#    if the system clock is adjusted mid-measurement.
start = time.perf_counter()
y_test_pred = knn.predict(X_test)
prediction_time = time.perf_counter() - start

# 8. Predict on the training split too (not included in the timing)
y_train_pred = knn.predict(X_train)

# 9. Evaluation
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='weighted')

# 10. Output
print(f"Prediction Time: {prediction_time:.2f} seconds")
print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")


Prediction Time: 8.69 seconds
Train Accuracy: 0.9669
Test Accuracy: 0.9421
F1 Score: 0.9420


Apply KNN with PCA dimensionality reduction

In [6]:
import time
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, f1_score

# KNN on PCA-reduced features.
# Uses the X_train / X_test / y_train / y_test currently in the kernel.

# 1. PCA - reduce features to 200 components.
#    The projection is fitted on the training split only and then applied to
#    the test split. random_state pins the result when scikit-learn selects a
#    randomized SVD solver, matching the seed (42) used elsewhere in this
#    notebook; it has no effect with a deterministic solver.
pca = PCA(n_components=200, random_state=42)
X_train_trf = pca.fit_transform(X_train)
X_test_trf = pca.transform(X_test)

# 2. Train KNN on PCA-transformed data
knn = KNeighborsClassifier(n_neighbors=3)  # You can change n_neighbors if you want
knn.fit(X_train_trf, y_train)

# 3. Test-set predictions with time measurement.
#    perf_counter() is monotonic and high-resolution, so the measured
#    duration cannot be distorted by wall-clock adjustments.
start_time = time.perf_counter()
y_test_pred = knn.predict(X_test_trf)
prediction_time = time.perf_counter() - start_time

# 4. Predict on the training set as well (not timed)
y_train_pred = knn.predict(X_train_trf)

# 5. Evaluation
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
f1 = f1_score(y_test, y_test_pred, average='weighted')

print(f"Prediction Time after PCA: {prediction_time:.2f} seconds")
print(f"Train Accuracy after PCA: {train_accuracy:.4f}")
print(f"Test Accuracy after PCA: {test_accuracy:.4f}")
print(f"F1 Score after PCA: {f1:.4f}")


Prediction Time after PCA: 3.25 seconds
Train Accuracy after PCA: 0.9721
Test Accuracy after PCA: 0.9498
F1 Score after PCA: 0.9496


Visualize the data

In [7]:
# 2-D scatter of the first two columns of the PCA-transformed training data,
# coloured by class label.
import plotly.express as px

# Cast labels to strings so plotly treats them as discrete categories
# (one legend entry per class) rather than a continuous colour scale.
y_train_trf = y_train.astype(str)

fig = px.scatter(x=X_train_trf[:,0],
                 y=X_train_trf[:,1],
                 color=y_train_trf,
                 color_discrete_sequence=px.colors.qualitative.G10,
                 # Array inputs are otherwise shown with the generic axis
                 # names "x" and "y"; name them so the figure stands alone.
                 labels={"x": "PC 1", "y": "PC 2"},
                 title="Training data projected onto the first two principal components"
                )
fig.show()


In [8]:
# Re-fit PCA with 3 components for the 3-D visualisation.
# NOTE: this rebinds `pca`, `X_train_trf` and `X_test_trf`, replacing the
# 200-component versions created earlier in the notebook.
# random_state pins the result when scikit-learn selects a randomized SVD
# solver, matching the seed (42) used elsewhere in this notebook.
pca = PCA(n_components=3, random_state=42)
X_train_trf = pca.fit_transform(X_train)
X_test_trf = pca.transform(X_test)

In [9]:
# Display the PCA-transformed training array (rows = training samples,
# columns = principal components) as the cell's output.
X_train_trf

array([[-2.71871908, -0.49060331,  1.13330738],
       [-0.67695957, -6.75295677, -2.33439208],
       [-3.03313131,  6.51049066,  7.49401929],
       ...,
       [ 2.14897688,  0.78225504, -0.74334802],
       [ 1.05945341,  0.94665057,  3.94667294],
       [17.7025476 ,  1.96160622, -4.94435803]])

In [10]:
# 3-D scatter of the first three columns of the PCA-transformed training
# data, coloured by class label.
import plotly.express as px

# String labels -> discrete colour categories instead of a continuous scale.
y_train_trf = y_train.astype(str)

# `df` is deliberately NOT passed as data_frame: every argument below is an
# array derived from the training split, not a column of `df`, and `df` holds
# all rows of the CSV while these arrays only cover the 80% training split.
fig = px.scatter_3d(x=X_train_trf[:,0], y=X_train_trf[:,1], z=X_train_trf[:,2],
              color=y_train_trf,
              # Replace the generic "x"/"y"/"z" axis names.
              labels={"x": "PC 1", "y": "PC 2", "z": "PC 3"},
              title="Training data projected onto the first three principal components")
fig.update_layout(
    # top margin widened from 20 to 40 so the title has room
    margin=dict(l=20, r=20, t=40, b=20),
    paper_bgcolor="LightSteelBlue",
)
fig.show()