<a href="https://colab.research.google.com/github/FatmaAtta/CNN/blob/main/SL_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow



# Project Rules
### At each step, test one specific parameter first; after finding its best value, use it in the following steps to find the best values of the other parameters

*   Data must be shuffled first
*   batch size > 30
*   start with ReLU
* number of layers > 3
* 2D max pooling layer - 2x2 stride - 2x2 kernel size
* optimizer -> stochastic gradient descent with any initial learning rate
* do not use more than 3 CNN layers in any model
* do not use more than 4 FC (fully connected) layers
* start with batch size = 32 or 64 (how many samples model looks at before updating weights)
* number of epochs >= 10  and <=25 maybe (an epoch is one complete pass on the data)
* test with double the batch size AND (triple OR 4 times) the batch size
* use at least 3 other activation functions
* with best settings reached -> try 2 more optimizers
* put dropout layer anywhere and test 2 different places and different dropout rates
* data input should be 28x28
* output layer size = 10 (corresponding to the different classes)
* mnist dataset
* cross entropy loss



In [None]:
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import numpy as np
from tensorflow.keras.utils import to_categorical


# Load MNIST, then hold out 20% of the official training split for validation.
# random_state is fixed so the split is the same on every run.
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train, x_valid, y_train, y_valid = train_test_split(
    x_train,
    y_train,
    test_size=0.2,
    random_state=42,
)

# Print the shape of every split as a quick sanity check.
shape_report = (
    ("Train images shape:       ", x_train),
    ("Train labels shape:       ", y_train),
    ("Validation images shape:  ", x_valid),
    ("Validation labels shape:  ", y_valid),
    ("Test images shape:        ", x_test),
    ("Test labels shape:        ", y_test),
)
for caption, array in shape_report:
    print(caption, array.shape)

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/mnist.npz
[1m11490434/11490434[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Train images shape:        (48000, 28, 28)
Train labels shape:        (48000,)
Validation images shape:   (12000, 28, 28)
Validation labels shape:   (12000,)
Test images shape:         (10000, 28, 28)
Test labels shape:         (10000,)


In [None]:
# Show the first 25 training images in a 5x5 grid, each captioned with its label.
class_names = [str(digit) for digit in range(10)]
fig, axes = plt.subplots(5, 5, figsize=(10, 10))

for ax, image, label in zip(axes.flat, x_train, y_train):
    # Hide ticks and grid lines so only the digit itself is visible.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.grid(False)
    ax.imshow(image, cmap=plt.cm.binary)
    ax.set_xlabel(class_names[label])
plt.show()

In [None]:
# Convert the images to float32 before doing arithmetic on them.
x_train = x_train.astype('float32')
x_valid = x_valid.astype('float32')
x_test = x_test.astype('float32')

# Standardize every split with the statistics of the TRAINING split only, so no
# information from the validation/test images leaks into preprocessing.
mean = np.mean(x_train)
std = np.std(x_train)

# The small epsilon guards against division by zero.
x_train = (x_train-mean)/(std + 1e-7)
x_valid = (x_valid-mean)/(std + 1e-7)
x_test = (x_test-mean)/(std + 1e-7)


def _one_hot_once(labels, num_classes=10):
    """Return one-hot encoded labels, leaving already-encoded labels untouched.

    Calling to_categorical on labels that are already one-hot (2-D) would add
    another axis and silently corrupt the targets, which is what happens if
    this cell is executed a second time. Only 1-D label vectors are encoded.
    """
    labels = np.asarray(labels)
    if labels.ndim > 1:
        return labels
    return to_categorical(labels, num_classes)


y_train = _one_hot_once(y_train)
y_valid = _one_hot_once(y_valid)
y_test = _one_hot_once(y_test)

# ANN Model

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
import tensorflow as tf
import time

# Flatten every image into a single feature vector so it can feed Dense layers.
x_train = x_train.reshape(x_train.shape[0], -1)
x_valid = x_valid.reshape(x_valid.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# Fully connected baseline: two hidden ReLU layers (with dropout after the
# first) followed by a 10-way softmax output.
model = Sequential([
    Dense(512, activation='relu', input_shape=x_train.shape[1:]),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(10, activation='softmax')
])

# Training hyperparameters.
batch_size = 64
epochs = 12
learning_rate = 0.001
optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate,beta_1= 0.9 ,beta_2= 0.999, epsilon=1e-07)

model.compile(optimizer=optimizer,
              loss='categorical_crossentropy',
              metrics=['accuracy'])
# Both callbacks watch the validation loss: one lowers the learning rate when it
# plateaus, the other stops training and restores the best weights.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.00001)
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)


# Train and measure time (perf_counter is a monotonic clock meant for durations).
train_start = time.perf_counter()
history = model.fit(
    x_train, y_train,
    validation_data=(x_valid, y_valid),
    epochs=epochs,
    batch_size=batch_size,
    callbacks=[reduce_lr, early_stopping],
    verbose=2
)
train_end = time.perf_counter()
# EarlyStopping may end training before `epochs` is reached, so average over the
# number of epochs that actually ran rather than the configured maximum.
epochs_run = len(history.history['loss'])
avg_train_time = (train_end - train_start) / max(epochs_run, 1)

# Time repeated evaluations on the test set and average them.
test_times = []
for _ in range(epochs):
    start = time.perf_counter()
    model.evaluate(x_test, y_test, verbose=0)
    end = time.perf_counter()
    test_times.append(end - start)
avg_test_time = np.mean(test_times)


final_test_loss, final_test_acc = model.evaluate(x_test, y_test, verbose=0)

model.summary()

print("\n--- ANN Results ---")
print(f"Final Test Accuracy: {final_test_acc:.4f}")
print("Validation Accuracy per 1st 5 Epoch:", [f"{acc:.4f}" for acc in history.history['val_accuracy'][:5]])
print(f"Total Parameters: {model.count_params()}")
print(f"Average Training Time per Epoch: {avg_train_time:.4f} seconds")
print(f"Average Testing Time per Epoch: {avg_test_time:.4f} seconds")
print("Layers: Input(784) -> Dense(512, relu) -> Dropout(0.2) -> Dense(256, relu) -> Dense(10, softmax)")
# This is the initial learning rate; ReduceLROnPlateau may have lowered it during training.
print("Learning Rate Used:", learning_rate)

# SVM Model

In [None]:
import numpy as np
import time
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.datasets import fetch_openml
from sklearn.model_selection import train_test_split

# Reload the raw MNIST arrays for the SVM experiment.
# NOTE: this rebinds x_train, y_train, x_test and y_test, replacing whatever
# earlier cells stored under those names.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Flatten each image to a 784-long vector and divide by 255.0
# (presumably mapping 8-bit pixel intensities into [0, 1]).
flat_dim = 28 * 28
x_train = x_train.reshape(-1, flat_dim).astype("float32") / 255.0
x_test = x_test.reshape(-1, flat_dim).astype("float32") / 255.0

# Draw class-stratified subsamples (8000 train / 2000 test); only the selected
# part of each split is kept.
train_parts = train_test_split(
    x_train, y_train, train_size=8000, stratify=y_train, random_state=42
)
x_train_svm, y_train_svm = train_parts[0], train_parts[2]

test_parts = train_test_split(
    x_test, y_test, train_size=2000, stratify=y_test, random_state=42
)
x_test_svm, y_test_svm = test_parts[0], test_parts[2]


In [None]:
# RBF-kernel SVM preceded by feature standardization.
rbf_svc = svm.SVC(kernel='rbf', gamma='scale', C=5)
clf = make_pipeline(StandardScaler(), rbf_svc)

# Time the fit on the SVM training subset.
fit_began = time.time()
clf.fit(x_train_svm, y_train_svm)
train_time = time.time() - fit_began

# Time prediction on the SVM test subset.
predict_began = time.time()
y_pred = clf.predict(x_test_svm)
test_time = time.time() - predict_began

acc = accuracy_score(y_test_svm, y_pred)

# Print the summary, one line per metric.
report_lines = [
    "\n--- SVM Results ---",
    f"Test Accuracy: {acc:.4f}",
    f"Training Time: {train_time:.2f} seconds",
    f"Testing Time: {test_time:.4f} seconds",
]
for line in report_lines:
    print(line)

# CNN
* use ReLU for now
* 3 layers minimum
* 2D MAX pooling after at least one layer, 2x2 stride, 2x2 kernel
* choose good starting epoch
* optimizer -> SGD
* start with any learning rate
* test different learning rates
* start with batch size = 32, 64

### Testing different learning rates

### Testing different number of CNN layers and CNN parameters
### Testing different number of FC layers


### batch size = 2b

## activation function 1

## activation function 2

## activation function 3


## optimizer 1

## optimizer 2



## dropout layer place 1

## dropout layer place 2