# Model Definition and Evaluation
## Table of Contents
1. [Model Selection](#model-selection)
2. [Feature Engineering](#feature-engineering)
3. [Hyperparameter Tuning](#hyperparameter-tuning)
4. [Implementation](#implementation)
5. [Evaluation Metrics](#evaluation-metrics)
6. [Comparative Analysis](#comparative-analysis)


In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow import keras
# Import models you're considering


## Model Selection

[Discuss the type(s) of models you consider for this task, and justify the selection.]



## Feature Engineering

[Describe any additional feature engineering you've performed beyond what was done for the baseline model.]


In [None]:
# Load the dataset
# Define the paths to the folders containing the image data
train_data_dir = '../Dataset/cleaned_scaled_split/train/'
validation_data_dir = '../Dataset/cleaned_scaled_split/val/'
test_data_dir = '../Dataset/cleaned_scaled_split/test/'

# Set the parameters for image data generation.
# These are the single source of truth for every split loaded below.
batch_size = 64
img_height = 256
img_width = 256


def _load_split(directory):
    """Load one dataset split from a folder-per-class directory tree.

    Args:
        directory: Path whose sub-folders each hold the images of one class.

    Returns:
        The dataset returned by ``image_dataset_from_directory``, with integer
        labels inferred from the sub-folder names, RGB images of size
        ``(img_height, img_width)`` and batches of ``batch_size``.
    """
    return tf.keras.utils.image_dataset_from_directory(
        directory=directory,
        labels='inferred',
        label_mode='int',
        color_mode='rgb',
        batch_size=batch_size,
        image_size=(img_height, img_width))


# Load the training data from the folders
train_ds = _load_split(train_data_dir)

# Load the validation data from the folders
validation_ds = _load_split(validation_data_dir)

# Load the test data from the folders
test_ds = _load_split(test_data_dir)

## Hyperparameter Tuning

[Discuss any hyperparameter tuning methods you've applied, such as Grid Search or Random Search, and the rationale behind them.]


In [None]:
# Implement hyperparameter tuning
# Example using GridSearchCV with a DecisionTreeClassifier
# param_grid = {'max_depth': [2, 4, 6, 8]}
# grid_search = GridSearchCV(DecisionTreeClassifier(), param_grid, cv=5)
# grid_search.fit(X_train, y_train)


## Implementation

[Implement the final model(s) you've selected based on the above steps.]


In [None]:
def preprocess_data(X, Y):
    """Apply the ResNet50 input preprocessing to a batch of images.

    Args:
        X: Image batch passed to
            ``keras.applications.resnet50.preprocess_input``.
        Y: Labels belonging to ``X``; they are returned untouched (no
            one-hot encoding is applied).

    Returns:
        A ``(preprocessed_images, labels)`` tuple.
    """
    return keras.applications.resnet50.preprocess_input(X), Y


In [None]:
# Symbolic model input: 256x256 images with 3 channels, the same size and
# colour mode the dataset loaders are configured with.
inputs = keras.Input(shape=(256, 256, 3))

In [None]:
# ResNet50 backbone with ImageNet weights and without its classification top,
# wired to the `inputs` tensor defined earlier.
resnet = keras.applications.ResNet50(weights='imagenet',
                                     include_top=False, input_tensor=inputs)

# Freeze the first 170 backbone layers; only the remaining backbone layers and
# the new classification head below are updated during training.
for layer in resnet.layers[:170]:
    layer.trainable = False

# Classification head: pooled backbone features -> three dense stages
# (256, 128, 64 units) with batch normalisation and dropout -> 10 classes.
model = keras.models.Sequential()
model.add(resnet)
model.add(keras.layers.GlobalAveragePooling2D())
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(256, activation='relu'))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(128, activation='relu'))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.BatchNormalization())
model.add(keras.layers.Dense(64, activation='relu'))
model.add(keras.layers.Dropout(0.3))
model.add(keras.layers.BatchNormalization())
# Softmax (not sigmoid): the labels are mutually exclusive integer classes and
# the loss below is configured with from_logits=False, i.e. it expects a
# probability distribution over the 10 classes.
model.add(keras.layers.Dense(10, activation='softmax'))

model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=['accuracy'])
model.summary()

In [None]:

# for trainX, trainy in train_ds:
#     xtemp, ytemp = preprocess_data(trainX, trainy)
#     #if len(xtemp) == 64:
#     if len(trainX_list) == 0:
#         trainX_list = xtemp
#         trainy_list = ytemp
#     else:
#         trainX_list = np.concatenate((trainX_list, xtemp))
#         trainy_list = np.concatenate((trainy_list, ytemp))


In [None]:
def dataprocessing(dataset, listX, listy):
    """Preprocess every batch of ``dataset`` and stack the results in memory.

    Each ``(X, y)`` batch is passed through ``preprocess_data`` and the
    results are appended to the seed arrays.

    Args:
        dataset: Iterable yielding ``(images, labels)`` batches.
        listX: Array of already collected images to extend. If it is empty,
            both seeds are ignored and the result holds only the dataset's
            batches.
        listy: Array of already collected labels matching ``listX``.

    Returns:
        A ``(images, labels)`` tuple of NumPy arrays. If ``dataset`` yields
        nothing and ``listX`` is empty, the seeds are returned unchanged.
    """
    # Collect the pieces first and concatenate once at the end: calling
    # np.concatenate inside the loop would recopy the growing array per batch.
    has_seed = len(listX) != 0
    x_parts = [listX] if has_seed else []
    y_parts = [listy] if has_seed else []

    for X, y in dataset:
        xtemp, ytemp = preprocess_data(X, y)
        x_parts.append(np.asarray(xtemp))
        y_parts.append(np.asarray(ytemp))

    if not x_parts:
        return listX, listy
    return np.concatenate(x_parts), np.concatenate(y_parts)

In [None]:
# Turn each split into one pair of in-memory arrays (preprocessed images and
# their labels), starting every accumulation from empty seed arrays.
trainX_list, trainy_list = dataprocessing(
    dataset=train_ds, listX=np.array([]), listy=np.array([]))
valX_list, valy_list = dataprocessing(
    dataset=validation_ds, listX=np.array([]), listy=np.array([]))
testX_list, testy_list = dataprocessing(
    dataset=test_ds, listX=np.array([]), listy=np.array([]))

In [None]:
# Bare expression: presumably evaluated as the last line of a notebook cell so
# the collected training labels are displayed for a quick sanity check.
trainy_list

In [None]:
# Train for 10 epochs on the in-memory training arrays, reshuffling each epoch
# and reporting metrics on the validation arrays.
model.fit(
    x=trainX_list,
    y=trainy_list,
    batch_size=64,
    epochs=10,
    validation_data=(valX_list, valy_list),
    shuffle=True,
)

In [None]:
# Persist the model to disk; the evaluation cell loads this same file when no
# `model` object exists in the session.
model.save('Res50_no_resize_tvt_split.keras')

In [None]:
import numpy as np

# Reuse the model from this session if it exists; otherwise fall back to the
# file written by the save cell.
try:
    model.summary()
except NameError:
    model = tf.keras.models.load_model('Res50_no_resize_tvt_split.keras')

# Per-class tallies over the whole test set (10 classes, labels 0..9).
num_classes = 10
class_correct = [0] * num_classes
class_counters = [0] * num_classes
for images, labels in test_ds:
    # Apply the same ResNet50 preprocessing that was used for training.
    images_pp = keras.applications.resnet50.preprocess_input(images)
    predictions = model.predict(images_pp)
    y_pred_classes = np.argmax(predictions, axis=1)
    y_true = labels.numpy()  # converted once per batch instead of per class
    # Accuracy of the current batch, printed as a progress indicator.
    print(np.mean(y_pred_classes == y_true))
    for i in range(num_classes):
        is_class_i = y_true == i
        class_correct[i] += np.sum(y_pred_classes[is_class_i] == i)
        class_counters[i] += np.sum(is_class_i)

# A class without any test sample gets NaN instead of triggering a 0/0
# division (ZeroDivisionError on an empty dataset, a warning otherwise).
class_accuracy = [
    correct / total if total else float('nan')
    for correct, total in zip(class_correct, class_counters)
]
# Misspelled legacy name kept because the plotting cell reads it.
calss_accuracy = class_accuracy

In [None]:
import os

# NOTE(review): this path differs from `train_data_dir`
# ('../Dataset/cleaned_scaled_split/train/') used to load the datasets;
# confirm both trees contain the same class folders.
train_dir = '../Dataset/train_cleaned_scaled/train/'

# The names are used as tick labels for class indices 0..9, so they must be in
# the order Keras assigns label indices when inferring classes from folders:
# sorted sub-directory names, with plain files and hidden entries ignored.
# os.listdir alone returns entries in arbitrary order.
datadirs = sorted(
    entry for entry in os.listdir(train_dir)
    if not entry.startswith('.')
    and os.path.isdir(os.path.join(train_dir, entry))
)
print(datadirs)

In [None]:
import matplotlib.pyplot as plt

# Integer x positions, one per class (0..9).
class_labels = list(range(10))

# Bar chart of the per-class test accuracy, expressed in percent.
plt.figure(figsize=(8, 4), dpi=150)
plt.grid()
plt.bar(class_labels, 100 * np.asarray(calss_accuracy))

# Axis label and title (no x-axis label is set).
plt.ylabel('Accuracy in %')
plt.title('Accuracy of the baseline model on the test data for each class')

# Show only the 60-100 % range and label each bar with its class folder name.
plt.ylim(60, 100)
plt.xticks(class_labels, datadirs, rotation=45, ha='right')

# Render the figure.
plt.show()

## Evaluation Metrics

[Clearly specify which metrics you'll use to evaluate the model performance, and why you've chosen these metrics.]


In [None]:
# Evaluate the model using your chosen metrics
# Example for classification
# y_pred = model.predict(X_test)
# print(classification_report(y_test, y_pred))

# Example for regression
# mse = mean_squared_error(y_test, y_pred)

# Your evaluation code here


## Comparative Analysis

[Compare the performance of your model(s) against the baseline model. Discuss any improvements or setbacks and the reasons behind them.]


In [None]:
# Comparative Analysis code (if applicable)
# Example: comparing accuracy of the baseline model and the new model
# print(f"Baseline Model Accuracy: {baseline_accuracy}, New Model Accuracy: {new_model_accuracy}")
