## Setup

### Importing Libraries

In [None]:
import math
from pathlib import Path
import numpy as np
import pandas as pd
import os
from typing import List, Text, Tuple
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
import tensorflow as tf
import keras 
from tqdm import tqdm
from keras.callbacks import EarlyStopping,ModelCheckpoint
from sklearn.metrics import confusion_matrix , accuracy_score
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
import glob 
import matplotlib.pyplot as plotter
import warnings
# warnings.filterwarnings('ignore')

In [None]:
from src.data.dataset import get_train_dataset, get_val_dataset, get_test_dataset, get_class_names
from src.data.config import DATASET_PATH, get_default_dataset_config
from src.model import get_model
from src.utils import PROJECT_ROOT_PATH

### Utility Functions

In [None]:
def get_filenames_df(dataset_path=None):
    """Return a shuffled table of dataset files and their class labels.

    Files are looked up two levels below the dataset root, i.e. entries
    matching ``<root>/*/*``; the label of each entry is the name of its
    parent directory.

    Args:
        dataset_path: Root directory to scan. When ``None`` (the default),
            the module-level ``DATASET_PATH`` is used.

    Returns:
        A DataFrame with the columns ``"Image"`` (path of the entry) and
        ``"Label"`` (name of its parent directory), in random order.
    """
    root = DATASET_PATH if dataset_path is None else dataset_path
    # Build the pattern with os.path.join instead of concatenating "//*//*".
    files = glob.glob(os.path.join(str(root), "*", "*"))
    np.random.shuffle(files)
    # basename(dirname(...)) works with any path separator, unlike split("/").
    labels = [os.path.basename(os.path.dirname(file_path)) for file_path in files]
    return pd.DataFrame({"Image": files, "Label": labels}, columns=["Image", "Label"])

### Display Functions   

In [None]:
def plot_images(images: List[np.ndarray], subtitles: List[Text] = None, cols_count: int = 5, figsize: Tuple[int, int] = (20, 20)):
    """Draw images on a grid of subplots, one image per grid cell.

    Images fill the grid row by row. Axis decorations are switched off for
    every cell, including the cells left empty in the last row.

    Args:
        images: Images to draw; each one is passed to ``Axes.imshow``.
        subtitles: Optional titles, one per image. When ``None``, every
            image gets an empty title.
        cols_count: Number of columns in the grid; the number of rows is
            derived from it and the number of images.
        figsize: Figure size forwarded to ``plt.subplots``.

    Raises:
        ValueError: If ``subtitles`` is given but its length differs from
            the number of images, or if ``cols_count`` is smaller than 1.
    """
    images_count = len(images)

    # Validate before creating the figure so that a rejected call does not
    # leave an empty figure behind.
    if subtitles is not None and len(subtitles) != images_count:
        raise ValueError("Number of images and subtitles should be equal")
    if cols_count < 1:
        raise ValueError("cols_count should be a positive integer")
    if images_count == 0:
        # Nothing to draw.
        return
    if subtitles is None:
        subtitles = [""] * images_count

    rows_count = math.ceil(images_count / cols_count)
    # squeeze=False keeps `axes` two-dimensional for every grid shape, so
    # 1x1, single-row and single-column grids are all indexed the same way.
    fig, axes = plt.subplots(rows_count, cols_count, figsize=figsize, squeeze=False)

    for cell_idx in range(rows_count * cols_count):
        ax = axes[divmod(cell_idx, cols_count)]
        if cell_idx < images_count:
            ax.imshow(images[cell_idx])
            ax.set_title(subtitles[cell_idx])
        ax.axis("off")

## Data Preprocessing

### Load the data

In [None]:
# Table with one row per dataset file ("Image" path and "Label"); used below
# to plot the class distribution.
dataframe = get_filenames_df()

In [1]:
# Load the train / validation / test datasets and the class names through the
# helpers imported from src.data.dataset.
# The cell importing those helpers must be run first, otherwise these names
# are undefined.
train_data = get_train_dataset()
val_data = get_val_dataset()
test_data = get_test_dataset()
class_names = get_class_names()

NameError: name 'get_train_dataset' is not defined

### Display the data

#### Display the distribution of the data

In [None]:
# Count plot of the "Label" column: one bar per class label, with the x tick
# labels rotated by 50 degrees.
sns.countplot(data=dataframe, x="Label")
plotter.xticks(rotation=50);

#### Display some sample images

In [None]:
samples_per_class: int = 4

# One small dataset per class: un-batch the training data, keep only the
# elements whose label equals the class index, and take the first few.
# NOTE(review): the lambda reads class_idx from the comprehension scope; this
# relies on the predicate being captured when filter() is called — confirm.
class_samples = [train_data.unbatch().filter(lambda img, img_label: img_label == class_idx).take(samples_per_class) for class_idx in range(len(class_names))]

images: List[np.ndarray] = []
labels: List[Text] = []

for samples in class_samples:
    for imgs, label_indices in samples:
        images.append(imgs.numpy().astype("uint8"))
        # Labels are matched against range(len(class_names)) above, so they
        # index class_names directly; subtracting 1 shifted every name by one
        # and mapped class 0 to the last class name.
        labels.append(class_names[label_indices.numpy()])

plot_images(images, labels, cols_count=4)


## Training

### Model

In [None]:
# Create the model with class_count set to the number of class names, then
# display its summary.
model = get_model(class_count=len(class_names))
model.summary()

### Training


Training is not run inside this notebook. To train the model, use the following command:
```bash
python -m scripts.train
```

### Checkpoints

Let's load the trained weights from the last checkpoint into a fresh model, so that it can be evaluated in the sections below.

In [None]:
from src.trainer.config import TrainingConfig, get_default_training_config, ModelCheckpointConfig, CHECKPOINTS_PATH, get_default_checkpoint_path
from src.data.config import  get_default_input_shape
from src.trainer.eval import eval

In [None]:
# Create a new model instance with class_count set to the number of class
# names; this rebinds `model`, replacing any earlier instance.
model = get_model(class_count=len(class_names))

In [None]:
# Project defaults: the training configuration (its `metrics` attribute is
# used when reporting evaluation results), the checkpoint path to load weights
# from, and the model input shape.
config = get_default_training_config()
checkpoint_path = get_default_checkpoint_path()
input_shape = get_default_input_shape()

In [None]:
# Build the model for the default input shape before loading weights —
# presumably so the model's variables exist when load_weights runs; confirm.
model.build(input_shape=input_shape)
# Load the weights stored at the default checkpoint path.
model.load_weights(checkpoint_path)
# compile() is called without arguments, so no optimizer, loss or metrics are
# passed here.
# NOTE(review): confirm that src.trainer.eval.eval sets up or does not need them.
model.compile()

### Evaluation

#### Evaluation on the validation set 

In [None]:
# Evaluate the model on the validation data. The first entry of the result is
# reported as the loss; the remaining entries are paired, in order, with the
# metric names from config.metrics.
eval_result = eval(model, val_data)
score, metric_values = eval_result[0], eval_result[1:]
metrics = {name: value for name, value in zip(config.metrics, metric_values)}
print('Val Loss =', score)
for metric_name in metrics:
    metric_value = metrics[metric_name]
    print(f"Val {metric_name} = {metric_value}")

### Visualization

In [None]:
samples_per_class: int = 2

# One small dataset per class: un-batch the validation data, keep only the
# elements whose label equals the class index, and take the first few.
# NOTE(review): the lambda reads class_idx from the comprehension scope; this
# relies on the predicate being captured when filter() is called — confirm.
class_samples = [val_data.unbatch().filter(lambda img, img_label: img_label == class_idx).take(samples_per_class) for class_idx in range(len(class_names))]

images: List[np.ndarray] = []
labels: List[Text] = []

for samples in class_samples:
    for imgs, label_indices in samples:
        images.append(imgs.numpy().astype("uint8"))
        # The true label must be looked up the same way as the predicted one
        # below (plain index into class_names); subtracting 1 made correct
        # predictions show up with a different "True" name.
        labels.append(class_names[label_indices.numpy()])

# Predict all collected samples in one call and map each prediction to the
# class name with the highest score.
predictions = model.predict(np.array(images))
predicted_labels = [class_names[np.argmax(prediction)] for prediction in predictions]

subtitles = [f"True: {true_label}\nPredicted: {predicted_label}" for true_label, predicted_label in zip(labels, predicted_labels)]

plot_images(images, subtitles, cols_count=4)

## Testing

### Evaluation on the test set

In [None]:
# Evaluate the model on the test data. The first entry of the result is
# reported as the loss; the remaining entries are paired, in order, with the
# metric names from config.metrics.
eval_result = eval(model, test_data)
score, metric_values = eval_result[0], eval_result[1:]
metrics = {name: value for name, value in zip(config.metrics, metric_values)}
print('Test Loss =', score)
for metric_name in metrics:
    metric_value = metrics[metric_name]
    print(f"Test {metric_name} = {metric_value}")

### Confusion Matrix

In [None]:
# Collect the test images and their true labels in a single pass over the
# dataset, then predict on exactly those images so that y_pred[i] belongs to
# y_true[i].
X, y_true, y_pred = [], [], []
# Dedicated loop names: reusing `images` / `labels` here would overwrite the
# lists of the same name built in the visualization cells.
for batch_images, batch_labels in test_data:
    y_true.extend(batch_labels.numpy())
    X.extend(batch_images.numpy())

predictions = model.predict(np.array(X))
y_pred.extend(np.argmax(prediction) for prediction in predictions)

# Fix the label set to every class index so the matrix always has one row and
# column per class (also for classes that never occur) and lines up with the
# class-name tick labels.
# NOTE(review): assumes labels are the indices 0..len(class_names)-1.
class_indices = list(range(len(class_names)))
cm = confusion_matrix(y_true, y_pred, labels=class_indices)

plt.figure(figsize=(10, 10))
sns.heatmap(cm, annot=True, fmt="d", xticklabels=class_names, yticklabels=class_names)
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.show()