## Import libraries

In [None]:
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.utils import to_categorical


## Load dataset

In [None]:
# Load the dataset table from CSV; later cells read its "label" and "path"
# columns.
df = pd.read_csv(filepath_or_buffer="gtsrb_dataset.csv")

### relevant classes

In [None]:
# Original label ids that are kept for training; rows with any other label
# are dropped by the relabelling cell. Id 6 is intentionally absent here.
speed_classes = [
    1, 2, 3, 4, 5,
    7, 8, 9, 10, 11, 12,
]

### change label

In [None]:
# Map each retained original label id to the label id used for training.
# NOTE(review): the new ids are not contiguous -- 7 maps to 5 and 8 maps to 7,
# so new id 6 is never produced. The mapping is left untouched here because
# downstream cells size the output layer with num_classes = 12; confirm
# whether the gap is intended.
new_labels = {
    1: 0,
    2: 1,
    3: 2,
    4: 3,
    5: 4,
    7: 5,
    8: 7,
    9: 8,
    10: 9,
    11: 10,
    12: 11,
}

# Keep only the rows whose label is in speed_classes and relabel them in one
# vectorised pass. Growing a DataFrame with pd.concat inside an iterrows loop
# copies the accumulated frame on every iteration (quadratic in row count).
df_speed = df[df["label"].isin(speed_classes)].copy()
df_speed["label"] = df_speed["label"].map(new_labels)
df_speed = df_speed.reset_index(drop=True)
print(df_speed)

### see the distribution of the dataset

In [None]:
# Number of rows per label, ordered by label id.
counts = df_speed["label"].value_counts().sort_index()

# Print the per-label counts to the console.
print("Distribuția etichetelor:")
for label, count in zip(counts.index, counts):
    print(f"Etichetă {label}: {count} exemple")

# Show the same counts as a bar chart.
plt.figure(figsize=(8, 5))
counts.plot.bar()
plt.xlabel("Etichetă")
plt.ylabel("Număr de exemple")
plt.title("Distribuția etichetelor în dataset")
plt.grid(axis='y')
plt.show()

### Balance the dataframe

In [None]:

# Cap every label at max_per_class rows. Labels are taken from `counts`,
# computed in the distribution cell; labels at or below the cap are kept whole.
max_per_class = 1000
balanced_df = []
for label in counts.index:
    class_rows = df_speed[df_speed['label'] == label]
    if len(class_rows) > max_per_class:
        # Fixed seed so the down-sampled subset is reproducible.
        class_rows = class_rows.sample(n=max_per_class, random_state=10)
    balanced_df.append(class_rows)

df_speed = pd.concat(balanced_df).reset_index(drop=True)

### see the new distribution

In [None]:
# Recompute the number of rows per label, ordered by label id.
counts = df_speed["label"].value_counts().sort_index()

# Print the per-label counts to the console.
print("Distribuția etichetelor:")
for label, count in zip(counts.index, counts):
    print(f"Etichetă {label}: {count} exemple")

# Show the same counts as a bar chart.
plt.figure(figsize=(8, 5))
counts.plot.bar()
plt.xlabel("Etichetă")
plt.ylabel("Număr de exemple")
plt.title("Distribuția etichetelor în dataset")
plt.grid(axis='y')
plt.show()

## Prepare train and test datasets

### shuffle the dataset

In [None]:
# Shuffle all rows with a fixed seed (frac=1 samples every row once), then
# renumber the index so positional slicing in the split cell is straightforward.
df_shuffled = df_speed.sample(frac=1, random_state=42)
df_shuffled = df_shuffled.reset_index(drop=True)

### split the dataset

In [None]:
# Fraction of the shuffled rows that goes into the training split; the
# remainder becomes the test split.
train_test_ratio = 0.8
# Use the ratio constant rather than repeating the literal, so changing
# train_test_ratio actually changes the split.
train_size = int(len(df_shuffled) * train_test_ratio)
test_size = len(df_shuffled) - train_size
df_train = df_shuffled.iloc[:train_size].reset_index(drop=True)
df_test = df_shuffled.iloc[train_size:].reset_index(drop=True)

print("Train:", len(df_train), "– Test:", len(df_test))

### Preprocess images

### image size

In [None]:
# Target size every image is resized to by preprocess_image (it is that
# function's default `size`); total_pixels is derived from it later.
image_size = (50,50)

In [None]:

def preprocess_image(img, size=image_size):
    """Turn an image into a flat, scaled float32 vector.

    The image is resized to ``size``, converted to 8-bit grayscale (mode
    "L"), flattened to one dimension and divided by 255.

    Args:
        img: Object exposing ``resize`` and ``convert`` (a PIL image in this
            notebook).
        size: Target size handed to ``img.resize``; defaults to the
            module-level ``image_size`` as it was when the function was
            defined.

    Returns:
        1-D ``np.float32`` array of the pixel values divided by 255.
    """
    gray = img.resize(size).convert("L")
    pixels = np.array(gray, dtype=np.float32).flatten()
    pixels /= 255
    return pixels

### one hot encoding function

In [None]:
  
def one_hot(Y, num_classes):
    """One-hot encode an array of class labels.

    Args:
        Y: NumPy array of labels; values are cast to ``int`` before use.
        num_classes: Number of columns in the encoded output.

    Returns:
        ``np.int32`` array of shape ``(len(Y), num_classes)`` with a 1 in the
        column given by each label and 0 elsewhere.
    """
    labels = Y.astype(int)
    n_samples = labels.shape[0]
    encoded = np.zeros((n_samples, num_classes), dtype=np.int32)
    row_ids = np.arange(n_samples)
    # Pairwise (row, label) indexing sets exactly one cell per sample.
    encoded[row_ids, labels] = 1
    return encoded


### change dataset format

### Prepare train df

In [None]:
total_pixels = image_size[0]*image_size[1]
num_classes = 12
number_images = len(df_train)

X_train = np.zeros((number_images, total_pixels), dtype=np.float32)
for i, path in enumerate(df_train["path"]):
    X_train[i,:] = preprocess_image(Image.open(path))
print(X_train.shape)
X_train = X_train / 255.0
print(X_train.shape)
Y_train = one_hot(np.array(df_train["label"].values.astype("float32")), num_classes)



### Prepare test df

In [None]:
number_images = len(df_test)
m_test = len(df_test)
X_test = np.zeros((total_pixels, number_images), dtype=np.float32)
for i, path in enumerate(df_test["path"]):
    X_test[:, i] = preprocess_image(Image.open(path))
    
X_test = X_test / 255.0
Y_test = one_hot(np.array(df_test["label"].values.astype("float32")), num_classes)
print(X_test.shape)

In [None]:
# Fully connected classifier: two hidden ReLU layers of 20 units each and a
# softmax output with one unit per class. The input width is taken from the
# number of columns in X_train.
model = Sequential()
model.add(Dense(20, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(20, activation='relu'))
model.add(Dense(num_classes, activation='softmax'))


In [None]:
# Categorical cross-entropy matches the one-hot encoded targets; accuracy is
# tracked alongside the loss.
model.compile(
    loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'],
)


In [None]:
# Train for 20 epochs in mini-batches of 32. The test split is passed as
# validation data, so the per-epoch validation metrics are computed on the
# same data that model.evaluate uses afterwards.
history = model.fit(
    x=X_train,
    y=Y_train,
    batch_size=32,
    epochs=20,
    validation_data=(X_test, Y_test),
)


In [None]:
# Final loss and accuracy on the test split; accuracy is printed as a
# percentage.
loss, accuracy = model.evaluate(x=X_test, y=Y_test)
print(f"Test Accuracy: {accuracy * 100:.2f}%")


## Homework 2: Use TensorFlow to create and train the neural network

#### Do not change the code above