In [3]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [4]:
image_size = (16, 16)

In [5]:
def read_df(image_folder, target = np.nan):
    images = []
    
    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            img = cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img_resized = cv2.resize(img, image_size)
                img_normalized = img_resized / 255.0  # Normalizálás 0-1 közé
                images.append(img_normalized.flatten())  # Laposítjuk a képet            
    
    images = np.array(images)
    df = pd.DataFrame(images)
    df.insert(0, 'target', target)
    
    return df

In [6]:
def read_df_test(image_folder):
    images = []
    labels = []
    
    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            img = cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img_resized = cv2.resize(img, image_size)
                img_normalized = img_resized / 255.0  # Normalizálás 0-1 közé
                images.append(img_normalized.flatten())  # Laposítjuk a képet
                label = filename
                labels.append(label)
    
    images = np.array(images)
    labels = np.array(labels)
    
    # DataFrame létrehozása a képadatokból és címkékből
    df = pd.DataFrame(images)  # Minden sor egy kép laposított verziója
    df.insert(0, 'label', labels)
    
    # A DataFrame ellenőrzése
    return df

In [7]:
def read_folders(folder_path):
    folders = []

    for folder in os.listdir(folder_path):
        if 'Sample' in folder:
            folders.append(folder)
    
    return sorted(folders)

In [8]:
df_array = []
df_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

for folder, i in zip(read_folders("data/Train"), df_names):
    df_array.append(read_df("data/Train/" + folder, i))
    
df = pd.concat(df_array)

In [9]:
X = df.iloc[:, 1:]

In [10]:
y = df.loc[:, 'target']

In [42]:
from tensorflow.keras import layers, models
from sklearn.model_selection import StratifiedKFold
import numpy as np

# Define input size and number of classes
input_size = (16, 16, 1)  # Input images are 16x16 grayscale
num_classes = len(df_names)  # Number of unique classes (59)

# Function to build the model
def build_model():
    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_size))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    model.add(layers.Flatten())
    model.add(layers.Dense(128, activation='relu'))
    model.add(layers.Dropout(0.5))
    model.add(layers.Dense(num_classes, activation='softmax'))
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# Prepare the data
X_data = X.values.reshape(-1, 16, 16, 1).astype('float32')  # Reshape for convolutional layers
y_data = y.factorize()[0]  # Convert categorical targets to numeric

# Stratified K-Fold Cross Validation
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)

# Cross-validation loop
fold = 1
cv_scores = []

for train_index, test_index in skf.split(X_data, y_data):
    print(f"\nTraining Fold {fold}...")

    # Split data
    X_train, X_test = X_data[train_index], X_data[test_index]
    y_train, y_test = y_data[train_index], y_data[test_index]

    # Build and train the model
    model = build_model()
    history = model.fit(X_train, y_train,
                        epochs=10,
                        batch_size=32,
                        validation_data=(X_test, y_test),
                        verbose=1)

    # Evaluate the model on the test set
    score = model.evaluate(X_test, y_test, verbose=0)
    print(f"Fold {fold} Accuracy: {score[1] * 100:.2f}%")
    cv_scores.append(score[1])  # Append accuracy to scores
    fold += 1

# Print average accuracy across folds
print("\nCross-Validation Results:")
print(f"Mean Accuracy: {np.mean(cv_scores) * 100:.2f}%")
print(f"Standard Deviation: {np.std(cv_scores) * 100:.2f}%")


Training Fold 1...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 1 Accuracy: 85.58%

Training Fold 2...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 2 Accuracy: 84.24%

Training Fold 3...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 3 Accuracy: 83.00%

Training Fold 4...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 4 Accuracy: 85.42%

Training Fold 5...
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Fold 5 Accuracy: 84.35%

Cross-Validation Results:
Mean Accuracy: 84.52%
Standard Deviation: 0.93%


In [43]:
test_df = read_df_test('data/TestData')

In [44]:
# Use test_df for prediction
X_test = test_df.iloc[:, 1:]  # Skip the first column (labels or identifiers)

# Reshape and normalize test data
X_test_prepared = X_test.values.reshape(-1, 16, 16, 1).astype('float32')

# Predict probabilities
pred = model.predict(X_test_prepared)

# Get the predicted class indices
predicted_classes = pred.argmax(axis=1)

# Map indices to original class labels
predicted_labels = [df_names[i] for i in predicted_classes]

# Add predictions back to test DataFrame
test_df['pred'] = predicted_labels



In [48]:
model.save("models_/model_1")

INFO:tensorflow:Assets written to: models_/model_1/assets


INFO:tensorflow:Assets written to: models_/model_1/assets


In [45]:
test_df

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,247,248,249,250,251,252,253,254,255,pred
0,Test4751.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,2
1,Test2320.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,N
2,Test4989.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,N
3,Test6146.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,S
4,Test1629.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7095,Test2339.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,p
7096,Test4990.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,N
7097,Test5456.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,9
7098,Test4748.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,0


In [46]:
test_df[['pred', 'label']]

Unnamed: 0,pred,label
0,2,Test4751.png
1,N,Test2320.png
2,N,Test4989.png
3,S,Test6146.png
4,5,Test1629.png
...,...,...
7095,p,Test2339.png
7096,N,Test4990.png
7097,9,Test5456.png
7098,0,Test4748.png
