In [222]:
import os
import cv2
import numpy as np
import pandas as pd
from sklearn.naive_bayes import GaussianNB
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt

In [201]:
def read_df(image_folder, target = np.nan):
    image_size = (32, 32)
    images = []
    
    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            img = cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img_resized = cv2.resize(img, image_size)
                img_normalized = img_resized / 255.0  # Normalizálás 0-1 közé
                images.append(img_normalized.flatten())  # Laposítjuk a képet            
    
    images = np.array(images)
    df = pd.DataFrame(images)
    df.insert(0, 'target', target)
    
    return df

In [202]:
def read_df_test(image_folder):
    image_size = (32, 32)
    images = []
    labels = []
    
    for filename in os.listdir(image_folder):
        if filename.endswith(".png"):
            img = cv2.imread(os.path.join(image_folder, filename), cv2.IMREAD_GRAYSCALE)
            if img is not None:
                img_resized = cv2.resize(img, image_size)
                img_normalized = img_resized / 255.0  # Normalizálás 0-1 közé
                images.append(img_normalized.flatten())  # Laposítjuk a képet
                label = filename
                labels.append(label)
    
    images = np.array(images)
    labels = np.array(labels)
    
    # DataFrame létrehozása a képadatokból és címkékből
    df = pd.DataFrame(images)  # Minden sor egy kép laposított verziója
    df.insert(0, 'label', labels)
    
    # A DataFrame ellenőrzése
    return df

In [203]:
def read_folders(folder_path):
    folders = []

    for folder in os.listdir(folder_path):
        if 'Sample' in folder:
            folders.append(folder)
    
    return sorted(folders)

In [223]:
df_array = []
df_names = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z']

for folder, i in zip(read_folders("data/Train"), df_names):
    df_array.append(read_df("data/Train/" + folder, i))
    
df = pd.concat(df_array)

In [224]:
X = df.iloc[:, 1:]

In [225]:
y = df.loc[:, 'target']

In [226]:
pred = cross_val_predict(GaussianNB(), X, y, cv=10)

In [227]:
accuracy_score(pred, y)

0.4059467388851275

In [234]:
df_test = read_df_test("data/TestData")

In [235]:
df_test.head(1)

Unnamed: 0,label,0,1,2,3,4,5,6,7,8,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,Test4751.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [240]:
model = GaussianNB()
model.fit(X, y)

In [241]:
X_test = df_test.iloc[:, 1:]

In [242]:
pred = model.predict(X_test)

In [244]:
df_test.insert(0, 'pred', pred)

In [245]:
df_test = df_test.sort_values(by='label').reset_index(drop=True)

In [246]:
df_test

Unnamed: 0,pred,label,0,1,2,3,4,5,6,7,...,1014,1015,1016,1017,1018,1019,1020,1021,1022,1023
0,I,Test0001.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
1,I,Test0002.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
2,j,Test0003.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
3,j,Test0004.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
4,j,Test0005.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7095,4,Test7096.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7096,J,Test7097.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7097,4,Test7098.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
7098,V,Test7099.png,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0
