# Llegim totes les imatges del dataset

In [1]:
import os

def get_image_files_by_folder(root_folder, image_extensions=('jpg', 'jpeg', 'png', 'gif', 'bmp')):
    image_files_by_folder = {}

    # Walk through the root folder and its subdirectories
    for folder_path, _, files in os.walk(root_folder):
        folder_name = os.path.basename(folder_path)
        image_files = []

        for file in files:
            file_extension = file.lower().split('.')[-1]
            if file_extension in image_extensions:
                image_files.append(os.path.join(folder_path, file))

        if image_files:
            image_files_by_folder[folder_name] = image_files

    return image_files_by_folder

In [2]:
train_dataset_path = "data/train"
images_to_load = get_image_files_by_folder(train_dataset_path)
images_to_load.keys()

dict_keys(['bedroom', 'Coast', 'Forest', 'Highway', 'industrial', 'Insidecity', 'kitchen', 'livingroom', 'Mountain', 'Office', 'OpenCountry', 'store', 'Street', 'Suburb', 'TallBuilding'])

# Cream el dataframe

In [61]:
import os
import pandas as pd
from skimage.feature import hog
from skimage import io
from sklearn.feature_extraction import image
from sklearn.decomposition import PCA
import time

def read_and_flatten_images(image_list):
    start_time = time.time()
    image_data = []
    
    for image_path in image_list:
        img = io.imread(image_path)
        pca_img = pca_reduction(img)
        hog_vectors = hog(pca_img, orientations=8, pixels_per_cell=(20, 20),
                          cells_per_block=(10, 10), channel_axis=None)
        image_data.append(hog_vectors)
    
    end_time = time.time()
    print("Read + HOG time : ", end_time - start_time, " seconds")

    return image_data

def pca_reduction(input_data):
    pca = PCA(200)
    pca.fit(input_data)
    pca_data = pca.transform(input_data)
    return pca_data

In [62]:
image_df = pd.DataFrame()
for label, image_list in images_to_load.items():
    image_data = read_and_flatten_images(images_to_load[label])
    next_part = pd.DataFrame(image_data)
    next_part['label'] = label
    image_df = pd.concat([image_df, next_part], ignore_index=True)

Read + HOG time :  0.9218354225158691  seconds
Read + HOG time :  0.8918097019195557  seconds
Read + HOG time :  0.8948125839233398  seconds
Read + HOG time :  0.8797986507415771  seconds
Read + HOG time :  0.8727920055389404  seconds
Read + HOG time :  0.9068238735198975  seconds
Read + HOG time :  0.8868050575256348  seconds
Read + HOG time :  0.8798017501831055  seconds
Read + HOG time :  0.8918101787567139  seconds
Read + HOG time :  0.8797988891601562  seconds
Read + HOG time :  0.8797988891601562  seconds
Read + HOG time :  0.8908092975616455  seconds
Read + HOG time :  0.8577785491943359  seconds
Read + HOG time :  0.8592948913574219  seconds
Read + HOG time :  0.8637845516204834  seconds


In [63]:
image_df.tail()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,791,792,793,794,795,796,797,798,799,label
1495,0.229905,0.015361,0.00668,0.00271,0.015779,0.004498,0.020052,0.190435,0.019736,0.009258,...,0.000378,0.000102,7.6e-05,0.000112,0.00013,9.4e-05,8.7e-05,7.3e-05,7.6e-05,TallBuilding
1496,0.225886,0.020024,0.012234,0.010951,0.02026,0.017093,0.037002,0.225886,0.046505,0.024213,...,0.000332,0.00012,5e-05,3.9e-05,3e-05,6.5e-05,6.9e-05,0.000123,0.000132,TallBuilding
1497,0.103775,0.001477,5.8e-05,0.000184,0.006315,0.000381,0.000998,0.230154,0.008554,0.001161,...,0.000128,5.6e-05,2.5e-05,2.5e-05,2.3e-05,2.3e-05,1.4e-05,1.9e-05,4e-05,TallBuilding
1498,0.156037,0.022486,0.016395,0.012957,0.024969,0.016237,0.032049,0.201537,0.03803,0.021154,...,0.000489,0.000181,0.000102,6.2e-05,6.1e-05,8.3e-05,0.000105,0.00015,9.4e-05,TallBuilding
1499,0.08892,0.031172,0.005435,0.00382,0.012759,0.012513,0.029191,0.178505,0.040509,0.038767,...,0.000119,4.7e-05,1.3e-05,3e-06,1.1e-05,1.2e-05,9e-06,1.5e-05,4e-05,TallBuilding


In [64]:
# TODO: read https://scikit-learn.org/stable/modules/preprocessing.html#preprocessing
image_df.shape

(1500, 801)

# Cream els conjunts de test i d'entrenament

In [65]:
from sklearn.model_selection import train_test_split

# TODO: Es necesari? tenim un conjunt de test ja. Això seria un conjunt de validacio "per fer overfitting" :DD
X_train, X_test, y_train, y_test = train_test_split(image_df.drop('label', axis=1), image_df['label'], test_size=0.33, random_state=42)

# Entrenam el model

In [68]:
from sklearn.metrics import precision_score, make_scorer
from sklearn.svm import SVC

svm = SVC(C=1.0, kernel='poly', random_state=42)
svm.fit(X_train, y_train)

SVC(kernel='poly', random_state=42)

# Donam una predicció

In [69]:
y_predicted = svm.predict(X_test)

print(f"La precisició es de: {precision_score(y_test, y_predicted, average='micro')}")

La precisició es de: 0.4909090909090909
