## Часть 0: Импортирование модулей

In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import pickle

In [2]:
import os

from PIL import Image, ImageEnhance
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from skimage.feature import hog
from skimage.feature import local_binary_pattern
from skimage.color import rgb2gray
from sklearn.decomposition import PCA


In [3]:
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
from torchvision import transforms
from torchvision.transforms import v2
from torch import float32
from torch import manual_seed

## Часть 1: Загрузка фотографий


In [4]:
size = (128, 128) # стандарт размера для каждой картинки
pixels = 128*128

In [5]:
data_transform = v2.Compose([
    v2.Resize(size),
    v2.ToImage(),
    v2.ToDtype(float32),
    v2.Normalize([0.5], [0.5]),
    v2.Grayscale() 
])

In [6]:
train = ImageFolder('data/train', transform = data_transform)
test = ImageFolder('data/test', transform = data_transform)

In [7]:
curr_classes = train.classes
curr_classes_dict = train.class_to_idx

manual_seed(42)
train_loader = DataLoader(train, batch_size = len(train), shuffle=True)
test_loader = DataLoader(test, batch_size = len(test), shuffle=True)

x_train, y_train= next(iter(train_loader))[0].numpy(), next(iter(train_loader))[1].numpy()
x_test, y_test = next(iter(test_loader))[0].numpy(), next(iter(test_loader))[1].numpy()

In [8]:
x_train = x_train.reshape(-1, pixels)
x_test = x_test.reshape(-1, pixels)

In [9]:
pca = PCA(n_components=0.8, svd_solver='full')
pca.fit(x_train)
x_train_pca = pca.transform(x_train)
x_test_pca = pca.transform(x_test)

In [10]:
knn_params ={'n_neighbors': list(range(1, 20))}

tree_params = {
    "criterion": ['gini', 'entropy'],
    "max_depth": list(range(5, 20)),
    "random_state": [47],
    'class_weight': [{1:1, -1:1.5}, {1:1, -1:2}]
}

forest_params = {
    "n_estimators": list(range(10, 400, 10)),
    "criterion": ['gini', 'entropy', 'log_loss'],
    "random_state": [47],
    'class_weight': [{1:1, -1:1.5}, {1:1, -1:2}, {1:1, -1:1}]
}

svc_params = {
    'C': np.arange(0.5, 2.1, 0.1),
    'kernel': ['rbf',],
    'gamma': ['scale', 'auto'],
    'class_weight': [{1:1, -1:1.5}, {1:1, -1:2}]
}

In [15]:
names = ['knn', 'tree', 'forest', 'svc']

models = {'knn':[KNeighborsClassifier(), knn_params],
          'tree':[DecisionTreeClassifier(), tree_params],
          'forest':[RandomForestClassifier(), forest_params],
          'svc':[SVC(), svc_params]}

results = pd.DataFrame(columns = ['model_name', 'method', 'score'])
classifiers = []

for m in models.keys():
    print(m, 'is training')
    gs = GridSearchCV(estimator= models[m][0],
                        param_grid = models[m][1],
                        scoring = ['f1_micro', 'f1_macro'],
                        cv = 5,
                        n_jobs=12,
                        refit = 'f1_micro',
                        verbose= 4
                        )
    gs.fit(x_train, y_train)

    sc = gs.best_estimator_.score(x_test, y_test, average = 'micro')

    results.loc[-1] = [m, sc]
    results = results.reset_index(drop = True)
    classifiers.append(gs.best_estimator_)

knn is training
Fitting 5 folds for each of 19 candidates, totalling 95 fits


KeyboardInterrupt: 

In [12]:
results.sort_values(by = 'score')

Unnamed: 0,model_name,method,score


In [13]:
knn = KNeighborsClassifier(n_neighbors=10)

knn.fit(x_train, y_train)
knn.score(x_test, y_test)

0.015