## Cлучайный лес

In [1]:
import os
import numpy as np
import pandas as pd

from torchvision.transforms import v2
from sklearn.decomposition import PCA
from PIL import Image, ImageOps
import albumentations as A



**Предобработка изображений**

In [2]:
def load_images(data='train', pca_components=100):
    path = f"../input/dermnet/{data}/"
    list_cat = os.listdir(path)
    
    X, y = ([], [])
    
    transform = A.Compose([
    A.Rotate(limit=180, border_mode=0, p=1.0),
    A.GaussNoise(var_limit=(10, 30), p=0.5), 
    A.RandomBrightnessContrast(brightness_limit=0.25, p=0.25)
    ])
    
    for i, cat in enumerate(list_cat):  
        print(f'{i}: {cat}')
        list_images = os.listdir(path + cat)
        
        for j in list_images:   
            with Image.open(path + cat + "/" + j) as img:
                # приводим изображения к чёрно-белому формату и одному размеру
                img = np.array(v2.Resize(size=(256, 256))(ImageOps.grayscale(img)))
                
                for p in range(3): 
                    # применяем аугментации 4 раза 
                    if p:
                        img = transform(image=img)["image"]
                    
                    # применяем PCA с pca_components главными компонентами
                    pca = PCA(pca_components)
                    img_pca = pca.fit_transform(img)
                
                    # вытягиваем в одномерный массив и добавляем в матрицу объекты-признаки
                    X.append(img_pca.flatten())
                    y.append(i)
    
    return np.array(X), np.array(y)


X_train, y_train = load_images(data='train', pca_components=75)
X_test, y_test = load_images(data='test', pca_components=75)

0: Light Diseases and Disorders of Pigmentation
1: Lupus and other Connective Tissue diseases
2: Acne and Rosacea Photos
3: Systemic Disease
4: Poison Ivy Photos and other Contact Dermatitis
5: Vascular Tumors
6: Urticaria Hives
7: Atopic Dermatitis Photos
8: Bullous Disease Photos
9: Hair Loss Photos Alopecia and other Hair Diseases
10: Tinea Ringworm Candidiasis and other Fungal Infections
11: Psoriasis pictures Lichen Planus and related diseases
12: Melanoma Skin Cancer Nevi and Moles
13: Nail Fungus and other Nail Disease
14: Scabies Lyme Disease and other Infestations and Bites
15: Eczema Photos
16: Exanthems and Drug Eruptions
17: Herpes HPV and other STDs Photos
18: Seborrheic Keratoses and other Benign Tumors
19: Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions
20: Vasculitis Photos
21: Cellulitis Impetigo and other Bacterial Infections
22: Warts Molluscum and other Viral Infections
0: Light Diseases and Disorders of Pigmentation
1: Lupus and other Connective 

**Обучение моделей**

In [3]:
from sklearn.ensemble import RandomForestClassifier

# обучение модели
rf_clf = RandomForestClassifier(min_samples_leaf=5, random_state=123)
print(f'Fitting of RandomForestClassifier has started...')
rf_clf.fit(X_train, y_train)
print(f'Fitting of RandomForestClassifier has finished!')

Fitting of RandomForestClassifier has started...
Fitting of RandomForestClassifier has finished!


In [4]:
# предикты
y_pred_test = rf_clf.predict(X_test)
y_pred_proba_test = rf_clf.predict_proba(X_test)

In [6]:
# результаты на тесте
print(f'Metrics of RandomForest on test: ')
print(f'   accuracy_avg  - {accuracy_score(y_test, y_pred_test):.2f}')
print(f'   precision_avg - {precision_score(y_test, y_pred_test, average="micro"):.2f}')
print(f'   reсall_avg    - {recall_score(y_test, y_pred_test, average="micro"):.2f}')
print(f'   roc_auc_avg   - {roc_auc_score(y_test, y_pred_proba_test, multi_class="ovr", average="micro"):.2f}')

Metrics of RandomForest on test: 
   accuracy_avg  - 0.15
   precision_avg - 0.15
   reсall_avg    - 0.15
   roc_auc_avg   - 0.70


In [7]:
import pickle 

# сохраняем модель в pickle файл
pickle.dump(rf_clf, open('/kaggle/working/RF_model.pkl', 'wb'))
print(f'\nModel is dumped!')


Model is dumped!
