In [1]:
import os
import glob
import numpy as np
import cv2
import matplotlib.pyplot as plt
import pandas as pd

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize([0.485, 0.456, 0.406], 
                                                     [0.229, 0.224, 0.225])
                                ])

In [3]:
data = datasets.ImageFolder(r"..\data\raw\Data from Steve\train", transform=transform)
loader = DataLoader(data, batch_size=1, shuffle=False)
class_names = data.classes

In [4]:
resnet = models.resnet.resnet18(pretrained=True)
resnet.fc = nn.Identity()

In [5]:
embeddings_df = pd.DataFrame(columns=[f'embedding_{i}' for i in range(512)])
embeddings_df['target'] = 0

In [6]:
for i, (image, y) in enumerate(loader):

    embedding = resnet(image).tolist()[0]
    embedding.append(y.item())
    embedding_dict = {k:v for k, v in zip(embeddings_df.columns, embedding)}
    embeddings_df.loc[i] = embedding_dict
    

In [7]:
embeddings_df['target'] = embeddings_df['target'].astype(str)

In [8]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

In [9]:
X = embeddings_df.drop('target', axis=1)
y = embeddings_df['target']

In [10]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, stratify=y)

In [11]:
RF = RandomForestClassifier()

fit = RF.fit(X_train, y_train)

In [12]:
pred = fit.predict(X_test)

In [13]:
(pred == y_test).sum()/len(y_test)

0.72

In [14]:
from sklearn.ensemble import GradientBoostingClassifier

In [15]:
XGB = GradientBoostingClassifier()

XGB_fit = XGB.fit(X_train, y_train)
pred = XGB_fit.predict(X_test)

In [19]:
(pred == y_test).sum()/len(y_test)

0.696