In [99]:
import torch
from torch import nn
from torchvision import models, transforms
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from PIL import Image
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns

In [3]:
model = models.resnet18(weights=models.ResNet18_Weights.DEFAULT)
model.fc = nn.Identity()
model.eval()

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [4]:
preprocess = transforms.Compose([
    transforms.Resize(256),
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize(mean=(122.7, 114.6, 100.9), std=(59.2, 58.4, 59.0)),
])


In [5]:
list_file = 'data/oxford_pets/annotations/list.txt'
root_path = 'data/oxford_pets/images/' 
labels = []
names=[]
image_paths = []

for line in open(list_file).read().splitlines():
    if line[0]!="#":
        name, class_id, species_id, breed_id = line.strip().split()
        image_paths.append(f'{root_path}/{name}.jpg')
        labels.append(int(species_id)-1)
        names.append(name)

classes = torch.tensor(labels)


In [6]:

batch_size = 2000
num_batches = len(image_paths) // batch_size

def process_batch(batch_paths):
    images = []
    for img_path in batch_paths:
        img = Image.open(img_path).convert("RGB")
        img = preprocess(img)
        images.append(img)
    return torch.stack(images)

features_list = []
with torch.no_grad():
    for i in range(num_batches):
        batch_paths = image_paths[i * batch_size : (i + 1) * batch_size]
        images = process_batch(batch_paths)
        batch_features = model(images)
        features_list.append(batch_features)


    if len(image_paths) % batch_size != 0:
        batch_paths = image_paths[num_batches * batch_size :]
        images = process_batch(batch_paths)
        batch_features = model(images)
        features_list.append(batch_features)

features = torch.cat(features_list)

print(features.shape)

torch.Size([7349, 512])


In [134]:
scores_train, scores_valid, targets_train, targets_valid = train_test_split(
    features, classes, test_size=0.3, random_state=42, stratify=classes
)
scores_train = scores_train.numpy()
scores_valid = scores_valid.numpy()
targets_train = targets_train.numpy()
targets_valid = targets_valid.numpy()

In [135]:
logistic = LogisticRegression(max_iter=5000)
knn = KNeighborsClassifier()
kms = KMeans(n_clusters=2, random_state=2048, init='random')

In [136]:
method = logistic
method.fit(scores_train, targets_train)
preds = method.predict(scores_valid)
conf_matrix_log = confusion_matrix(preds, targets_valid)
(preds==targets_valid).mean()

0.7306122448979592

In [122]:
conf_matrix_df = pd.DataFrame(conf_matrix_log, index=['True 0', 'True 1'], columns=['Pred 0', 'Pred 1'])
fig = px.imshow(conf_matrix_df, text_auto=True, color_continuous_scale='Blues', aspect='auto')
fig.update_layout(
    xaxis_title='Predição', yaxis_title='Verdadeiro',
    width=500,
    height=400,
    margin=dict(l=20, r=20, t=20, b=20))
fig.write_image("plots/mat_log.pdf", engine="kaleido")
fig.show()

In [132]:
knn.fit(scores_train, targets_train)
knn_pred = knn.predict(scores_valid)
conf_matrix_knn = confusion_matrix(knn_pred, targets_valid)
acc = accuracy_score(targets_valid, knn_pred)
acc

0.7115646258503401

In [123]:
conf_matrix_df = pd.DataFrame(conf_matrix_knn, index=['True 0', 'True 1'], columns=['Pred 0', 'Pred 1'])
fig = px.imshow(conf_matrix_df, text_auto=True, color_continuous_scale='Blues', aspect='auto')
fig.update_layout( 
    xaxis_title='Predição', yaxis_title='Verdadeiro',
    width=500,
    height=400,
    margin=dict(l=20, r=20, t=20, b=20))
fig.write_image("plots/mat_knn.pdf", engine="kaleido")
fig.show()

In [133]:
cluster_labels_train = kms.fit_predict(scores_train)
cluster_labels_valid = kms.fit_predict(scores_valid)
conf_matrix_kmeans = confusion_matrix(cluster_labels_valid, targets_valid)
(cluster_labels_valid==targets_valid).mean()


0.5095238095238095

In [124]:
conf_matrix_df = pd.DataFrame(conf_matrix_kmeans, index=['True 0', 'True 1'], columns=['Pred 0', 'Pred 1'])
fig = px.imshow(conf_matrix_df, text_auto=True, color_continuous_scale='Blues', aspect='auto')
fig.update_layout( 
    xaxis_title='Predição', yaxis_title='Verdadeiro',
    width=500,
    height=400,
    margin=dict(l=20, r=20, t=20, b=20))
fig.write_image("plots/mat_kmeans.pdf", engine="kaleido")
fig.show()

In [32]:
array = cluster_labels_valid
print(f'len: {len(array)}\ncount1: {np.sum(array)}\ncount0: {len(array) - np.sum(array)}')

len: 3675
count1: 1052
count0: 2623


In [97]:
dataFile = pd.read_csv('info.csv')
df_long = pd.melt(dataFile, id_vars=['Model'], var_name='count', value_name='dist')
fig = px.bar(df_long, x='Model', y='dist', color='count', barmode='group', labels={'Model':'Modelos', 'dist':'Distribuição','count':'Classificação'})

fig.update_layout(
    width=1000,
    height=400,
    margin=dict(l=20, r=20, t=20, b=20)
)

fig.write_image("plots/dist.pdf", engine="kaleido")

fig.show()

In [98]:
df = pd.read_csv('acc.csv')

fig = px.bar(df, x='Model', y='Accuracy', labels={'Model':'Modelos', 'Accuracy':'Accurácia'})

fig.update_layout(
    width=600,
    height=400,
    margin=dict(l=20, r=20, t=20, b=20)
)

fig.write_image("plots/acc.pdf", engine="kaleido")

fig.show()