# Evaluation of multiclass classification

The model is trained and ready to be evaluated. For this purpose, I have 6588 images that were not used in training. I decided to leave the data unbalanced so that the metrics reflect the real, production distribution of the data.

## STEP 1. Import data and libraries

In [1]:
import numpy as np 
import pandas as pd 
import zipfile
from matplotlib import pyplot as plt
import shutil 
from tqdm import tqdm
import torch
import torchvision
import time
import copy
from torchvision import transforms, models
import os
from datetime import datetime

In [3]:
# Folder holding the test frames (one sub-folder per frame sequence) and the test.json label file.
# NOTE(review): the backslash separators make this relative path Windows-specific.
data_root = r"data\data_root\al5083\test"
# List the folder contents as a quick check that the path resolves.
print(os.listdir(data_root))

['170904-150144-Al 2mm-part2', '170904-152301-Al 2mm-part2', '170904-154202-Al 2mm-part1', '170904-155610-Al 2mm', '170905-112213-Al 2mm', '170906-104925-Al 2mm', '170906-113317-Al 2mm-part1', '170906-143512-Al 2mm-part1', '170906-143512-Al 2mm-part2', '170906-151724-Al 2mm-part1', '170906-153326-Al 2mm-part3', '170906-155007-Al 2mm-part1', '170906-155007-Al 2mm-part2', '170913-154448-Al 2mm', 'test.json']


## STEP 2. Data markup

In [9]:
js = r"data\data_root\al5083\test\test.json"

# Numeric label codes used in test.json and their readable names.
code_to_name = {0: 'good_weld',
                1: 'burn_through',
                2: 'contamination',
                3: 'lack_of_fusion',
                4: 'misalignment',
                5: 'lack_of_penetration'}

# The JSON is read as a Series (image path -> code) and reshaped into a two-column frame.
labels = (
    pd.read_json(js, typ='series')
    .to_frame()
    .reset_index()
    .rename(columns={'index': 'path', 0: 'class'})
)
# Cast to object first so the integer codes can be swapped for strings.
labels['class'] = labels['class'].astype(object).replace(code_to_name)
labels = labels.sort_values(by='class').reset_index(drop=True)

# Only the defect classes are evaluated: rows labelled good_weld are dropped.
labels = labels.loc[labels['class'] != 'good_weld']
classes = labels['class'].unique()
test_labels = labels

In [10]:
# Display the labelled test frame (rows labelled good_weld have already been removed).
test_labels

Unnamed: 0,path,class
0,170906-143512-Al 2mm-part2/frame_01945.png,burn_through
1,170906-143512-Al 2mm-part2/frame_01841.png,burn_through
2,170906-143512-Al 2mm-part2/frame_01774.png,burn_through
3,170906-143512-Al 2mm-part2/frame_01906.png,burn_through
4,170906-143512-Al 2mm-part2/frame_01764.png,burn_through
...,...,...
6583,170904-150144-Al 2mm-part2/frame_00518.png,misalignment
6584,170904-150144-Al 2mm-part2/frame_00639.png,misalignment
6585,170904-150144-Al 2mm-part2/frame_00343.png,misalignment
6586,170904-150144-Al 2mm-part2/frame_00608.png,misalignment


## STEP 3. Load images 

In [7]:
# Destination root for the per-class test folders. Defined here so that this cell
# does not carry its own copy of the literal path.
test_dir = 'data/multiclass_test'

# One sub-folder per class; exist_ok makes the cell safe to re-run.
for class_name in classes:
    os.makedirs(os.path.join(test_dir, class_name), exist_ok=True)

In [11]:
test_dir = 'data/multiclass_test'
for class_name in classes:
    # Paths (relative to data_root) of every image labelled with this class.
    class_mask = labels['class'] == class_name
    source_paths = labels.loc[class_mask, 'path'].tolist()
    for idx, rel_path in enumerate(tqdm(source_paths)):
        # Files are renamed to <class>_<n>.png while being copied into the class folder.
        target_name = f'{class_name}_{idx}.png'
        shutil.copy(os.path.join(data_root, rel_path),
                    os.path.join(test_dir, class_name, target_name))
print('Изображения для теста прогружены!')

100%|██████████| 351/351 [00:00<00:00, 1281.12it/s]
100%|██████████| 2078/2078 [00:02<00:00, 763.21it/s]
100%|██████████| 1007/1007 [00:00<00:00, 1259.55it/s]
100%|██████████| 234/234 [00:00<00:00, 1272.33it/s]
100%|██████████| 729/729 [00:01<00:00, 636.73it/s] 

Изображения для теста прогружены!





In [12]:
# Build a (file name, class folder) listing of everything under test_dir.
# os.walk yields entries in an arbitrary, filesystem-dependent order, so both the
# sub-folders and the files are sorted to make the listing reproducible.
data = []
for root, dirs, files in os.walk(test_dir):
    dirs.sort()  # in-place sort steers the (top-down) walk through sub-folders in name order
    dir_name = os.path.basename(root)
    for file_name in sorted(files):
        data.append((file_name, dir_name))

dfw = pd.DataFrame(data, columns=['path', 'test_class'])
dfw

Unnamed: 0,path,test_class
0,burn_through_0.png,burn_through
1,burn_through_1.png,burn_through
2,burn_through_10.png,burn_through
3,burn_through_100.png,burn_through
4,burn_through_101.png,burn_through
...,...,...
4394,misalignment_95.png,misalignment
4395,misalignment_96.png,misalignment
4396,misalignment_97.png,misalignment
4397,misalignment_98.png,misalignment


## STEP 4. Transform

In [13]:
# Per-channel mean / std used for input normalisation. These values equal the
# ImageNet statistics listed in the torchvision pretrained-model documentation.
norm_mean = [0.485, 0.456, 0.406]
norm_std = [0.229, 0.224, 0.225]

# Resize to 224x224, convert to a tensor, then normalise each channel.
test_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(norm_mean, norm_std),
])

# ImageFolder treats every sub-folder of test_dir as one class.
test_dataset = torchvision.datasets.ImageFolder(test_dir, test_transforms)

In [14]:
# Number of images per batch passed to the test DataLoader.
batch_size = 8

In [15]:
# shuffle=False keeps the batches in dataset order; num_workers=0 loads in the main process.
test_dataloader = torch.utils.data.DataLoader(
    dataset=test_dataset,
    batch_size=batch_size,
    shuffle=False,
    num_workers=0,
)

In [16]:
# Number of images ImageFolder picked up under test_dir.
len(test_dataset)

4399

## STEP 5. Load the model

In [8]:
# NOTE(review): torch.load unpickles the file, which can execute arbitrary code —
# only load checkpoints that come from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which rejects a
# fully pickled model object; if loading fails there, pass weights_only=False — confirm
# against the installed version.
# Map the checkpoint to the GPU when one is present, otherwise to the CPU, instead of
# failing inside torch.load on machines without CUDA.
model = torch.load(
    r"desnet_model.pth",
    map_location=torch.device('cuda:0' if torch.cuda.is_available() else 'cpu'))

In [9]:
# Use the first GPU when CUDA is available; otherwise fall back to the CPU so the
# notebook still runs on machines without a GPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)
# eval() puts layers such as BatchNorm into inference mode; it returns the model,
# so the architecture is displayed as the cell output.
model.eval()

DenseNet(
  (features): Sequential(
    (conv0): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
    (norm0): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (relu0): ReLU(inplace=True)
    (pool0): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
    (denseblock1): _DenseBlock(
      (denselayer1): _DenseLayer(
        (norm1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu1): ReLU(inplace=True)
        (conv1): Conv2d(64, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
        (norm2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu2): ReLU(inplace=True)
        (conv2): Conv2d(128, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      )
      (denselayer2): _DenseLayer(
        (norm1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (relu

## STEP 6. TESTING

In [10]:
start = datetime.now()
test_predictions = []
test_class_indices = []
# ImageFolder batches carry (images, class indices), not file paths, so the paths are
# taken from the dataset itself. The loader does not shuffle, which means predictions
# come out in the same order as dataset.samples.
test_img_paths = [path for path, _ in test_dataloader.dataset.samples]

# A single no_grad context for the whole pass: gradients are never needed for evaluation.
with torch.no_grad():
    for inputs, _targets in tqdm(test_dataloader):
        preds = model(inputs.to(device))
        probs = torch.nn.functional.softmax(preds, dim=1)
        test_predictions.append(probs.cpu().numpy())  # probabilities for every class
        test_class_indices.append(probs.argmax(dim=1).cpu().numpy())  # index of the most probable class

test_predictions = np.concatenate(test_predictions)
test_class_indices = np.concatenate(test_class_indices)
finish = datetime.now() - start
print('Времени на тестирование всех изображений ушло: ', finish)

100%|██████████| 550/550 [02:39<00:00,  3.45it/s]

Времени на тестирование всех изображений ушло:  0:02:39.380490





## STEP 7. METRICS

In [12]:
# Build the per-image results table straight from the dataset that produced the
# predictions, so file names, true classes and predictions are aligned by construction
# (the DataLoader iterates test_dataset in order because shuffle=False).
class_names = test_dataset.classes  # index -> class name, as assigned by ImageFolder
sample_paths, sample_targets = zip(*test_dataset.samples)
assert len(sample_paths) == len(test_predictions), \
    'number of predictions does not match number of images in test_dataset'

df = pd.DataFrame({'right_name': [os.path.basename(p) for p in sample_paths]})

# One probability column per class, rounded to two decimals and named after the class.
rounded_probs = np.round(test_predictions, 2)
for class_idx, class_label in enumerate(class_names):
    df[class_label] = rounded_probs[:, class_idx]

df['predicted_class'] = [class_names[i] for i in test_class_indices]
df['real_class'] = [class_names[t] for t in sample_targets]
def apply_conditions(df):
    """Tag one row of the results frame as a correct or incorrect prediction.

    `df` is a single row here (the function is applied with axis=1) and must
    expose `real_class` and `predicted_class`.

    Returns 'TP' when the two classes are equal and 'FP' otherwise, i.e. the
    tags simply mean "correct" and "incorrect".
    """
    return 'TP' if df.real_class == df.predicted_class else 'FP'

# Tag every row, then count how many rows carry each tag.
df['answer'] = df.apply(apply_conditions, axis=1)
answer_group = df['answer'].groupby(df['answer']).count().to_frame('Count')
answer_group

Unnamed: 0_level_0,Count
answer,Unnamed: 1_level_1
FP,1159
TP,3238


In [13]:
# Turn the 'answer' index into a column once; the guard keeps the cell re-runnable.
if 'answer' not in answer_group.columns:
    answer_group = answer_group.reset_index()

# Look the counts up by tag rather than by row position: positional access fails
# (or swaps the two numbers) when one of the tags is absent from the results.
tag_counts = answer_group.set_index('answer')['Count']
FP = int(tag_counts.get('FP', 0))  # rows where the prediction differs from the true class
TP = int(tag_counts.get('TP', 0))  # rows where the prediction matches the true class

# Share of correctly classified images (accuracy).
print(TP / (FP + TP))

0.7364111894473505


In [None]:
# Keep only the rows tagged 'FP', i.e. predictions that differ from the true class.
res = df[df['answer'] == 'FP']

In [16]:
# Number of images for every (true class, predicted class) pair. size() counts rows
# directly, because the results frame has no 'result' column to aggregate; the output
# column is still called 'result'.
df.groupby([df['real_class'], df['predicted_class']]).size().to_frame('result')

Unnamed: 0_level_0,Unnamed: 1_level_0,result
real_class,predicted_class,Unnamed: 2_level_1
burn_through,burn_through,301
burn_through,contamination,50
contamination,burn_through,10
contamination,contamination,1790
contamination,misalignment,278
lack_of_fusion,burn_through,5
lack_of_fusion,contamination,375
lack_of_fusion,lack_of_fusion,598
lack_of_fusion,lack_of_penetration,27
lack_of_penetration,lack_of_penetration,213


Accuracy 0.7444292860391087: VGG11, SGD (best model)

                                                                                                                             
Accuracy 0.7364111894473505: DenseNet121, Adagrad