In [None]:
from data_loader import WireDataset
import torch
import math
import numpy as np
from torch.utils.data import DataLoader
from ML import Net
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torchmetrics.classification import F1Score, MulticlassRecall, MulticlassConfusionMatrix
import pandas as pd
import seaborn as sb


In [None]:
# Build the dataset from its CSV export.
dataset = WireDataset("Input_Data_07_25.csv")

# Use the GPU when CUDA is available, otherwise stay on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

In [None]:
# Fixed seed so the train/test partition is the same on every run.
generator = torch.Generator().manual_seed(4)

# Despite the "_fraction" suffix these hold sample counts:
# 70 % of the dataset (rounded down) for training, the remainder for testing.
train_fraction = math.floor(0.7 * len(dataset))
test_fraction = len(dataset) - train_fraction

# random_split returns the subsets in the order of the requested lengths,
# so the test subset is unpacked first.
test_data_ind, train_data_ind = torch.utils.data.random_split(
    dataset,
    lengths=[test_fraction, train_fraction],
    generator=generator,
)

# Both loaders use mini-batches of 20 samples and reshuffle on every pass.
dl_train = DataLoader(dataset=train_data_ind, batch_size=20, shuffle=True)  # todevice
dl_test = DataLoader(dataset=test_data_ind, batch_size=20, shuffle=True)  # todevice

net = Net()  # todevice

# Cross-entropy loss optimised by SGD with momentum.
# Previously tried alternative: torch.optim.SGD(net.parameters(), lr=0.0001)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.001, momentum=0.9)

num_epochs = 100

In [None]:
# Per-epoch statistics, one column per epoch:
#   row 0 -> mean of the per-batch F1 scores on the test loader
#   row 1 -> standard deviation of those per-batch F1 scores
#   row 2 -> mean training loss
# Sized from num_epochs so the buffer always matches the training loop.
epochs_stats = np.zeros((3, num_epochs))

# Multiclass F1 metric over 6 classes.
F1 = F1Score("multiclass", num_classes=6)

# Best mean F1 seen so far; used to decide when to checkpoint the model.
f1_max = 0

In [None]:
for epoch in range(num_epochs):  # loop over the dataset multiple times
    # ---------------- training phase ----------------
    epoch_loss = []      # loss of every mini-batch seen in this epoch
    running_loss = 0.0   # loss accumulated since the last progress print
    net.train()
    for i, data in enumerate(dl_train, 0):
        # data is a pair of [inputs, labels]
        inputs, labels = data

        # zero the parameter gradients
        optimizer.zero_grad()

        # forward + backward + optimize
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # bookkeeping / progress output
        batch_loss = loss.item()
        running_loss += batch_loss
        epoch_loss.append(batch_loss)
        if i % 10 == 9:    # print every 10 mini-batches
            print(f'[{epoch + 1}, {i + 1:5d}] loss: {running_loss / 10:.3f}')
            running_loss = 0.0

    # ---------------- evaluation phase ----------------
    f1_list = np.zeros(len(dl_test))  # one F1 value per test mini-batch
    net.eval()
    # Forward passes only: disabling gradient tracking avoids building
    # autograd graphs that are never used during evaluation.
    with torch.no_grad():
        for i, data in enumerate(dl_test, 0):
            inputs, labels = data
            outputs = net(inputs)
            f1_list[i] = F1(outputs, labels).item()
            if i % 10 == 9:    # print every 10 mini-batches
                print(f"[batch {i}, size: {dl_test.batch_size}] F1 score: {f1_list[i]}")

    # ---------------- per-epoch statistics ----------------
    # Note: the F1 figures are the mean / std of per-batch scores, not a
    # single F1 computed over the whole test set.
    epoch_loss = np.array(epoch_loss, dtype="float")
    epochs_stats[0, epoch] = f1_list.mean()
    epochs_stats[1, epoch] = f1_list.std()
    epochs_stats[2, epoch] = epoch_loss.mean()

    # Checkpoint the weights whenever the mean F1 improves on the best so far.
    if epochs_stats[0, epoch] > f1_max:
        torch.save(net.state_dict(), "model.ebr")
        f1_max = epochs_stats[0, epoch]

In [None]:
print('Finished Training')

# Persist the per-epoch statistics (rows: F1 mean, F1 std, loss; one column per epoch).
# index=False drops the row labels, so the CSV holds a header of epoch numbers
# followed by the three data rows.
stats_column_names = ["F1", "F1 Standard Deviation", "Loss"]
train_stats = pd.DataFrame(epochs_stats, index=stats_column_names)
train_stats.to_csv("Train_statistics.csv", encoding="utf-8", index=False)

# Report the best epoch(s): np.where returns every index that reaches the maximum F1.
ind = np.where(epochs_stats[0]==epochs_stats[0].max())
print(f"Maximum F1: {epochs_stats[0].max()} standard deviation: {epochs_stats[1][ind]}, Minimum Loss: {epochs_stats[2].min()}")

# Re-read the statistics from disk and plot from that copy.
df = pd.read_csv("Train_statistics.csv")
array = df.to_numpy()  # row 0: F1 mean, row 1: F1 std, row 2: loss

# x positions derived from the data so the plot follows the number of epochs stored.
xs = list(range(array.shape[1]))

# Half-width of the shaded band around the F1 curve: t * std / sqrt(20).
# NOTE(review): 20 equals the DataLoader batch size; if this is meant to be a
# standard error over the per-batch F1 scores, the divisor should presumably be
# the number of test batches instead - confirm. The t value is a hard-coded
# constant whose confidence level / degrees of freedom are not stated here.
t_student = 2.336242
mult_factor = t_student/math.sqrt(20)
err = array[1] * mult_factor

# F1 (left axis, blue, with error band) and training loss (right axis, red).
fig, ax1 = plt.subplots(figsize=(3.5, 3))
ax1.set_xlabel("epoch")
ax1.set_ylabel("F1 Score", color = "blue")
ax1.plot(array[0], color="blue")
ax1.fill_between(x=xs, y1=array[0] + err, y2=array[0] - err, facecolor='blue', alpha=0.25, edgecolor='none')
ax2 = ax1.twinx()
ax2.set_ylabel("Loss", color = "red")
ax2.plot(array[2], color = "red")

plt.savefig("f1.pdf", bbox_inches="tight")

In [None]:

# Per-class recall (average=None keeps one value per class) and confusion matrix, 6 classes.
mcr = MulticlassRecall(num_classes=6, average=None)
mccm = MulticlassConfusionMatrix(num_classes=6)

net.eval()
output_batches = []  # raw network outputs, one tensor per mini-batch
label_batches = []   # matching labels, one tensor per mini-batch
f1_list = np.zeros(len(dl_test))

# Inference only: no autograd graph is needed.
with torch.no_grad():
    for i, data in enumerate(dl_test, 0):
        inputs, labels = data
        outputs = net(inputs)

        # Collect batches in lists and concatenate once after the loop instead
        # of re-stacking a growing array on every iteration.
        output_batches.append(outputs.detach())
        label_batches.append(labels.detach())

        f1_list[i] = F1(outputs, labels).item()
        if i % 10 == 9:    # print every 10 mini-batches
            print(f"[batch {i}, size: {dl_test.batch_size}] F1 score: {f1_list[i]}")

# Concatenate along the batch dimension.
# assumes outputs are (batch, classes) and labels are 1-D class indices - TODO confirm
all_outputs = torch.cat(output_batches, dim=0)
all_labels = torch.cat(label_batches, dim=0)

# Keep `out` / `lab` available as numpy arrays for the cells that follow.
out = all_outputs.cpu().numpy()
lab = all_labels.cpu().numpy()

# Labels keep their original dtype here rather than being cast to float.
print(mcr(all_outputs, all_labels))
print(mccm(all_outputs, all_labels))

In [None]:
# Human-readable class names (currently unused) and the short tick labels used on the plot.
class_full_names = ["Parassitic", "Wire Parallel Defect", "Wire Parallel Perfect", "Wire Tilted Defect", "Wire Tilted Perfect", "Null",]
class_names = ["0", "1", "2", "3", "4", "5"]

# Confusion matrix of raw counts from the collected outputs / labels.
# torch.as_tensor keeps the dtype of `lab` instead of casting the labels to float.
# Example of a previously obtained matrix, kept for reference:
#   np.array([[ 34,   2,   6,   0,   1,   1],
#             [  0,  11,  17,   0,   1,   2],
#             [  0,   5, 567,   1,   4,  14],
#             [  0,   1,   2,   4,   7,   2],
#             [  0,   1,   9,   2, 248,   5],
#             [  1,   4,  18,   2,   5, 129]], dtype="float")
confusion_matrix = mccm(torch.as_tensor(out), torch.as_tensor(lab)).numpy()

# Row-normalise to percentages of each true class (L1 norm of a row = its total count).
norm_vector = np.linalg.norm(confusion_matrix, ord=1, axis=1)
norm_vector = norm_vector.reshape(-1, 1)
# A class with no samples has a zero row total; divide by 1 there so the row
# shows 0 % instead of NaN.
safe_norm = np.where(norm_vector == 0, 1, norm_vector)
perc_matrix = np.round((confusion_matrix * 100 / safe_norm), 1)
dataframe = pd.DataFrame(perc_matrix, index=class_names, columns=class_names)

cm_fig, cm_ax = plt.subplots(figsize=(3.5, 3))

# Create heatmap
sb.heatmap(dataframe, annot=True, cmap="rocket_r", vmin = 0, vmax = 100, ax=cm_ax)
cm_ax.set_title("Confusion Matrix")
cm_ax.set_ylabel("True Class")
cm_ax.set_xlabel("Predicted Class")

# Lay out after all labels are set so they are taken into account.
cm_fig.tight_layout()
cm_fig.savefig("confusion_matrix.pdf", bbox_inches="tight")