In [41]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from sklearn.model_selection import train_test_split
import numpy as np
import pandas as pd

In [42]:
class MyDataset(Dataset):
    """Map-style dataset that pairs each sample with its label by position.

    Args:
        data: Sized, indexable collection of samples.
        labels: Sized, indexable collection of labels, one per sample.

    Raises:
        ValueError: If ``data`` and ``labels`` do not have the same length.
    """

    def __init__(self, data, labels):
        # Fail fast: a length mismatch would otherwise only show up as an
        # IndexError in the middle of an epoch (labels shorter than data) or
        # as silently ignored labels (labels longer than data).
        if len(data) != len(labels):
            raise ValueError(
                "data and labels must have the same length, "
                f"got {len(data)} and {len(labels)}"
            )
        self.data = data
        self.labels = labels

    def __len__(self):
        """Return the number of samples."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.data[idx], self.labels[idx]

In [43]:
# Locations of the training table and the separate test table,
# relative to the notebook's working directory.
train_csv = '../Datasets/ultimate_color.csv'
test_csv = '../Datasets/ultimate_color_test.csv'

df = pd.read_csv(train_csv)
df_test = pd.read_csv(test_csv)

In [44]:
# Columns kept for modelling. The list is defined once and applied to both
# frames so the training and test tables can never drift apart.
FEATURE_COLUMNS = [
    'original_glcm_MaximumProbability',
    'original_glrlm_LongRunHighGrayLevelEmphasis',
    'original_glrlm_LowGrayLevelRunEmphasis',
    'original_glszm_LargeAreaEmphasis',
    'original_glszm_LargeAreaLowGrayLevelEmphasis',
    'original_glszm_ZoneEntropy',
    'original_shape2D_Sphericity',
    'original_firstorder_90Percentile_r',
    'original_firstorder_Kurtosis_r',
    'original_firstorder_Skewness_r',
    'original_glcm_ClusterShade_r',
    'original_glcm_MCC_r',
    'original_gldm_LargeDependenceLowGrayLevelEmphasis_r',
    'original_gldm_SmallDependenceEmphasis_r',
    'original_glrlm_LongRunEmphasis_r',
    'original_glrlm_LongRunLowGrayLevelEmphasis_r',
    'original_glrlm_ShortRunEmphasis_r',
    'original_glrlm_ShortRunLowGrayLevelEmphasis_r',
    'original_glszm_LargeAreaHighGrayLevelEmphasis_r',
    'original_glszm_LargeAreaLowGrayLevelEmphasis_r',
    'original_glszm_SizeZoneNonUniformity_r',
    'original_glszm_SmallAreaEmphasis_r',
    'original_glszm_SmallAreaLowGrayLevelEmphasis_r',
    'original_glszm_ZoneVariance_r',
    'original_ngtdm_Busyness_r',
    'original_ngtdm_Coarseness_r',
    'original_ngtdm_Strength_r',
    'original_shape2D_MeshSurface_r',
    'original_shape2D_PixelSurface_r',
    'original_shape2D_Sphericity_r',
    'original_firstorder_10Percentile_g',
    'original_firstorder_InterquartileRange_g',
    'original_glcm_ClusterShade_g',
    'original_glcm_MCC_g',
    'original_glcm_MaximumProbability_g',
    'original_gldm_SmallDependenceHighGrayLevelEmphasis_g',
    'original_glrlm_LongRunHighGrayLevelEmphasis_g',
    'original_glrlm_LongRunLowGrayLevelEmphasis_g',
    'original_glszm_ZoneEntropy_g',
    'original_shape2D_MeshSurface_g',
    'original_shape2D_PixelSurface_g',
    'original_firstorder_Kurtosis_b',
    'original_glcm_ClusterProminence_b',
    'original_glcm_ClusterShade_b',
    'original_gldm_DependenceNonUniformityNormalized_b',
    'original_gldm_LargeDependenceHighGrayLevelEmphasis_b',
    'original_glrlm_RunLengthNonUniformity_b',
    'original_glszm_LargeAreaEmphasis_b',
    'original_glszm_LargeAreaHighGrayLevelEmphasis_b',
    'original_ngtdm_Busyness_b',
    'original_ngtdm_Coarseness_b',
    'original_shape2D_MeshSurface_b',
    'original_shape2D_PixelSurface_b',
]

# The label column goes last: the cells below read features as "every column
# but the last" and the label as "the last column".
TARGET_COLUMN = 'category'
SELECTED_COLUMNS = FEATURE_COLUMNS + [TARGET_COLUMN]

# .loc raises KeyError if either CSV is missing one of the selected columns.
df = df.loc[:, SELECTED_COLUMNS]
df_test = df_test.loc[:, SELECTED_COLUMNS]

In [45]:
# In both frames the label is the last column and everything before it is a
# feature.
X, y = df.iloc[:, :-1].values, df.iloc[:, -1].values
X_test, y_test = df_test.iloc[:, :-1].values, df_test.iloc[:, -1].values

# Hold out 30% of the training table for validation; stratifying on y keeps
# the class proportions equal in both parts, and the fixed random_state makes
# the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

In [46]:
# Convert the numpy splits to tensors: float tensors for the features and
# long (integer) tensors for the labels, which nn.CrossEntropyLoss takes as
# class indices.
X_train, X_val, X_test = (
    torch.FloatTensor(features) for features in (X_train, X_val, X_test)
)
y_train, y_val, y_test = (
    torch.LongTensor(targets) for targets in (y_train, y_val, y_test)
)

In [47]:
def _build_loader(features, targets, shuffle):
    """Wrap a feature/label pair in a MyDataset and a batch-size-32 DataLoader.

    Returns the ``(dataset, loader)`` pair so both stay available by name.
    """
    dataset = MyDataset(features, targets)
    return dataset, DataLoader(dataset, batch_size=32, shuffle=shuffle)


# Only the training loader reshuffles its samples; validation and test keep
# the stored order.
# NOTE(review): a final batch holding a single sample would make BatchNorm1d
# fail in training mode — confirm the training set size modulo 32 is not 1.
train_dataset, train_loader = _build_loader(X_train, y_train, shuffle=True)
val_dataset, val_loader = _build_loader(X_val, y_val, shuffle=False)
test_dataset, test_loader = _build_loader(X_test, y_test, shuffle=False)


In [48]:
class MyModel(nn.Module):
    """Fully connected classifier with three hidden blocks and a linear head.

    Hidden widths are 512, 128 and 64. Every hidden block is
    Linear -> BatchNorm1d -> ReLU; the second and third blocks additionally
    apply dropout (p=0.2). The head returns raw, un-normalised class scores.

    Args:
        num_feature: Number of input features per sample.
        num_class: Number of output classes.
    """

    def __init__(self, num_feature, num_class):
        super().__init__()
        width_1, width_2, width_3 = 512, 128, 64

        # Linear layers, created in input-to-output order.
        self.layer_1 = nn.Linear(num_feature, width_1)
        self.layer_2 = nn.Linear(width_1, width_2)
        self.layer_3 = nn.Linear(width_2, width_3)
        self.layer_out = nn.Linear(width_3, num_class)

        # Parameter-free modules shared by all hidden blocks.
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(p=0.2)

        # One batch-norm per hidden width.
        self.batchnorm1 = nn.BatchNorm1d(width_1)
        self.batchnorm2 = nn.BatchNorm1d(width_2)
        self.batchnorm3 = nn.BatchNorm1d(width_3)

    def forward(self, x):
        """Map a batch of feature vectors to a batch of class scores."""
        # Block 1 has no dropout.
        hidden = self.relu(self.batchnorm1(self.layer_1(x)))
        # Blocks 2 and 3 end with dropout.
        hidden = self.dropout(self.relu(self.batchnorm2(self.layer_2(hidden))))
        hidden = self.dropout(self.relu(self.batchnorm3(self.layer_3(hidden))))
        return self.layer_out(hidden)

In [49]:
def train(model, train_loader, valid_loader, optimizer, criterion, epochs,
          checkpoint_path=None):
    """Train ``model`` and evaluate it on ``valid_loader`` after every epoch.

    Args:
        model: Module to optimise; switched between train and eval mode here.
        train_loader: Loader yielding ``(data, label)`` training batches.
        valid_loader: Loader yielding ``(data, label)`` validation batches.
        optimizer: Optimizer already bound to ``model``'s parameters.
        criterion: Loss returning the mean loss of a batch.
        epochs: Number of full passes over ``train_loader``.
        checkpoint_path: Optional file path. When given, ``model.state_dict()``
            is saved there every time the validation loss improves. The
            default ``None`` saves nothing.

    Returns:
        dict with keys ``'train_loss'`` and ``'valid_loss'`` (per-epoch mean
        losses), ``'best_valid_loss'`` (lowest validation loss seen, ``inf``
        if no epoch ran) and ``'best_epoch'`` (1-based epoch of that loss, or
        ``None``).
    """
    history = {
        'train_loss': [],
        'valid_loss': [],
        'best_valid_loss': float('inf'),
        'best_epoch': None,
    }
    num_train = len(train_loader.dataset)
    num_valid = len(valid_loader.dataset)

    for epoch in range(epochs):
        train_loss = 0.0
        valid_loss = 0.0

        # The training loss is accumulated in train mode, i.e. with dropout
        # active and batch statistics in the batch-norm layers.
        model.train()
        for data, label in train_loader:
            optimizer.zero_grad()
            output = model(data)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            # Weight the batch mean by the batch size so that a smaller final
            # batch does not skew the epoch average.
            train_loss += loss.item() * data.size(0)

        model.eval()
        with torch.no_grad():
            for data, label in valid_loader:
                output = model(data)
                loss = criterion(output, label)
                valid_loss += loss.item() * data.size(0)

        train_loss = train_loss / num_train
        valid_loss = valid_loss / num_valid
        history['train_loss'].append(train_loss)
        history['valid_loss'].append(valid_loss)
        print(f'Epoch: {epoch+1}, Training Loss: {train_loss:.4f}, Validation Loss: {valid_loss:.4f}')

        if valid_loss < history['best_valid_loss']:
            history['best_valid_loss'] = valid_loss
            history['best_epoch'] = epoch + 1
            if checkpoint_path is not None:
                torch.save(model.state_dict(), checkpoint_path)

    return history

In [50]:
# Seed torch's global RNG before the model is built so that weight
# initialisation, dropout masks and the shuffling order of train_loader are
# repeatable across kernel restarts.
torch.manual_seed(42)

NUM_CLASSES = 7

# Take the input width from the training tensor instead of hard-coding it, so
# it cannot get out of sync with the selected feature columns.
model = MyModel(X_train.shape[1], NUM_CLASSES)
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)

# Bind whatever train() returns to a name so it is not echoed as cell output.
training_result = train(model, train_loader, val_loader, optimizer, criterion, epochs=50)

Epoch: 1, Training Loss: 1.8374, Validation Loss: 1.8355
Epoch: 2, Training Loss: 1.8034, Validation Loss: 1.8509
Epoch: 3, Training Loss: 1.7983, Validation Loss: 1.8823
Epoch: 4, Training Loss: 1.7942, Validation Loss: 1.8426
Epoch: 5, Training Loss: 1.7933, Validation Loss: 1.8376
Epoch: 6, Training Loss: 1.7983, Validation Loss: 1.8280
Epoch: 7, Training Loss: 1.7903, Validation Loss: 1.7779


KeyboardInterrupt: 