In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

In [4]:
df = pd.read_csv('DatasetInitalComplete - Copy.csv')
df.columns

Index(['target', 'age_approx', 'sex', 'anatom_site_general',
       'clin_size_long_diam_mm', 'tbp_lv_A', 'tbp_lv_Aext', 'tbp_lv_B',
       'tbp_lv_Bext', 'tbp_lv_C', 'tbp_lv_Cext', 'tbp_lv_H', 'tbp_lv_Hext',
       'tbp_lv_L', 'tbp_lv_Lext', 'tbp_lv_areaMM2', 'tbp_lv_area_perim_ratio',
       'tbp_lv_color_std_mean', 'tbp_lv_deltaA', 'tbp_lv_deltaB',
       'tbp_lv_deltaL', 'tbp_lv_deltaLB', 'tbp_lv_deltaLBnorm',
       'tbp_lv_eccentricity', 'tbp_lv_location', 'tbp_lv_location_simple',
       'tbp_lv_minorAxisMM', 'tbp_lv_nevi_confidence', 'tbp_lv_norm_border',
       'tbp_lv_norm_color', 'tbp_lv_perimeterMM',
       'tbp_lv_radial_color_std_max', 'tbp_lv_stdL', 'tbp_lv_stdLExt',
       'tbp_lv_symm_2axis', 'tbp_lv_symm_2axis_angle', 'tbp_lv_x', 'tbp_lv_y',
       'tbp_lv_z', 'mel_mitotic_index', 'mel_thick_mm',
       'tbp_lv_dnn_lesion_confidence'],
      dtype='object')

In [5]:
df.sort_values(by='target', ascending = False)
positives = df[df['target']==1]
negatives = df[df['target']==0]
print(len(positives),len(negatives))
print(positives)
print(negatives)

392 17158
     target  age_approx  sex  anatom_site_general  clin_size_long_diam_mm  \
0         1        80.0    2                    2                    9.27   
1         1        75.0    0                    1                    3.88   
2         1        80.0    0                    2                    6.55   
3         1        60.0    0                    0                    5.27   
4         1        55.0    0                    3                    5.29   
..      ...         ...  ...                  ...                     ...   
387       1        70.0    2                    3                    5.13   
388       1        50.0    2                    3                    8.16   
389       1        60.0    2                    3                    5.65   
390       1        60.0    0                    4                    8.51   
391       1        65.0    2                    4                   12.08   

      tbp_lv_A  tbp_lv_Aext   tbp_lv_B  tbp_lv_Bext   tbp_lv_C  .

In [6]:
data = pd.concat([positives,negatives[0:500]])
labels = data['target']
data = data.drop(columns=['target','age_approx','mel_thick_mm'])
scaler = MinMaxScaler()
data = scaler.fit_transform(data)
print(data.shape,labels.shape)

(892, 39) (892,)


In [7]:
import pandas as pd

# Assuming df is your DataFrame
nan_percentage = df.isnull().mean() * 100

# Display only columns with NaN values and their percentages
columns_with_nan_percentage = nan_percentage[nan_percentage > 0]
print(columns_with_nan_percentage)


age_approx                       0.632479
tbp_lv_eccentricity              0.005698
tbp_lv_location                  0.005698
tbp_lv_location_simple           0.005698
tbp_lv_minorAxisMM               0.005698
tbp_lv_nevi_confidence           0.005698
tbp_lv_norm_border               0.005698
tbp_lv_norm_color                0.005698
tbp_lv_perimeterMM               0.005698
tbp_lv_radial_color_std_max      0.005698
tbp_lv_stdL                      0.005698
tbp_lv_stdLExt                   0.005698
tbp_lv_symm_2axis                0.005698
tbp_lv_symm_2axis_angle          0.005698
tbp_lv_x                         0.005698
tbp_lv_y                         0.005698
tbp_lv_z                         0.005698
mel_mitotic_index                0.005698
mel_thick_mm                    99.641026
tbp_lv_dnn_lesion_confidence     0.005698
dtype: float64


In [8]:
X_train, X_test, y_train, y_test = train_test_split(np.array(data), np.array(labels), test_size=0.2, random_state=42)

# Convert the data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.float32).unsqueeze(1)  # Add an extra dimension for the output
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long).unsqueeze(1)

# Create PyTorch DataLoader for efficient batch processing
train_dataset = TensorDataset(X_train, y_train)
train_loader = DataLoader(train_dataset, shuffle=True)

test_dataset = TensorDataset(X_test, y_test)
test_loader = DataLoader(test_dataset, shuffle=True)

In [9]:
class ANN(nn.Module):
    def __init__(self,units):
        super(ANN,self).__init__()
        self.l1 = nn.Linear(units,units)
        #self.l2 = nn.Linear(3*units,2*units)
        #self.l3 = nn.Linear(2*units,units)
        self.l4 = nn.Linear(units,1)
        self.relu = nn.ReLU()
        self.lrelu = nn.LeakyReLU()
        self.sigmoid = nn.Sigmoid()
        self.softmax = nn.Softmax()
    def forward(self,x):
        x = self.l1(x)
        x = self.lrelu(x)
        #x = self.lrelu(self.l2(x))
        #x = self.lrelu(self.l3(x))
        x = self.sigmoid(self.l4(x))
        return torch.round(x)

In [11]:
loss_fn=nn.BCELoss()
model = ANN(units=39)
optimizer=optim.SGD(model.parameters(),lr=0.008)
device=torch.device( "cpu")
model.to(device)
eval_losses=[]
eval_accu=[]

def test(epoch):
    model.eval()
    running_loss=0
    correct=0
    total=0
    with torch.no_grad():
        for data in (test_loader):
            inputs,labels=data[0].to(device),data[1].to(device)
            outputs=model(inputs)
            loss= loss_fn(outputs,labels)
            running_loss+=loss.item()
            _, predicted = outputs.max(1)
            total += labels.size(0)
            correct += predicted.eq(labels).sum().item()
    test_loss=running_loss/len(test_loader)
    accu=correct/total
    eval_losses.append(test_loss)
    eval_accu.append(accu)
    print('Test Loss: %.3f | Accuracy: %.3f'%(test_loss,accu))

# Train the model and capture gradient information
epochs = 10
gradients = []

for epoch in range(epochs):
    model.train()
    running_loss = 0.0

    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)

        # Backward pass and optimization
        loss.backward()

        # Capture the gradient of the first layer's weights
        gradients.append(model.l1.weight.grad.clone().detach().numpy())

        optimizer.step()

        running_loss += loss.item()

    # Print the average loss for each epoch
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}")

# Gradient values after each epoch
gradients[-1]  # Showing the last gradient value for the first layer after the final epoch

Epoch 1/10, Loss: 44.3198
Epoch 2/10, Loss: 44.3198
Epoch 3/10, Loss: 44.3198
Epoch 4/10, Loss: 44.3198
Epoch 5/10, Loss: 44.3198
Epoch 6/10, Loss: 44.3198
Epoch 7/10, Loss: 44.3198
Epoch 8/10, Loss: 44.3198
Epoch 9/10, Loss: 44.3198
Epoch 10/10, Loss: 44.3198


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
print(model.l1.weight.grad.clone().detach().numpy())

[[0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [None]:
for epoch in range(epochs):
    model.eval()
    running_loss = 0.0
    total = 0
    correct = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = loss_fn(outputs, labels)
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()
        running_loss += loss.item()
    accu=correct/total
    # Print the average loss for each epoch
    print(f"Epoch {epoch + 1}/{epochs}, Loss: {running_loss / len(train_loader):.4f}, Accuracy : {accu:.4f}")

# Gradient values after each epoch
# Showing the last gradient value for the first layer after the final epoch

Epoch 1/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 2/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 3/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 4/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 5/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 6/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 7/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 8/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 9/10, Loss: 43.1978, Accuracy : 0.5680
Epoch 10/10, Loss: 43.1978, Accuracy : 0.5680


array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [None]:
regularizer
dropout
batchnorm
model complexity
decrease sample size