In [53]:
import numpy as np
import os
import glob
import torch
import matplotlib.image as mpimg
from pathlib import Path
import matplotlib.pyplot as plt
from skimage import transform
from sklearn.model_selection import train_test_split
# Root folder of the dataset: one sub-directory per class, each holding that class's images.
# The location can be overridden with the CANCER_DATA_DIR environment variable; the original
# absolute path stays as the default.
data_path=Path(os.environ.get('CANCER_DATA_DIR', '/Users/maksimtrebusinin/Desktop/Cancer_data'))
# Keep only real class folders. The previous `os.listdir(data_path)[1:]` assumed that a junk
# entry (presumably macOS `.DS_Store`) is always listed first, but os.listdir returns entries
# in arbitrary order, so it could just as well drop a class folder.
# NOTE(review): the listing order still decides which class gets which index; use an explicit
# list of folder names if the index -> class mapping must be stable across machines.
list_dir=[name for name in os.listdir(data_path)
          if not name.startswith('.') and (data_path / name).is_dir()]
from sklearn.metrics import accuracy_score, confusion_matrix
import seaborn as sns
import time
from torch.utils.data import DataLoader, TensorDataset

# Preprocessing Data

In [2]:
# Load every image of every class folder; the label of an image is the position of its
# folder in `list_dir`. According to the author's notes the mapping is:
#0 is Healthy
#1 is Melanoma
#2 is Basal cell carcinoma
#3 is Squamous cell carcinoma
# NOTE(review): this mapping depends on the order of `list_dir` - confirm it on your machine.
X=list()
y=list()
for i, column_name in enumerate(list_dir):
    for img_path in glob.iglob(os.path.join(data_path, column_name, '*')):
        X.append(np.asarray(mpimg.imread(img_path)))
        # Grow the labels together with the images instead of writing into a pre-sized
        # 4000-element list: that left wrong trailing zeros with fewer images and raised
        # IndexError with more.
        y.append(i)
# int64 class indices are what torch.nn.CrossEntropyLoss expects as targets
y=np.array(y, dtype=np.int64)
# Bring every image to the fixed 159x159x3 (height, width, channels) shape the network expects
transfrom_X=[transform.resize(image, (159,159,3)) for image in X]

In [16]:
# Stratified 75/25 train/test split with a fixed seed so the split is reproducible
X_train, X_test, y_train, y_test = train_test_split(transfrom_X, y, test_size=0.25, stratify=y,
                                                    random_state=42)
# Stack the per-image arrays into float32 tensors of shape (N, 159, 159, 3)
X_train_tensor=torch.from_numpy(np.asarray(X_train, dtype=np.float32))
X_test_tensor=torch.from_numpy(np.asarray(X_test, dtype=np.float32))
# CrossEntropyLoss requires int64 targets
y_train_tensor=torch.from_numpy(y_train).long()
y_test_tensor=torch.from_numpy(y_test).long()

# Conv2d expects channels-first input (N, C, H, W). The images are channels-last, so the
# channel axis has to be MOVED with permute. The previous reshape(N, 3, 159, 159) only
# reinterpreted the memory and scrambled the pixels of every image. It also hard-coded the
# 3000/1000 sample counts; permute works for any dataset size.
X_train_tensor=X_train_tensor.permute(0, 3, 1, 2).contiguous()
X_test_tensor=X_test_tensor.permute(0, 3, 1, 2).contiguous()

# Convolutional Network

In [51]:
class CancerNet(torch.nn.Module):
    """Five-convolution CNN that maps a batch of 3-channel images to 4 class logits.

    ``fc1`` takes 9*9*30 flattened features. With the layer settings below this matches
    159x159 inputs (159 -> 156 -> 78 -> 78 -> 39 -> 38 -> 19 -> 19 -> 9 -> 9).
    ``forward`` returns raw logits; ``inference`` additionally applies softmax.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Stage 1: 4x4 convolution (no padding), then 2x2 max-pooling
        self.conv1 = nn.Conv2d(3, 30, kernel_size=4)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.act1 = nn.ReLU()

        # Stage 2: size-preserving 3x3 convolution, then 2x2 max-pooling
        self.conv2 = nn.Conv2d(30, 60, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(2, stride=2)
        self.act2 = nn.ReLU()

        # Stage 3: 2x2 convolution (no padding), then 2x2 max-pooling
        self.conv3 = nn.Conv2d(60, 120, kernel_size=2)
        self.pool3 = nn.MaxPool2d(2, stride=2)
        self.act3 = nn.ReLU()

        # Stage 4: size-preserving 3x3 convolution, then 2x2 max-pooling
        self.conv4 = nn.Conv2d(120, 80, kernel_size=3, padding=1)
        self.pool4 = nn.MaxPool2d(2, stride=2)
        self.act4 = nn.ReLU()

        # Stage 5: size-preserving 3x3 convolution without pooling
        self.conv5 = nn.Conv2d(80, 30, kernel_size=3, padding=1)
        self.act5 = nn.ReLU()

        # Dropout after the last activation, right before the fully connected part
        self.dropout1 = nn.Dropout(p=0.5)  # zeroes 50% of the activations at random

        # Classifier head
        self.fc1 = nn.Linear(9 * 9 * 30, 200)
        self.act6 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.5)  # 50% dropout on the hidden layer
        self.fc2 = nn.Linear(200, 4)
        self.sm = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the raw class logits, shape (batch, 4), for a batch of images ``x``."""
        pooled_stages = (
            (self.conv1, self.pool1, self.act1),
            (self.conv2, self.pool2, self.act2),
            (self.conv3, self.pool3, self.act3),
            (self.conv4, self.pool4, self.act4),
        )
        for conv, pool, act in pooled_stages:
            x = act(pool(conv(x)))
        x = self.act5(self.conv5(x))

        # Dropout once all convolutions and activations are done
        x = self.dropout1(x)

        # Flatten channels x height x width into one feature vector per sample for fc1
        flat_features = x.size(1) * x.size(2) * x.size(3)
        x = x.view(x.size(0), flat_features)

        x = self.act6(self.fc1(x))

        # Dropout after the first fully connected layer
        x = self.dropout2(x)

        return self.fc2(x)

    def inference(self, x):
        """Return per-class probabilities: softmax over the logits of ``forward``."""
        return self.sm(self.forward(x))


In [56]:
# Baseline experiment: a fresh CancerNet, cross-entropy on the raw logits, Adam with lr = 1e-3
cancer_net = CancerNet()
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=cancer_net.parameters(), lr=1e-3)

In [80]:
def train_model(model, X_tr, y_tr, epoch, batch_size, opt, loss_fn):
    """Train ``model`` on shuffled mini-batches and print per-epoch loss and timing.

    Args:
        model: module called as ``model(X_batch)``; put into train mode every epoch.
        X_tr, y_tr: input and target tensors, paired sample-by-sample via TensorDataset.
        epoch: number of passes over the data.
        batch_size: mini-batch size handed to the DataLoader.
        opt: optimizer that updates the model's parameters.
        loss_fn: callable ``loss_fn(pred, y_batch)`` returning a scalar loss tensor.

    Returns:
        None. The printed loss is the mean of the per-batch loss values of the epoch.
    """
    start_time = time.time()  # wall-clock start of the whole run

    # Pair inputs with targets and let the DataLoader reshuffle them every epoch
    train_loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=batch_size, shuffle=True)

    for ep in range(epoch):
        epoch_start_time = time.time()
        total_loss = 0.0  # sum of the batch losses of this epoch
        model.train()  # training mode

        for X_batch, y_batch in train_loader:
            opt.zero_grad()

            # Forward pass and loss in one step
            loss_value = loss_fn(model(X_batch), y_batch)

            # Backpropagate, update the weights, then record the batch loss
            loss_value.backward()
            opt.step()
            total_loss += loss_value.item()

        # Split the epoch duration into hours / minutes / seconds for the report
        hours, rem = divmod(time.time() - epoch_start_time, 3600)
        minutes, seconds = divmod(rem, 60)
        print(f"Epoch {ep + 1}/{epoch}, Loss: {total_loss / len(train_loader):.4f}, Time: {int(hours)}h {int(minutes)}m {int(seconds)}s")

    # Same breakdown for the total training time
    hours, rem = divmod(time.time() - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print(f"Training completed in: {int(hours)}h {int(minutes)}m {int(seconds)}s")

def predict(model, X_test):
    """Return the predicted class index for every sample in ``X_test``.

    Args:
        model: module whose call returns per-class scores of shape (batch, n_classes).
        X_test: input tensor; the whole tensor is passed through the model in one call.

    Returns:
        1-D numpy array of class indices (argmax over dim 1).

    Side effect: leaves ``model`` in eval mode.
    """
    model.eval()  # inference mode: Dropout is disabled, BatchNorm uses its running statistics
    with torch.no_grad():  # no gradients are needed for prediction
        # Use the X_test argument. The previous version ignored it and always read the
        # global X_test_tensor, so it could not predict on any other data.
        logits = model(X_test)
        return logits.argmax(dim=1).cpu().numpy()

In [57]:
# Baseline run: 100 epochs with mini-batches of 64
train_model(cancer_net, X_train_tensor, y_train_tensor,
            epoch=100, batch_size=64, opt=optimizer, loss_fn=loss)

In [58]:
def metrics(y_true, y_pred):
    """Print the accuracy and draw the confusion matrix as an annotated heatmap.

    Args:
        y_true: ground-truth class labels.
        y_pred: predicted class labels, aligned with ``y_true``.
    """
    accuracy = accuracy_score(y_true, y_pred)
    print('Accuracy score:', accuracy)
    # Integer cell annotations (fmt='d') because the matrix holds sample counts
    sns.heatmap(confusion_matrix(y_true, y_pred), annot=True, fmt='d', cmap='coolwarm')

In [63]:
# Predicted class index for every test image (baseline model)
y_pred = predict(model=cancer_net, X_test=X_test_tensor)
print(y_pred)

In [64]:
# Accuracy and confusion matrix of the baseline model on the test split
metrics(y_true=y_test, y_pred=np.array(y_pred))

In [70]:
# Second experiment: same CancerNet architecture, trained from scratch with a larger batch.
# Note that `cancer_netV2` is an instance of CancerNet, not of the CancerNetV2 class.
cancer_netV2 = CancerNet()
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=cancer_netV2.parameters(), lr=1e-3)

In [71]:
# Second run: 90 epochs with mini-batches of 256
train_model(cancer_netV2, X_train_tensor, y_train_tensor,
            epoch=90, batch_size=256, opt=optimizer, loss_fn=loss)

In [76]:
# Evaluate the second model on the test split
y_pred2 = predict(model=cancer_netV2, X_test=X_test_tensor)
metrics(y_true=y_test, y_pred=np.array(y_pred2))

In [73]:
# Third experiment: same CancerNet architecture, trained from scratch with a smaller batch
cancer_netV3 = CancerNet()
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=cancer_netV3.parameters(), lr=1e-3)

In [74]:
# Third run: 100 epochs with mini-batches of 32
train_model(cancer_netV3, X_train_tensor, y_train_tensor,
            epoch=100, batch_size=32, opt=optimizer, loss_fn=loss)

In [75]:
# Evaluate the third model on the test split
y_pred3 = predict(model=cancer_netV3, X_test=X_test_tensor)
metrics(y_true=y_test, y_pred=np.array(y_pred3))

In [77]:
class CancerNetV2(torch.nn.Module):
    """CNN with five convolutions and batch normalisation, producing 4 class logits.

    Every convolution and the hidden fully connected layer is followed by a BatchNorm
    layer. ``fc1`` takes 9*9*30 flattened features, which with the layer settings below
    matches 159x159 inputs. ``forward`` returns raw logits; ``inference`` applies softmax.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        # Convolutional stages, each with batch normalisation
        self.conv1 = nn.Conv2d(3, 30, kernel_size=4)
        self.bn1 = nn.BatchNorm2d(30)
        self.pool1 = nn.MaxPool2d(2, stride=2)
        self.act1 = nn.ReLU()

        self.conv2 = nn.Conv2d(30, 60, kernel_size=3, padding=1)
        self.bn2 = nn.BatchNorm2d(60)
        self.pool2 = nn.MaxPool2d(2, stride=2)
        self.act2 = nn.ReLU()

        self.conv3 = nn.Conv2d(60, 120, kernel_size=2)
        self.bn3 = nn.BatchNorm2d(120)
        self.pool3 = nn.MaxPool2d(2, stride=2)
        self.act3 = nn.ReLU()

        self.conv4 = nn.Conv2d(120, 80, kernel_size=3, padding=1)
        self.bn4 = nn.BatchNorm2d(80)
        self.pool4 = nn.MaxPool2d(2, stride=2)
        self.act4 = nn.ReLU()

        # Last convolution is not followed by pooling
        self.conv5 = nn.Conv2d(80, 30, kernel_size=3, padding=1)
        self.bn5 = nn.BatchNorm2d(30)
        self.act5 = nn.ReLU()

        # Dropout after the convolutional part
        self.dropout1 = nn.Dropout(p=0.5)

        # Fully connected head, also batch-normalised
        self.fc1 = nn.Linear(9 * 9 * 30, 200)
        self.bn_fc1 = nn.BatchNorm1d(200)
        self.act6 = nn.ReLU()
        self.dropout2 = nn.Dropout(p=0.5)  # dropout on the hidden layer
        self.fc2 = nn.Linear(200, 4)
        self.sm = nn.Softmax(dim=1)

    def forward(self, x):
        """Return the raw class logits, shape (batch, 4), for a batch of images ``x``."""
        pooled_stages = (
            (self.conv1, self.bn1, self.pool1, self.act1),
            (self.conv2, self.bn2, self.pool2, self.act2),
            (self.conv3, self.bn3, self.pool3, self.act3),
            (self.conv4, self.bn4, self.pool4, self.act4),
        )
        # Each stage: convolution -> batch norm -> max-pool -> ReLU
        for conv, norm, pool, act in pooled_stages:
            x = act(pool(norm(conv(x))))

        x = self.act5(self.bn5(self.conv5(x)))

        # Dropout on the final feature maps
        x = self.dropout1(x)

        # Flatten to one feature vector per sample for the fully connected layers
        x = x.view(x.size(0), -1)

        # Hidden layer: linear -> batch norm -> ReLU -> dropout
        x = self.act6(self.bn_fc1(self.fc1(x)))
        x = self.dropout2(x)

        return self.fc2(x)

    def inference(self, x):
        """Return per-class probabilities: softmax over the logits of ``forward``."""
        return self.sm(self.forward(x))

    def l1_regularization(self, model, lambda_l1=0.001):
        """Return ``lambda_l1`` times the sum of absolute values of all parameters of ``model``.

        The penalty is computed over the ``model`` argument, not over ``self``.
        """
        absolute_sums = (param.abs().sum() for param in model.parameters())
        return lambda_l1 * sum(absolute_sums)


In [105]:
def train_model_for_L1(model, X_tr, y_tr, epoch, batch_size, opt, loss_fn, lambda_l1=0.004):
    """Train ``model`` on shuffled mini-batches with an added L1 penalty.

    The optimised objective per batch is ``loss_fn(pred, y_batch)`` plus
    ``model.l1_regularization(model, lambda_l1)``, so ``model`` must provide that method.

    Args:
        model: module called as ``model(X_batch)``; put into train mode every epoch.
        X_tr, y_tr: input and target tensors, paired sample-by-sample via TensorDataset.
        epoch: number of passes over the data.
        batch_size: mini-batch size handed to the DataLoader.
        opt: optimizer that updates the model's parameters.
        loss_fn: callable ``loss_fn(pred, y_batch)`` returning a scalar loss tensor.
        lambda_l1: weight of the L1 penalty.

    Returns:
        None. The printed loss is the per-epoch mean of the batch objectives, including
        the L1 term.
    """
    start_time = time.time()  # wall-clock start of the whole run

    # Pair inputs with targets and let the DataLoader reshuffle them every epoch
    train_loader = DataLoader(TensorDataset(X_tr, y_tr), batch_size=batch_size, shuffle=True)

    for ep in range(epoch):
        epoch_start_time = time.time()
        total_loss = 0.0  # sum of the batch objectives of this epoch
        model.train()  # training mode

        for X_batch, y_batch in train_loader:
            opt.zero_grad()

            # Data term: forward pass and loss in one step
            loss_value = loss_fn(model(X_batch), y_batch)

            # Objective = data term + L1 penalty on the current parameters
            total_loss_value = loss_value + model.l1_regularization(model, lambda_l1)

            # Backpropagate, update the weights, then record the batch objective
            total_loss_value.backward()
            opt.step()
            total_loss += total_loss_value.item()

        # Split the epoch duration into hours / minutes / seconds for the report
        hours, rem = divmod(time.time() - epoch_start_time, 3600)
        minutes, seconds = divmod(rem, 60)
        print(f"Epoch {ep + 1}/{epoch}, Loss: {total_loss / len(train_loader):.4f}, Time: {int(hours)}h {int(minutes)}m {int(seconds)}s")

    # Same breakdown for the total training time
    hours, rem = divmod(time.time() - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print(f"Training completed in: {int(hours)}h {int(minutes)}m {int(seconds)}s")


In [106]:
# Fourth experiment: the batch-normalised CancerNetV2 architecture, trained with an L1 penalty
cancer_netV4 = CancerNetV2()
loss = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=cancer_netV4.parameters(), lr=1e-3)

In [107]:
# Fourth run: 200 epochs with mini-batches of 256 and the default L1 weight
train_model_for_L1(cancer_netV4, X_train_tensor, y_train_tensor,
                   epoch=200, batch_size=256, opt=optimizer, loss_fn=loss)

In [108]:
# Evaluate the L1-regularised model on the test split
y_pred4 = predict(model=cancer_netV4, X_test=X_test_tensor)
metrics(y_true=y_test, y_pred=np.array(y_pred4))

# Results

In [92]:
# Final result: metrics of the second model (cancer_netV2), the one that is saved below
y_pred2 = predict(model=cancer_netV2, X_test=X_test_tensor)
metrics(y_true=y_test, y_pred=np.array(y_pred2))

In [110]:
# Persist only the learned weights (state dict) of the second model
torch.save(obj=cancer_netV2.state_dict(), f='CancerNet7.pth')