### 1.1 Create model to predict `Skin Disease` using `ResNet18` 

### Import Dependencies

In [31]:
import torch
import torch.nn as nn
import torch.optim as optim 
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, WeightedRandomSampler
import os
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import WeightedRandomSampler
from PIL import Image

In [2]:
print(torch.__version__)

2.5.1


In [3]:
print(torch.cuda.is_available())  # Should return True
print(torch.cuda.device_count())  # Number of GPUs
print(torch.cuda.current_device())  # Current GPU ID
print(torch.cuda.get_device_name(torch.cuda.current_device()))

True
1
0
NVIDIA GeForce GTX 1650 Ti


In [19]:
batch_size = 16
num_classes = 10
learning_rate = 0.001
num_epochs = 20
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print(f"using device {device}")

using device cuda:0


In [20]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256),
        # transforms.RandomResizedCrop(224),
        transforms.CenterCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
    'test': transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ]),
}


In [21]:
data_dir = r'D:\rahini\balanced_dataset'

# Load dataset
image_datasets = {x: datasets.ImageFolder(root=f"{data_dir}/{x}", transform=data_transforms[x])
                  for x in ['train', 'test']}
class_names = image_datasets['train'].classes  # Define class_names here

# Compute class sample counts
class_sample_counts = [sum(np.array(image_datasets['train'].targets) == i) for i in range(len(class_names))]
class_weights = 1. / np.array(class_sample_counts)

# Assign weights to each sample
sample_weights = [class_weights[target] for target in image_datasets['train'].targets]
sampler = WeightedRandomSampler(sample_weights, num_samples=len(sample_weights), replacement=True)

# Create DataLoader with weighted sampler for training
train_loader = DataLoader(image_datasets['train'], batch_size=batch_size, sampler=sampler,num_workers=4, pin_memory=True)
validation_loader = DataLoader(image_datasets['test'], batch_size=batch_size, shuffle=True,num_workers=4, pin_memory=True)

# Create a dictionary of dataloaders
dataloaders = {'train': train_loader, 'test': validation_loader}

# Dataset sizes for each phase
dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'test']}


In [30]:
class_names = image_datasets['train'].classes
print(class_names)

['1. Eczema 1677', '10. Warts Molluscum and other Viral Infections - 2103', '2. Melanoma 15.75k', '3. Atopic Dermatitis - 1.25k', '4. Basal Cell Carcinoma (BCC) 3323', '5. Melanocytic Nevi (NV) - 7970', '6. Benign Keratosis-like Lesions (BKL) 2624', '7. Psoriasis pictures Lichen Planus and related diseases - 2k', '8. Seborrheic Keratoses and other Benign Tumors - 1.8k', '9. Tinea Ringworm Candidiasis and other Fungal Infections - 1.7k']


In [22]:
model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)
model.fc = nn.Linear(model.fc.in_features, num_classes)  # Adjust the final layer
model = model.to(device)


In [23]:
print(next(model.parameters()).device)  

cuda:0


In [24]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [25]:
def train_model(model, dataloaders, criterion, optimizer, num_epochs=10):
    best_model_wts = model.state_dict()
    best_acc = 0.0

    for epoch in range(num_epochs):
        print(f'Epoch {epoch + 1}/{num_epochs}')
        print('-' * 10)

        # Each epoch has a training and validation phase
        for phase in ['train', 'test']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            running_corrects = 0

            # Iterate over data
            for inputs, labels in dataloaders[phase]:
                inputs, labels = inputs.to(device), labels.to(device)

                # Zero the parameter gradients
                optimizer.zero_grad()

                # Forward pass
                with torch.set_grad_enabled(phase == 'train'):
                    outputs = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, labels)

                    # Backward pass and optimize only in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # Statistics
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds == labels.data)

            epoch_loss = running_loss / dataset_sizes[phase]
            epoch_acc = running_corrects.double() / dataset_sizes[phase]

            print(f'{phase} Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

            # Deep copy the model
            if phase == 'test' and epoch_acc > best_acc:
                best_acc = epoch_acc
                best_model_wts = model.state_dict()

    print(f'Best Validation Accuracy: {best_acc:.4f}')
    # Load best model weights
    model.load_state_dict(best_model_wts)
    return model


In [26]:
model = train_model(model, dataloaders, criterion, optimizer, num_epochs)

Epoch 1/20
----------
train Loss: 1.5074 Acc: 0.4186
test Loss: 1.3397 Acc: 0.4675
Epoch 2/20
----------
train Loss: 1.2736 Acc: 0.5064
test Loss: 1.4006 Acc: 0.4850
Epoch 3/20
----------
train Loss: 1.1715 Acc: 0.5493
test Loss: 1.1483 Acc: 0.5608
Epoch 4/20
----------
train Loss: 1.1027 Acc: 0.5797
test Loss: 1.2484 Acc: 0.5279
Epoch 5/20
----------
train Loss: 1.0270 Acc: 0.6075
test Loss: 1.1266 Acc: 0.5842
Epoch 6/20
----------
train Loss: 0.9755 Acc: 0.6352
test Loss: 1.1021 Acc: 0.5879
Epoch 7/20
----------
train Loss: 0.8903 Acc: 0.6693
test Loss: 1.1154 Acc: 0.6033
Epoch 8/20
----------
train Loss: 0.8363 Acc: 0.6983
test Loss: 1.1952 Acc: 0.5763
Epoch 9/20
----------
train Loss: 0.7497 Acc: 0.7280
test Loss: 1.1209 Acc: 0.6179
Epoch 10/20
----------
train Loss: 0.6608 Acc: 0.7619
test Loss: 1.2484 Acc: 0.6113
Epoch 11/20
----------
train Loss: 0.6162 Acc: 0.7836
test Loss: 1.2157 Acc: 0.6088
Epoch 12/20
----------
train Loss: 0.5220 Acc: 0.8181
test Loss: 1.2371 Acc: 0.6158
E

In [27]:
torch.save(model.state_dict(), 'skin_disease_model.pth')


In [32]:
def predict_image(image_path, model, class_names):
    model.eval()
    transform = data_transforms['test']
    image = Image.open(image_path).convert('RGB')
    image = transform(image).unsqueeze(0).to(device)
    # image.unsqueeze(0)
    with torch.no_grad():
        outputs = model(image)
        # print(outputs)
        _, preds = torch.max(outputs, 1)

    return class_names[preds.item()]

In [33]:
img_path = r"D:\rahini\balanced_dataset\train\8. Seborrheic Keratoses and other Benign Tumors - 1.8k\0_24.jpg" 
predict_image(img_path, model, class_names)

'8. Seborrheic Keratoses and other Benign Tumors - 1.8k'

### 1.2 Integrate `Decision Tree` with `ResNet18`
to fulfill the accuracy gap (especially validation accuracy)

In [34]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import joblib

In [35]:
import pandas as pd

In [36]:
df = pd.read_csv("disease_symptoms.csv")
df.head()

Unnamed: 0,Disease,Itchiness,Redness,Burning,Pain,Swelling,Scaling,Blisters,Open Wounds / Pus,White Patches,Lumps or Growths,Dark Spots / Pigmentation,Dry Skin,label
0,Eczema,1,1,0,0,0,1,0,0,0,0,0,1,0
1,Atopic Dermatitis,1,1,0,0,1,0,0,0,0,0,0,1,2
2,Psoriasis,0,1,0,0,0,1,0,0,1,0,0,0,6
3,Fungal Infection,1,1,0,0,0,0,1,0,1,0,0,0,8
4,Melanoma,1,0,0,0,0,0,0,0,0,1,1,0,1


In [37]:
df = df.drop("Disease", axis="columns")
df.head()

Unnamed: 0,Itchiness,Redness,Burning,Pain,Swelling,Scaling,Blisters,Open Wounds / Pus,White Patches,Lumps or Growths,Dark Spots / Pigmentation,Dry Skin,label
0,1,1,0,0,0,1,0,0,0,0,0,1,0
1,1,1,0,0,1,0,0,0,0,0,0,1,2
2,0,1,0,0,0,1,0,0,1,0,0,0,6
3,1,1,0,0,0,0,1,0,1,0,0,0,8
4,1,0,0,0,0,0,0,0,0,1,1,0,1


In [38]:
symptoms ={
    i:df[df["label"] == i].drop("label", axis="columns").to_numpy()
    for i in range(10)
}

In [39]:
symptoms[0]

array([[1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1]], dtype=int64)

In [40]:
X_train, y_train = [], []
for images, labels in train_loader:
    for i in range(len(labels)):
        img = images[i]
        label = labels[i]
        img = img.unsqueeze(0).to(device)
        with torch.no_grad():
            out = model(img)
        
        pred = out[0].cpu().numpy()
        rd_symptom = symptoms[label.item()][np.random.randint(len(symptoms[label.item()]))]
        _x = np.concatenate([pred, rd_symptom])        
        X_train.append(_x)
        y_train.append(label.item())

In [41]:
X_train[0]

array([ -0.46364403,   6.32823038, -11.68267441,  -7.68953466,
       -10.76054287,  -8.56745243, -10.60430241,   0.11241622,
        -4.69105768,  -4.128335  ,   1.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   1.        ,
         1.        ,   0.        ])

In [42]:
y_train[0]

1

In [43]:
cls = DecisionTreeClassifier()

In [44]:
cls.fit(X_train, y_train)
joblib.dump(cls, "decision_tree_model.pkl")  # Save model as a file
print("Decision Tree model saved successfully!")

Decision Tree model saved successfully!


In [45]:
cls.predict([X_train[401]])

array([9])

In [46]:
y_train[401]

9

In [47]:
X_test, y_test = [], []
for images, labels in validation_loader:
    for i in range(len(labels)):
        img = images[i]
        label = labels[i]
        img = img.unsqueeze(0).to(device)
        with torch.no_grad():
            out = model(img)
        
        pred = out[0].cpu().numpy()
        rd_symptom = symptoms[label.item()][np.random.randint(len(symptoms[label.item()]))]
        _x = np.concatenate([pred, rd_symptom])        
        X_test.append(_x)
        y_test.append(label.item())

In [48]:
cls.score(X_test, y_test)

0.9966666666666667

<table style="float: left;">
    <tr>
        <th>Method</th>
        <th>Accuracy</th>
    </tr>
    <tr>
        <td>without Decision Tree</td>
        <td style="color: orange;">0.6325</td>
    </tr>
    <tr>
        <td>with Decision Tree</td>
        <td style="color: green;">0.9966</td>
    </tr>
</table>
