In [48]:
import opendatasets as od
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import transforms
from PIL import Image
from sklearn.preprocessing import LabelEncoder
from torch.optim import Adam
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt

In [49]:
#od.download('https://www.kaggle.com/datasets/samithsachidanandan/human-face-emotions')

In [50]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [51]:
'''
over 59 thousand images
'''

#sentiments-data/Data/
#random_image_classification/data/data/
#lung_diseases/lung_colon_image_set/

image_paths = []
labels = []
path = 'lung_diseases/lung_colon_image_set/'

# Now i can geenralise if i want to change the dataset
"""
for label in os.listdir(path):
    for image in os.listdir(path+str(label)):
        image_paths.append(path+str(label)+'/'+str(image))
        labels.append(label)
"""

for sEt in os.listdir(path):
    for label in os.listdir(path+sEt):
        for image in os.listdir(path+sEt+'/'+label):
            image_paths.append(path+sEt+'/'+label+'/'+image)
            labels.append(label)

In [52]:
data_df = pd.DataFrame(zip(image_paths,labels), columns = ['image_path', 'label'])
data_df

Unnamed: 0,image_path,label
0,lung_diseases/lung_colon_image_set/Test Set/co...,colon_aca
1,lung_diseases/lung_colon_image_set/Test Set/co...,colon_aca
2,lung_diseases/lung_colon_image_set/Test Set/co...,colon_aca
3,lung_diseases/lung_colon_image_set/Test Set/co...,colon_aca
4,lung_diseases/lung_colon_image_set/Test Set/co...,colon_aca
...,...,...
24995,lung_diseases/lung_colon_image_set/Train and V...,lung_scc
24996,lung_diseases/lung_colon_image_set/Train and V...,lung_scc
24997,lung_diseases/lung_colon_image_set/Train and V...,lung_scc
24998,lung_diseases/lung_colon_image_set/Train and V...,lung_scc


In [53]:
data_df.dropna(inplace = True)

train_df = data_df.sample(frac = 0.7)
test_df = data_df.drop(train_df.index)



In [54]:
label_encoder = LabelEncoder()

label_encoder.fit(train_df['label'])

transform = transforms.Compose([
    transforms.Resize((128,128)),
    transforms.ToTensor(),
    transforms.ConvertImageDtype(torch.float)
])

In [55]:
class dataset(Dataset):
    def __init__(self, df, transform):
        self.df = df
        self.labels = torch.tensor(label_encoder.transform(df['label'])).to(device)
        self.transform = transform
    def __len__(self):
        return len(self.df)
    def __getitem__(self, index):
        required_label = self.labels[index]
        required_image_path = self.df.iloc[index,0]

        required_image = Image.open(required_image_path).convert('RGB')

        required_image = self.transform(required_image).to(device)

        return required_image, required_label

train_dataset = dataset(train_df,transform)
test_dataset = dataset(test_df,transform)

In [56]:
BATCH_SIZE = 300

train_dataloader = DataLoader(train_dataset, batch_size = BATCH_SIZE, shuffle = True)
test_dataloader = DataLoader(test_dataset, batch_size = BATCH_SIZE, shuffle = True)

In [62]:
from torchsummary import summary

D_LAYER_CONST = 20

class MyModel(nn.Module):
    def __init__(self):
        super(MyModel,self).__init__()

        self.conv1 = nn.Conv2d(3,32, kernel_size = 3, padding = 1)
        self.conv2 = nn.Conv2d(32,64, kernel_size = 3, padding = 1)
        self.conv3 = nn.Conv2d(64,128, kernel_size = 3, padding = 1)
        self.conv4 = nn.Conv2d(128,256, kernel_size = 3, padding = 1)
        self.conv5 = nn.Conv2d(256,512, kernel_size = 3, padding = 1)

        self.pool = nn.MaxPool2d(2,2)

        self.relu = nn.ReLU()

        self.flat = nn.Flatten()

        self.dropout = nn.Dropout(0.5)

        self.dLayer1 = nn.Linear(256*8*8,D_LAYER_CONST) # EDIT THIS TO FIT DEPENDING ON LAST CONV LAYER
        self.dLayer2 = nn.Linear(D_LAYER_CONST, len(data_df['label'].unique()))
        
    def forward(self,x):
        x = self.conv1(x) # 32 x 128 x 128
        x = self.pool(x) # 32 x 64 x 64
        x = self.relu(x)
        
        x = self.conv2(x) # 64 x 64 x 64
        x = self.pool(x) # 64 x 32 x 32 
        x = self.relu(x)
        
        x = self.conv3(x) # 128 x 32 x 32
        x = self.pool(x) # 128 x 16 x 16
        x = self.relu(x)

        x = self.conv4(x) # 256 x 16 x 16
        x = self.pool(x) # 256 x 8 x 8
        x = self.relu(x)

        """
        x = self.conv5(x) # 512 x 8 x 8
        x = self.pool(x) # 512 x 4 x 4
        x = self.relu(x)"""

        x = self.flat(x)
        x = self.dropout(x)
        x = self.dLayer1(x)
        x = self.dropout(x)
        x = self.dLayer2(x)

        return x

model = MyModel().to(device)
model.load_state_dict(torch.load("save_file.pth"))
summary(model, (3,128,128))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Conv2d-1         [-1, 32, 128, 128]             896
         MaxPool2d-2           [-1, 32, 64, 64]               0
              ReLU-3           [-1, 32, 64, 64]               0
            Conv2d-4           [-1, 64, 64, 64]          18,496
         MaxPool2d-5           [-1, 64, 32, 32]               0
              ReLU-6           [-1, 64, 32, 32]               0
            Conv2d-7          [-1, 128, 32, 32]          73,856
         MaxPool2d-8          [-1, 128, 16, 16]               0
              ReLU-9          [-1, 128, 16, 16]               0
           Conv2d-10          [-1, 256, 16, 16]         295,168
        MaxPool2d-11            [-1, 256, 8, 8]               0
             ReLU-12            [-1, 256, 8, 8]               0
          Flatten-13                [-1, 16384]               0
          Dropout-14                [-1

  model.load_state_dict(torch.load("save_file.pth"))


In [63]:
LR = 1e-3
criterion = nn.CrossEntropyLoss()
optimizer = Adam(model.parameters(),lr = LR )

train_dataset.__getitem__(400)

(tensor([[[0.8392, 0.8353, 0.8235,  ..., 0.8157, 0.7529, 0.7725],
          [0.8392, 0.8392, 0.8314,  ..., 0.7804, 0.7843, 0.7686],
          [0.8392, 0.8392, 0.8392,  ..., 0.8039, 0.8196, 0.8118],
          ...,
          [0.7725, 0.6510, 0.6039,  ..., 0.7490, 0.7686, 0.7804],
          [0.7529, 0.6588, 0.6902,  ..., 0.7725, 0.7765, 0.7843],
          [0.7412, 0.7490, 0.7843,  ..., 0.7882, 0.7804, 0.7922]],
 
         [[0.7882, 0.7804, 0.7451,  ..., 0.6667, 0.6000, 0.6314],
          [0.7843, 0.7882, 0.7765,  ..., 0.5804, 0.5843, 0.5961],
          [0.7843, 0.7843, 0.7765,  ..., 0.6000, 0.6157, 0.6118],
          ...,
          [0.2471, 0.1255, 0.0392,  ..., 0.5647, 0.5922, 0.5961],
          [0.3490, 0.2667, 0.2706,  ..., 0.5843, 0.5882, 0.5922],
          [0.2157, 0.2784, 0.3412,  ..., 0.5922, 0.5804, 0.5882]],
 
         [[0.7686, 0.7647, 0.7725,  ..., 0.7725, 0.7647, 0.7804],
          [0.7686, 0.7569, 0.7529,  ..., 0.7451, 0.7647, 0.7569],
          [0.7725, 0.7765, 0.7843,  ...,

In [61]:
EPOCHS = 4

train_plot = []
acc_plot = []
final_acc = 0

for epoch in range(EPOCHS):
    train_cum = 0
    acc_cum = 0
    i = 0
    for data in train_dataloader:
        inputs, label = data
        prediction = model(inputs)
        loss_object = criterion(prediction, label)
        train_cum += loss_object.item()
        loss_object.backward()
        optimizer.step()
        optimizer.zero_grad()
        acc_cum+= (torch.argmax(prediction, axis=1)==label).sum().item()
        i += 1
        print(str(BATCH_SIZE*i)+'/'+str(train_dataset.__len__())+' data processed for this epoch')
    final_acc = acc_cum/train_dataset.__len__()*100
    acc_plot.append(final_acc)
    train_plot.append(train_cum)
    print('EPOCH '+str(epoch)+' COMPLETED')
    print('LOSS: '+str(train_cum))
    print('ACCURACY: '+str(final_acc)+'%')

train_acc_cum = final_acc


300/17500 data processed for this epoch
600/17500 data processed for this epoch
900/17500 data processed for this epoch
1200/17500 data processed for this epoch
1500/17500 data processed for this epoch
1800/17500 data processed for this epoch
2100/17500 data processed for this epoch
2400/17500 data processed for this epoch
2700/17500 data processed for this epoch
3000/17500 data processed for this epoch
3300/17500 data processed for this epoch
3600/17500 data processed for this epoch
3900/17500 data processed for this epoch
4200/17500 data processed for this epoch
4500/17500 data processed for this epoch
4800/17500 data processed for this epoch
5100/17500 data processed for this epoch
5400/17500 data processed for this epoch
5700/17500 data processed for this epoch
6000/17500 data processed for this epoch
6300/17500 data processed for this epoch
6600/17500 data processed for this epoch
6900/17500 data processed for this epoch
7200/17500 data processed for this epoch
7500/17500 data pro

KeyboardInterrupt: 

In [60]:
#torch.save(model.state_dict(),"save_file.pth")

In [64]:

with torch.no_grad():
    model.eval()
    acc_cum = 0
    train_cum = 0
    i = 0
    for data in test_dataloader:
        inputs, label = data
        prediction = model(inputs)
        loss_object = criterion(prediction, label)
        train_cum += loss_object.item()
        acc_cum+= (torch.argmax(prediction, axis=1)==label).sum().item()
        i += 1
        print(str(BATCH_SIZE*i)+'/'+str(test_dataset.__len__())+' data processed for this epoch')
    final_acc = acc_cum/test_dataset.__len__()*100
    acc_plot.append(final_acc)
    train_plot.append(train_cum)
    print('EPOCH '+str(epoch)+' COMPLETED')
    print('LOSS: '+str(train_cum))
    print('ACCURACY: '+str(final_acc)+'%')
    model.train()

test_acc_cum = final_acc

300/7500 data processed for this epoch
600/7500 data processed for this epoch
900/7500 data processed for this epoch
1200/7500 data processed for this epoch
1500/7500 data processed for this epoch
1800/7500 data processed for this epoch
2100/7500 data processed for this epoch
2400/7500 data processed for this epoch
2700/7500 data processed for this epoch
3000/7500 data processed for this epoch
3300/7500 data processed for this epoch
3600/7500 data processed for this epoch
3900/7500 data processed for this epoch
4200/7500 data processed for this epoch
4500/7500 data processed for this epoch
4800/7500 data processed for this epoch
5100/7500 data processed for this epoch
5400/7500 data processed for this epoch
5700/7500 data processed for this epoch
6000/7500 data processed for this epoch
6300/7500 data processed for this epoch
6600/7500 data processed for this epoch
6900/7500 data processed for this epoch
7200/7500 data processed for this epoch
7500/7500 data processed for this epoch
EPO

In [65]:
print('TRAIN ACCURACY: '+ str(train_acc_cum)+'%')
print('TEST ACCURACY: '+ str(test_acc_cum)+'%')

TRAIN ACCURACY: 95.08716323296355%
TEST ACCURACY: 95.92%
