In [3]:
import pandas as pd
import os
from skimage import io
from skimage.transform import resize
import torch
import torchvision
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms

In [17]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [2]:
df = pd.read_csv("dogcat\\cat.csv")
df2 = df.copy()
print(len(df))

12500


In [3]:
for index, row in df.iterrows():
    id_value = row["id"]
    label_value = row["label"]
    id_value -= 1
    id_str = str(id_value)
    value = f"cat.{id_str}.JPG"
    #print(value)
    #print(label_value)
    df["id"] = value
    df["label"] = label_value

In [4]:
for index, row in df2.iterrows():
    id_value = row["id"]
    label_value = row["label"]
    id_value -= 1
    id_str = str(id_value)
    value = f"dog.{id_str}.JPG"
    #print(value)
    #print(label_value)
    df2["id"] = value
    df2["label"] = label_value+1

In [5]:
#print(df2)

In [6]:
result_df = pd.concat([df, df2], axis=0)
print(result_df)

                  id  label
0      cat.12499.JPG      0
1      cat.12499.JPG      0
2      cat.12499.JPG      0
3      cat.12499.JPG      0
4      cat.12499.JPG      0
...              ...    ...
12495  dog.12499.JPG      1
12496  dog.12499.JPG      1
12497  dog.12499.JPG      1
12498  dog.12499.JPG      1
12499  dog.12499.JPG      1

[25000 rows x 2 columns]


In [7]:
class catdogDataset(Dataset):
    def __init__(self, annotation, rootdic, transform = None, target_size=(256, 256)):
        self.annotation = annotation
        self.rootdic = rootdic
        self.transform = transform
        self.target_size = target_size
        
    def __len__(self):
        return len(self.annotation)
    def __getitem__(self, index):
        imgPath = os.path.join(self.rootdic, self.annotation.iloc[index, 0])
        image = io.imread(imgPath)
        image = resize(image, self.target_size, anti_aliasing=True) # convert the image into 256x256
        
        y = torch.tensor([float(self.annotation.iloc[index, 1])])
        
        if self.transform:
            image = self.transform(image)
        
        return (image, y)
        

In [8]:
dataset = catdogDataset(annotation = result_df, rootdic = "dogcat\\traindata", transform = transforms.ToTensor(), target_size=(256, 256))

In [9]:
train_set, test_set = torch.utils.data.random_split(dataset, [20000, 5000])

In [10]:
in_channel = 3
num_classes = 2
batch_size = 32
num_epochs = 5
learning_rate = 0.001

In [11]:
train_loader = DataLoader(dataset = train_set, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(dataset = test_set, batch_size = batch_size, shuffle = True)

In [12]:
for batch, (data, target) in enumerate(train_loader):
    print(data.shape)
    print(target.shape)
    break

torch.Size([32, 3, 256, 256])
torch.Size([32, 1])


In [13]:
class SimpleCNN(nn.Module):
    def __init__(self, num_classes):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.batch_norm1 = nn.BatchNorm2d(64)  # Add Batch Normalization
        self.relu1 = nn.ReLU()
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout1 = nn.Dropout(0.25)  # Add Dropout

        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.batch_norm2 = nn.BatchNorm2d(128)  # Add Batch Normalization
        self.relu2 = nn.ReLU()
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout2 = nn.Dropout(0.25)  # Add Dropout

        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.batch_norm3 = nn.BatchNorm2d(256)  # Add Batch Normalization
        self.relu3 = nn.ReLU()
        self.pool3 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.dropout3 = nn.Dropout(0.25)  # Add Dropout

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(256 * (256 // 8) * (256 // 8), 512)
        self.batch_norm4 = nn.BatchNorm1d(512)  # Add Batch Normalization
        self.relu4 = nn.ReLU()
        self.dropout4 = nn.Dropout(0.5)  # Add Dropout
        self.fc2 = nn.Linear(512, 1)

    def forward(self, x):
        x = self.pool1(self.dropout1(self.relu1(self.batch_norm1(self.conv1(x)))))
        x = self.pool2(self.dropout2(self.relu2(self.batch_norm2(self.conv2(x)))))
        x = self.pool3(self.dropout3(self.relu3(self.batch_norm3(self.conv3(x)))))
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc2(self.dropout4(self.relu4(self.batch_norm4(x))))
        return x




In [14]:
model = SimpleCNN(num_classes)  # Example model
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [None]:
for epoch in range(num_epochs):
    model.train()  # Set the model to training mode

    for images, labels in train_loader:
        # Convert input data to the desired data type
        images = images.to(torch.float32)
        labels = labels.to(torch.float32) 

        # Forward pass
        outputs = model(images)

        # Calculate loss
        loss = criterion(outputs, labels)
        print(loss.item())
         #print(outputs)
        #print(labels)

        # Backward pass and optimization
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Print the loss after each epoch
    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}")

# Optionally, evaluate the model on the test set after training
model.eval()
with torch.no_grad():
    for images, labels in test_loader:
        # Convert input data to the desired data type
        images = images.to(torch.float32)
        labels = labels.to(torch.float32) 

        # Forward pass and evaluation logic
        outputs = model(images)

        # Calculate and print any evaluation metric (e.g., accuracy)
        _, predicted = torch.max(outputs.data, 1)
        accuracy = (predicted == labels).sum().item() / labels.size(0)
        print(f"Accuracy on the test set: {accuracy * 100:.2f}%")

0.8709778189659119
0.008770090527832508
0.014284297823905945
0.01035232748836279
0.02531273663043976
0.013792121782898903
0.01039901003241539
0.014010470360517502
0.008060258813202381
0.009519131854176521
0.007821165025234222
0.017257990315556526
0.005997910629957914
0.005780461244285107
0.01338915340602398
0.00647020386531949
0.005773739889264107
0.005205187480896711
0.006142358295619488
0.008049591444432735
0.005675383843481541
0.00419302424415946
0.004385827574878931
0.004970822483301163
0.004096026532351971
0.007431075908243656
0.004686540458351374
0.009319777600467205
0.0035257297568023205
0.0053640250116586685
0.03188356012105942
0.0047114999033510685
0.003110310761258006
0.03097016178071499
0.0046474686823785305
0.006445405073463917
0.0040879580192267895
0.0070683155208826065
0.0026093253400176764
0.004416474141180515
0.0035135233774781227
0.002729118103161454
0.002233682433143258
0.002032893244177103
0.0021710593719035387
0.0017150412313640118
0.0018809294560924172
0.0038589895

5.950868580839597e-05
9.157492604572326e-05
0.0001891893334686756
9.565804793965071e-05
9.915627015288919e-05
0.00014570998609997332
6.658942584181204e-05
0.0002972251095343381
8.159857679856941e-05
0.00011883881961693987
0.00017703569028526545
0.0003933293919544667
0.00039343145908787847
6.211626168806106e-05
6.4127663790714e-05
0.000120506614621263
0.0013529497664421797
5.1727285608649254e-05
5.9929727285634726e-05
0.00013082163059152663
0.0002659310703165829
0.00017594758537597954


In [1]:
VGG16 = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
# Linear layer 4096x4096x1000

In [11]:
class VGG_net(nn.Module):
    def __init__(self, in_channels = 3, num_classes = 1000):
        super(VGG_net, self).__init__()
        self.in_channels = in_channels
        self.conv_layers = self.create_conv_layers(VGG16)
        
        self.fcs = nn.Sequential(
            nn.Linear(512*7*7, 4096), # image shape is 224x224 and 5 time Maxpooling so 224/2^5 = 7
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Dropout(p=0.5),
            nn.Linear(4096, num_classes)
        )
        
    def forward(self, x):
        x = self.conv_layers(x)
        x = x.reshape(x.shape[0], -1)
        x = self.fcs(x)
        return x
    
    def create_conv_layers(self, arc):
        layers = []
        in_channels = self.in_channels
        
        for x in arc:
            if type(x) == int:
                out_channels = x
                layers += [nn.Conv2d(in_channels = in_channels, out_channels = out_channels,
                                    kernel_size = (3,3), stride = (1,1), padding = (1,1)), nn.BatchNorm2d(x), nn.ReLU()]
                in_channels = x
                
            elif x == 'M':
                layers += [nn.MaxPool2d(kernel_size = (2,2), stride = (2,2))]
                
        return nn.Sequential(*layers)

In [18]:
model = VGG_net(in_channels = 3, num_classes = 1000).to(device)

In [20]:
x = torch.randn(1, 3, 224, 224).to(device)
print(model(x).shape)
# Input image must have shape 224x224

torch.Size([1, 1000])
