In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, random_split
import os

In [2]:
# Set device - THIS WILL NOW USE GPU!
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Verify GPU info
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"GPU Memory: {torch.cuda.get_device_properties(0).total_memory / 1e9:.2f} GB")

Using device: cuda
GPU: NVIDIA GeForce RTX 3050 Laptop GPU
GPU Memory: 4.29 GB


In [3]:
#def train():
    # --- 1. SETTINGS ---
    # Standard settings for AI images
transform = transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

In [4]:
# LOAD TRAIN DATA
train_path = os.path.join('data', 'train')
train_dataset = datasets.ImageFolder(root=train_path, transform=transform)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True,pin_memory=True)
    
# LOAD VALID DATA (To check accuracy)
val_path = os.path.join('data', 'valid')
val_dataset = datasets.ImageFolder(root=val_path, transform=transform)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=True,pin_memory=True)

# Load Test DATA 
test_path = os.path.join('data','test')
test_dataset = datasets.ImageFolder(root=test_path, transform = transform)
test_loader = DataLoader(test_dataset,batch_size=32,shuffle=True,pin_memory=True)

In [5]:
#Visualizing the image
image_tensor, label = train_dataset[0]
print(image_tensor,label)

tensor([[[-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         ...,
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179],
         [-2.1179, -2.1179, -2.1179,  ..., -2.1179, -2.1179, -2.1179]],

        [[-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         ...,
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357],
         [-2.0357, -2.0357, -2.0357,  ..., -2.0357, -2.0357, -2.0357]],

        [[-1.8044, -1.8044, -1.8044,  ..., -1.8044, -1.8044, -1.8044],
         [-1.8044, -1.8044, -1.8044,  ..., -1

In [6]:
print("train_dataset len:- ", len(train_dataset))
print("val_dataset len:- ", len(val_dataset))
print("test_dataset len:- ", len(test_dataset))

train_dataset len:-  11793
val_dataset len:-  1123
test_dataset len:-  562


In [7]:
# Making Model 
class CNN_model(nn.Module):
    def __init__(self, num_classes):
        super(CNN_model,self).__init__()
        self.feature = nn.Sequential(
                nn.Conv2d(in_channels=3,out_channels = 32,kernel_size=3,padding=1),       # Out_channels= 32 because we will be taking 32 filters . out_channnels=no. of filters . we dont need to define them seperately.
                nn.ReLU(),                                 #Total size :- 224 X 224
                nn.MaxPool2d(kernel_size=2,stride=2),      #Total size :- 112 X 112
                nn.Conv2d(in_channels =32 , out_channels=64,kernel_size=3,padding = 1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2 , stride=2),                           #Total size :- 56 X 56
                nn.Conv2d(in_channels=64,out_channels=128,kernel_size=3,padding=1),
                nn.ReLU(),
                #nn.MaxPool2d(kernel_size=2,padding=1),                             #Total size :- 56 X 56
            
            )
        
        self.ANN = nn.Sequential(
                nn.Flatten(),
                nn.Linear(128*56*56, 512),            ########## 
                nn.ReLU(),
                nn.Dropout(0.5),
                nn.Linear(512, num_classes),
                
        ) 
    def forward(self,x):
        x=self.feature(x)
        x=self.ANN(x)
        return x


In [8]:
#Defining Model
model =CNN_model(num_classes=4)
model.to(device)

CNN_model(
  (feature): Sequential(
    (0): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU()
  )
  (ANN): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=401408, out_features=512, bias=True)
    (2): ReLU()
    (3): Dropout(p=0.5, inplace=False)
    (4): Linear(in_features=512, out_features=4, bias=True)
  )
)

In [9]:
learning_rate = 0.001
epochs = 10

In [10]:
# Defining loss and optimizer functions .
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [11]:
# Training the model
for epoch in range(epochs):
    model.train()
    for batch_images,batch_labels in train_loader:
        images,labels = batch_images.to(device),batch_labels.to(device)

        #forward pass
        output = model(images)
        loss = loss_fn(output,labels)

        #backward pass
        optimizer.zero_grad()          #Clears old gradients from the last step
        loss.backward()                #Calculates the new GRADIENTS
        optimizer.step()               #Updates the weights
    print("Epoch no. : ", epoch," Loss : ", loss.item())

Epoch no. :  0  Loss :  0.2128683477640152
Epoch no. :  1  Loss :  0.46274515986442566
Epoch no. :  2  Loss :  0.10942863672971725
Epoch no. :  3  Loss :  0.3126348555088043
Epoch no. :  4  Loss :  0.13721224665641785
Epoch no. :  5  Loss :  0.1272735446691513
Epoch no. :  6  Loss :  0.004430891014635563
Epoch no. :  7  Loss :  0.001412202138453722
Epoch no. :  8  Loss :  0.3122063875198364
Epoch no. :  9  Loss :  0.004471985623240471


In [12]:
torch.save(model.state_dict(),"banana_model2.pth")

In [14]:
for batch,images in val_loader:
    images,labels = batch_images.to(device),batch_labels.to(device)
    output = model(images)
    loss = loss_fn(output,labels)
print(loss.item())

0.0628163069486618
