One-Hot Encoding

In [57]:
import pandas as pd
df=pd.read_csv('../data/image_labels.csv')
one_hot_df = pd.get_dummies(df.drop('id',axis=1), columns=['label1', 'label2', 'label3', 'label4'])
one_hot_df.columns.shape
one_hot_df.insert(loc=0,column='id',value=df['id'])
one_hot_df
one_hot_df.to_csv('../data/One_Hot_labels.csv')

In [58]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the CSV file
data = pd.read_csv('../data/One_Hot_labels.csv')

# Split the data into train and validation sets
train_data, val_data = train_test_split(data, test_size=0.2, random_state=42)

# Check the sizes of the train and validation sets
print("Train dataset size:", len(train_data))
print("Validation dataset size:", len(val_data))
train_data.to_csv('../data/train.csv')
val_data.to_csv('../data/val.csv')


Train dataset size: 420
Validation dataset size: 105


In [59]:
import os
import shutil
dataset_path = '../data/land_use/'
train_path='../data/land_use/train'
val_path='../data/land_use/val'
for _, row_train in train_data.iterrows():
    image_id = row_train['id']
    image_path = os.path.join(dataset_path, image_id + '.jpg')
    shutil.copy(image_path, train_path)
for _, row_val in val_data.iterrows():
    image_id = row_val['id']
    image_path = os.path.join(dataset_path, image_id + '.jpg')
    shutil.copy(image_path, val_path)


In [60]:
import torch
import PIL
class LandUseDataset(torch.utils.data.Dataset):
    def __init__(self, image_labels, images_path, transform=None):
        self.image_labels = image_labels
        self.images_path = images_path
        self.transform = transform
    def __len__(self):
        return len(self.image_labels)
    def __getitem__(self, idx):
        image_id, labels = self.image_labels[idx]
        # Load the JPG image containing 4 sub-pictures
        image_filename = f'{image_id}.jpg'
        image_filepath = f'{self.images_path}/{image_filename}'
        image = PIL.Image.open(image_filepath)
        # Preprocess the image if a transform is provided
        if self.transform is not None:
            image = self.transform(image)
        # Convert labels to tensors
        labels = torch.nn.functional.one_hot(torch.tensor(self.image_labels),21)
        return image,labels

In [61]:
import torch.nn as nn

class ImageClassifier(nn.Module):
    def __init__(self,num_classes):
        super(ImageClassifier, self).__init__()
        
        # Define your model architecture layers here
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=16, kernel_size=3, stride=1)
        self.relu1 = nn.ReLU()
        self.maxpool1 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3, stride=1)
        self.relu2 = nn.ReLU()
        self.maxpool2 = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc = nn.Linear(32 * 8 * 8, 21)
        self.num_classes=num_classes
    
    def forward(self, x):
        x = self.conv1(x)
        x = self.relu1(x)
        x = self.maxpool1(x)
        x = self.conv2(x)
        x = self.relu2(x)
        x = self.maxpool2(x)
        x = x.view(x.size(0), -1)  # Flatten the input
        x = self.fc(x)
        return x
    
    def backpropagation(self, optimizer, criterion, inputs, labels):
        optimizer.zero_grad()  # Zero the gradients
        outputs = self.forward(inputs)  # Forward pass
        loss = criterion(outputs, labels)  # Compute the loss
        loss.backward()  # Backward pass
        optimizer.step()  # Update the model parameters
    
    def optimize(self, train_loader, num_epochs, learning_rate):
        criterion = nn.CrossEntropyLoss()  # Define the loss criterion
        optimizer = torch.optim.SGD(self.parameters(), lr=learning_rate)  # Define the optimizer
        
        for epoch in range(num_epochs):
            running_loss = 0.0
            for inputs, labels in train_loader:
                self.backpropagation(optimizer, criterion, inputs, labels)
                running_loss += criterion(self.forward(inputs), labels).item()
            
            print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss / len(train_loader)}")


In [62]:
import torch
import torch.nn as nn
from tqdm import tqdm

class TrainEval:
    def __init__(self, epochs, model, train_dataloader, val_dataloader, optimizer, criterion, device, model_name):
        self.model = model
        self.train_dataloader = train_dataloader
        self.val_dataloader = val_dataloader
        self.optimizer = optimizer
        self.criterion = criterion
        self.epochs = epochs
        self.device = device
        self.model_name = model_name
        

    def train_fn(self, current_epoch):
        self.model.train()
        total_loss = 0.0
        tk = tqdm(self.train_dataloader, desc=f"EPOCH [{current_epoch+1}/{self.epochs}] [TRAIN]")

        for t, data in enumerate(tk):
            images, labels = data

            images, labels = images.to(self.device), labels.to(self.device)

            self.optimizer.zero_grad()
            logits = self.model(images)
            loss = self.criterion(logits, labels)
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            tk.set_postfix({"Loss": "{:.6f}".format(total_loss / (t + 1))})

        return total_loss / len(self.train_dataloader)

    def eval_fn(self, current_epoch):
        self.model.eval()
        total_loss = 0.0
        tk = tqdm(self.val_dataloader, desc=f"EPOCH [{current_epoch+1}/{self.epochs}] [VALID]")

        with torch.no_grad():
            for t, data in enumerate(tk):
                images, labels = data
                images, labels = images.to(self.device), labels.to(self.device)

                logits = self.model(images)
                loss = self.criterion(logits, labels)

                total_loss += loss.item()
                tk.set_postfix({"Loss": "{:.6f}".format(total_loss / (t + 1))})

        return total_loss / len(self.val_dataloader)

    def train(self):
        best_valid_loss = float('inf')
        best_train_loss = float('inf')

        train_losses = []
        val_losses = []

        for epoch in range(self.epochs):
            train_loss = self.train_fn(epoch)
            val_loss = self.eval_fn(epoch)

            train_losses.append(train_loss)
            val_losses.append(val_loss)

            if val_loss < best_valid_loss:
                torch.save(self.model.state_dict(), f"{self.model_name}_best_weights.pt")
                print("Saved Best Weights")
                best_valid_loss = val_loss
                best_train_loss = train_loss

        print(f"Training Loss: {best_train_loss}")
        print(f"Validation Loss: {best_valid_loss}")

        self.train_losses = train_losses
        self.val_losses = val_losses


In [63]:
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch import optim
num_classes = 21
batch_size = 16
num_epochs = 10
learning_rate = 0.001
image_dir = '../data/land_use'
csv_file = '../data/train.csv'

# Define transformations to be applied to the images
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
dataset = LandUseDataset(csv_file, image_dir, transform=transform)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Create an instance of the model
model = ImageClassifier(num_classes)

# Define the loss criterion and optimizer
criterion = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [67]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision.transforms import Compose, ToTensor, Resize, Normalize
from torchvision.datasets import ImageFolder
from torchvision.models import resnet50

# Set the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Path to the dataset
dataset_path = './data/land_use/train/'
# Path to the CSV file
train_csv_path = '../data/train.csv'
val_csv_path="../data/val.csv"

# Define the transforms for preprocessing the images
transform = Compose([
    Resize((224, 224)),
    ToTensor(),
    Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize with ImageNet mean and std
])

# Load the dataset and split into train and validation sets
dataset = LandUseDataset(dataset_path, transform=transform)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
# Load the CSV file
train_df=pd.read_csv(train_csv_path)
val_df=pd.read_csv(val_csv_path)
# Split the dataset into train and validation sets
# Get the IDs for the train and validation images
train_ids = train_df['id'].tolist()
val_ids = val_df['id'].tolist()

# Create the train and validation datasets
train_dataset = torch.utils.data.Subset(dataset, [dataset.samples.index(sample) for sample in dataset.samples if sample[0].split('/')[-1].split('.')[0] in train_ids])
val_dataset = torch.utils.data.Subset(dataset, [dataset.samples.index(sample) for sample in dataset.samples if sample[0].split('/')[-1].split('.')[0] in val_ids])

# Define the dataloaders
batch_size = 16
train_dataloader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
val_dataloader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)

# Define the model
model = resnet50(pretrained=True)
num_classes = 21
num_features = model.fc.in_features
model.fc = nn.Linear(num_features, num_classes)
model.to(device)

# Define the optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Define the training and evaluation loop
def train_eval(epochs, model, train_dataloader, val_dataloader, optimizer, criterion, device):
    best_val_loss = float('inf')

    for epoch in range(epochs):
        # Training
        model.train()
        train_loss = 0.0
        for images, labels in train_dataloader:
            images = images.to(device)
            labels = labels.to(device)
            optimizer.zero_grad()
            logits = model(images)
            loss = criterion(logits, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0.0
        correct = 0
        total = 0
        with torch.no_grad():
            for images, labels in val_dataloader:
                images = images.to(device)
                labels = labels.to(device)
                logits = model(images)
                loss = criterion(logits, labels)
                val_loss += loss.item()
                _, predicted = torch.max(logits.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        # Print training and validation


FileNotFoundError: Couldn't find any class folder in ./data/land_use/train/.