In [21]:
!pip install peft
!pip install --upgrade peft

  pid, fd = os.forkpty()




In [22]:
import os
import torch
from PIL import Image
import pandas as pd
from torchvision import transforms, models
from torch.utils.data import DataLoader, Dataset
from torch import nn, optim
from sklearn.model_selection import train_test_split
from tqdm import tqdm
from transformers import (
    AutoImageProcessor, 
    AutoModelForImageClassification, 
    ResNetModel, 
    ResNetConfig
)
import torch.nn.functional as F
import peft
from peft import get_peft_model, LoraConfig


In [23]:
!unzip /kaggle/input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip

Archive:  /kaggle/input/galaxy-zoo-the-galaxy-challenge/training_solutions_rev1.zip
replace training_solutions_rev1.csv? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [24]:
!unzip /kaggle/input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip

Archive:  /kaggle/input/galaxy-zoo-the-galaxy-challenge/images_training_rev1.zip
replace images_training_rev1/100008.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [25]:
!unzip /kaggle/input/galaxy-zoo-the-galaxy-challenge/images_test_rev1.zip

Archive:  /kaggle/input/galaxy-zoo-the-galaxy-challenge/images_test_rev1.zip
replace images_test_rev1/100018.jpg? [y]es, [n]o, [A]ll, [N]one, [r]ename: ^C


In [26]:
df = pd.read_csv("/kaggle/working/training_solutions_rev1.csv")

In [27]:
df.head()

Unnamed: 0,GalaxyID,Class1.1,Class1.2,Class1.3,Class2.1,Class2.2,Class3.1,Class3.2,Class4.1,Class4.2,...,Class9.3,Class10.1,Class10.2,Class10.3,Class11.1,Class11.2,Class11.3,Class11.4,Class11.5,Class11.6
0,100008,0.383147,0.616853,0.0,0.0,0.616853,0.038452,0.578401,0.418398,0.198455,...,0.0,0.279952,0.138445,0.0,0.0,0.092886,0.0,0.0,0.0,0.325512
1,100023,0.327001,0.663777,0.009222,0.031178,0.632599,0.46737,0.165229,0.591328,0.041271,...,0.018764,0.0,0.131378,0.45995,0.0,0.591328,0.0,0.0,0.0,0.0
2,100053,0.765717,0.177352,0.056931,0.0,0.177352,0.0,0.177352,0.0,0.177352,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,100078,0.693377,0.238564,0.068059,0.0,0.238564,0.109493,0.129071,0.189098,0.049466,...,0.0,0.094549,0.0,0.094549,0.189098,0.0,0.0,0.0,0.0,0.0
4,100090,0.933839,0.0,0.066161,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
folder_path = "/kaggle/working/images_training_rev1" 

image_files = [os.path.join(folder_path, file) for file in os.listdir(folder_path) if file.endswith(('.png', '.jpg', '.jpeg'))]

In [29]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [30]:
device

device(type='cuda')

In [31]:
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
import os
import pandas as pd
from PIL import Image
import torch
from sklearn.model_selection import train_test_split

class CustomGalaxyDataset(Dataset):
    def __init__(self, dataframe, image_dir, transform=None):
        self.dataframe = dataframe
        self.image_dir = image_dir
        self.transform = transform

    def __len__(self):
        return len(self.dataframe)

    def __getitem__(self, index):
        entry = self.dataframe.iloc[index]
        galaxy_id = str(int(entry['GalaxyID']))
        img_path = os.path.join(self.image_dir, f"{galaxy_id}.jpg")
        
        if not os.path.exists(img_path):
            print(f"Warning: Missing image at {img_path}.")
            img = Image.new('RGB', (224, 224), color=(255, 255, 255))  # Create a blank image.
        else:
            img = Image.open(img_path).convert("RGB")
        
        if self.transform:
            img = self.transform(img)
        
        target = torch.tensor(entry.iloc[1:].values, dtype=torch.float)
        return img, target

image_transforms = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])

dataframe = pd.read_csv("/kaggle/working/training_solutions_rev1.csv")

train_data, test_data = train_test_split(dataframe, test_size=0.2, random_state=42)

train_dataset = CustomGalaxyDataset(dataframe=train_data, image_dir="/kaggle/working/images_training_rev1", transform=image_transforms)
test_dataset = CustomGalaxyDataset(dataframe=test_data, image_dir="/kaggle/working/images_training_rev1", transform=image_transforms)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)


In [42]:
def assess_model_performance(model, dataloader):
    model.eval()
    total_samples = 0
    correct_predictions = 0
    
    with torch.no_grad():
        for batch in tqdm(dataloader):
            images, labels = batch
            images, labels = images.to(device), labels.to(device)

            predictions = model.to(device)(images)

            probabilities = F.softmax(predictions, dim=1)

            _, predicted_classes = torch.max(probabilities, dim=1)

            true_classes = labels.argmax(dim=1)

            correct_predictions += (predicted_classes == true_classes).sum().item()
            total_samples += true_classes.size(0)
    
    accuracy = (correct_predictions / total_samples) * 100
    return accuracy


In [43]:
model_before = models.resnet50(pretrained=True).to(device)
model_before.fc = nn.Linear(model_before.fc.in_features, 37)

In [44]:
print("Accuracy without fine-tuning:", assess_model_performance(model_before, test_loader))

100%|██████████| 385/385 [01:19<00:00,  4.86it/s]

Accuracy without fine-tuning: 0.008119519324455992





In [33]:
model = models.resnet50(pretrained=True).to(device)
model.fc = nn.Linear(model.fc.in_features, 37)



In [35]:
loss_function = nn.BCEWithLogitsLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4)

def train_and_optimize_model(model, dataloader, optimizer, epochs=2):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        print(f"Starting Epoch {epoch + 1} of {epochs}")
        
        for images, labels in tqdm(dataloader, desc=f"Epoch {epoch + 1} Progress", leave=False):

            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            predictions = model.to(device)(images)
            
            loss = loss_function(predictions, labels)
            
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        print(f"Average Loss: {epoch_loss / len(dataloader):.4f}")
 
train_and_optimize_model(model, train_loader, optimizer)
accuracy_after_finetuning = assess_model_performance(model, test_loader)
print(f"Accuracy after Fine-Tuning: {accuracy_after_finetuning:.4f}")

Starting Epoch 1 of 2


                                                                     

Average Loss: 0.2456
Starting Epoch 2 of 2


                                                                     

Average Loss: 0.2340


100%|██████████| 385/385 [01:24<00:00,  4.56it/s]

Accuracy after Fine-Tuning: 79.6281





In [36]:
model.eval()

print("Accuracy after regular fine-tuning:", assess_model_performance(model, test_loader))

100%|██████████| 385/385 [01:22<00:00,  4.66it/s]

Accuracy after regular fine-tuning: 79.62812601493991





In [37]:
base_model = models.resnet50(pretrained=True)

base_model.fc = nn.Linear(base_model.fc.in_features, 37)

lora_parameters = LoraConfig(
    r=8,
    lora_alpha=16,
    target_modules=['fc'],
    lora_dropout=0.01,
)

enhanced_model = get_peft_model(base_model, lora_parameters)
enhanced_model = enhanced_model.to(device)

optimizer = torch.optim.Adam(enhanced_model.parameters(), lr=2e-4)
loss_function = nn.CrossEntropyLoss()

def fine_tune_with_lora(model, dataloader, optimizer, epochs=2):
    model.train()
    for epoch in range(epochs):
        epoch_loss = 0.0
        print(f"Starting Epoch {epoch + 1} of {epochs}")
        
        for images, labels in tqdm(dataloader, desc=f"Fine-tuning with LoRA: Epoch {epoch + 1}", leave=False):
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            
            predictions = model(images)
            
            loss = loss_function(predictions, labels)
            
            loss.backward()
            optimizer.step()
            
            epoch_loss += loss.item()
        
        print(f"Average Loss: {epoch_loss / len(dataloader):.4f}")

fine_tune_with_lora(enhanced_model, train_loader, optimizer)

accuracy_after_lora_fine_tuning = assess_model_performance(enhanced_model, test_loader)
print(f"Model Accuracy after LoRA Fine-tuning: {accuracy_after_lora_fine_tuning:.4f}")

Starting Epoch 1 of 2


                                                                                   

Average Loss: 15.1707
Starting Epoch 2 of 2


                                                                                   

Average Loss: 14.7205


100%|██████████| 385/385 [01:21<00:00,  4.71it/s]

Model Accuracy after LoRA Fine-tuning: 50.9825



