<a href="https://colab.research.google.com/github/Mohamed-El/admin/blob/master/Lacuna_Solar_Survey_Challenge.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [130]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import cv2
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms
from sklearn.model_selection import GroupShuffleSplit
from PIL import Image

# Set random seed for reproducibility
torch.manual_seed(42)
np.random.seed(42)


if torch.cuda.is_available():
    print("✅ GPU is available!")
    print(f"GPU Name: {torch.cuda.get_device_name(0)}")
else:
    print("❌ No GPU found.")

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")



❌ No GPU found.


In [149]:
def load_image(image_path):
    """Load an image from the given path and resize it to a fixed size."""
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)  # Convert to RGB
    image = cv2.resize(image, (224, 224))  # Resize to 224x224 for model input
    return image

# Define transformations for the dataset
transform = transforms.Compose([
    transforms.ToTensor(),  # Convert image to PyTorch tensor
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])  # Normalize for pretrained models
])

def polygon_to_bbox(polygon):
    """
    Convert a polygon [(x1, y1), (x2, y2), ...] to a rectangular bounding box [x_min, y_min, x_max, y_max].
    """
    x_coords = [point[0] for point in polygon]
    y_coords = [point[1] for point in polygon]
    x_min = min(x_coords)
    y_min = min(y_coords)
    x_max = max(x_coords)
    y_max = max(y_coords)
    return [x_min, y_min, x_max, y_max]

In [160]:
class SolarDataset(Dataset):
    def __init__(self, image_dir, annotations_df, transform=None):
        """
        Args:
            image_dir (str): Path to the directory with images.
            annotations_df (pd.DataFrame): DataFrame containing annotations.
            transform (callable, optional): Optional transform to be applied to images.
        """
        self.image_dir = image_dir
        self.annotations = annotations_df
        self.transform = transform

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        # Get image path and load image
        img_name = self.annotations.iloc[idx, 0]  # Assuming the first column is the image ID
        img_path = os.path.join(self.image_dir, img_name + '.jpg')
        image = load_image(img_path)

        # Get polygon annotations
        polygon = eval(self.annotations.iloc[idx, self.annotations.columns.get_loc("polygon")])  # Convert string to list
        bbox = polygon_to_bbox(polygon)  # Convert polygon to rectangular bounding box

        # Convert bounding box to tensor
        bbox = torch.tensor(bbox, dtype=torch.float32)

        # Get target values (number of panels and boilers)
        nbr_pan = self.annotations.iloc[idx, self.annotations.columns.get_loc("pan_nbr")]
        nbr_boil = self.annotations.iloc[idx, self.annotations.columns.get_loc("boil_nbr")]
        target = torch.tensor([nbr_pan, nbr_boil], dtype=torch.float32)

        # Apply transformations if any
        if self.transform:
            image = self.transform(image)

        return image, bbox, target

In [134]:
train_dataset = SolarDataset("images/", train, transform=transform)
group_split = GroupShuffleSplit(test_size=0.2, n_splits=1, random_state=42)
train_idx, val_idx = next(group_split.split(train, groups=train['ID']))
train_df = train.iloc[train_idx]
val_df = train.iloc[val_idx]
train_images = set(train_df['ID'].unique())
val_images = set(val_df['ID'].unique())

overlap = train_images.intersection(val_images)
if overlap:
    print(f"Warning: {len(overlap)} images are present in both training and validation sets.")
else:
    print("No data leakage detected. All images are uniquely split.")

train_dataset = SolarDataset("images/", train_df, transform=transform)
val_dataset = SolarDataset("images/", val_df, transform=transform)

train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

No data leakage detected. All images are uniquely split.


In [137]:
# Define a simple CNN model
class SolarModel(nn.Module):
    def __init__(self):
        super(SolarModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 56 * 56, 128)  # Adjust based on input size
        self.fc2 = nn.Linear(128, 2)  # Output 2 values: nbr_pan and nbr_boil

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = x.view(-1, 32 * 56 * 56)  # Flatten the tensor
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [138]:
# Initialize the model, loss function, and optimizer
model = SolarModel()
criterion = nn.MSELoss()  # Mean Squared Error for regression
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [139]:
# Training loop
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, targets in train_loader:
        # Zero the parameter gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, targets)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Validation loop
model.eval()
val_loss = 0.0
with torch.no_grad():
    for images, targets in val_loader:
        outputs = model(images)
        loss = criterion(outputs, targets)
        val_loss += loss.item()

print(f"Validation Loss: {val_loss/len(val_loader):.4f}")

Epoch [1/10], Loss: 44.9006
Epoch [2/10], Loss: 29.5307
Epoch [3/10], Loss: 21.2834
Epoch [4/10], Loss: 16.0158
Epoch [5/10], Loss: 14.1813
Epoch [6/10], Loss: 10.8565
Epoch [7/10], Loss: 10.1408
Epoch [8/10], Loss: 9.3509
Epoch [9/10], Loss: 9.8348
Epoch [10/10], Loss: 9.0756
Validation Loss: 42.5113


In [140]:
print(model)

SolarModel(
  (conv1): Conv2d(3, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=100352, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=2, bias=True)
)


In [142]:
from torchsummary import summary
summary(model, input_size=(3, 16))

RuntimeError: Given groups=1, weight of size [16, 3, 3, 3], expected input[1, 2, 3, 16] to have 3 channels, but got 2 channels instead

In [145]:
train["polygon"][0]

'[(2087, 2179.0), (2181, 2191.0), (2171, 2223.0), (2257, 2227.0), (2207, 2335.0), (2007, 2329.0)]'

In [148]:
bb

[2007, 2179.0, 2257, 2335.0]

In [150]:
list(map(float, bb.split(',')))

AttributeError: 'list' object has no attribute 'split'