In [2]:
# !unzip /content/image-processing-house-recognition.zip

In [3]:
!pip -q install torch torchvision transformers timm

In [4]:
import os
import pandas as pd
import torch
import torch.nn as nn
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from PIL import Image
from timm import create_model

In [77]:
class HouseDataset(Dataset):
    """Image dataset backed by a CSV index file.

    Rows are read positionally: column 0 holds the image file name (joined
    onto ``img_dir``) and column 1 holds the numeric label.

    Args:
        csv_file: Path to the CSV file listing image names and labels.
        img_dir: Directory that contains the image files.
        transform: Optional callable applied to the RGB image before it is
            returned. ``None`` returns the image unchanged.
    """

    def __init__(self, csv_file, img_dir, transform=None):
        self.data = pd.read_csv(csv_file)
        self.img_dir = img_dir
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the CSV index."""
        return len(self.data)

    def __getitem__(self, idx):
        """Load the sample stored at row ``idx``.

        Returns:
            An ``(image, label)`` tuple where ``label`` is a scalar
            ``torch.float32`` tensor.
        """
        img_path = os.path.join(self.img_dir, self.data.iloc[idx, 0])

        # Image.open is lazy and keeps the file handle around; the context
        # manager guarantees it is released once the pixels are converted.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        label = torch.tensor(self.data.iloc[idx, 1], dtype=torch.float32)

        # Compare against None explicitly so that a falsy-but-valid callable
        # (e.g. an empty container module) is still applied.
        if self.transform is not None:
            image = self.transform(image)

        return image, label

In [96]:
# Training-time preprocessing: resize to the 224x224 model input, apply light
# random augmentation, then convert to a tensor and rescale each channel with
# mean 0.5 / std 0.5.
transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),  # mirror half of the images
        transforms.RandomRotation(degrees=10),  # rotate by up to +/-10 degrees
        transforms.ColorJitter(  # small random colour perturbations
            brightness=0.2,
            contrast=0.2,
            saturation=0.2,
            hue=0.1,
        ),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [97]:
# Training set: CSV index plus image folder, with the augmenting transform.
train_dataset = HouseDataset(
    csv_file="/kaggle/input/image-processing-house-recognition/train.csv",
    img_dir="/kaggle/input/image-processing-house-recognition/train/train",
    transform=transform,
)
# Reshuffled mini-batches of 16 samples for every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=16, shuffle=True)

In [80]:
import timm

# timm.list_models("")

In [106]:
# Backbone selection. Every candidate is a pretrained timm model with a single
# output logit (num_classes=1). Scores below are the author's notes from
# earlier runs (metric not recorded in this notebook):
#   swin_base_patch4_window7_224                                   -> 0.97
#   convnextv2_tiny                                                -> 0.9888  (selected)
#   convnextv2_base                                                -> 0.9777
#   eva02_tiny_patch14_224                                         -> 0.94
#   hf_hub:timm/vit_pwee_patch16_reg1_gap_256.sbb_in1k             -> 0.977
#   hf_hub:timm/vit_little_patch16_reg1_gap_256.sbb_in12k_ft_in1k  -> (no score noted)
model = create_model("convnextv2_tiny", pretrained=True, num_classes=1)

# Place the model on the GPU when one is available, otherwise stay on the CPU.
model = model.to("cuda" if torch.cuda.is_available() else "cpu")

In [107]:
# Binary classification on a single raw logit: BCEWithLogitsLoss applies the
# sigmoid internally, so the model output is passed to it unactivated.
criterion = nn.BCEWithLogitsLoss()
# Adam over every model parameter with a learning rate of 1e-4.
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-4)

In [108]:
def train_model(model, dataloader, criterion, optimizer, epochs=5):
    """Train ``model`` and print the mean batch loss after every epoch.

    Args:
        model: Network to optimise; it is switched to training mode.
        dataloader: Iterable yielding ``(images, labels)`` batches. Labels get
            a trailing dimension added (``unsqueeze(1)``) before the loss is
            computed.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``.
        optimizer: Optimizer that updates the parameters of ``model``.
        epochs: Number of full passes over ``dataloader``.

    Returns:
        None. Progress is reported on stdout, one line per epoch.
    """
    # Send batches to wherever the model actually lives instead of re-detecting
    # CUDA: the two can differ (e.g. a CPU model on a GPU machine), which would
    # otherwise fail with a device-mismatch error in the forward pass.
    first_param = next(model.parameters(), None)
    if first_param is not None:
        device = first_param.device
    else:
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.train()

    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device).unsqueeze(1)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1

        # Count batches while iterating so loaders without __len__ work, and
        # report NaN instead of dividing by zero when the loader is empty.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}, Loss: {mean_loss:.4f}")

In [None]:
# Fine-tune the selected model on the training loader for 10 epochs; the mean
# loss of each epoch is printed as training progresses.
train_model(model, train_loader, criterion, optimizer, epochs=10)

Epoch 1, Loss: 0.8753


In [85]:
# Persist only the learned weights (the state_dict), not the whole module object.
torch.save(model.state_dict(), "convnext_house_model.pth")

Test folder image preprocessing

In [87]:
# Inference-time preprocessing: the same resize and normalisation as training,
# without any random augmentation.
test_transform = transforms.Compose(
    [
        transforms.Resize(size=(224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
    ]
)

In [88]:
import torch
import os
import pandas as pd
from PIL import Image
from torchvision import transforms

def predict(model, test_folder, output_csv, transform=None):
    """Classify every image in ``test_folder`` and write the results to a CSV.

    Args:
        model: Network producing a single logit per image.
        test_folder: Directory that contains the images to classify.
        output_csv: Destination path of the CSV with columns ``id`` (file name
            without extension) and ``answer`` (0 or 1).
        transform: Optional callable turning an RGB image into the model input
            tensor (without batch dimension). Defaults to the notebook-level
            ``test_transform``.
    """
    if transform is None:
        # Backward-compatible default: the preprocessing defined in the notebook.
        transform = test_transform

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model.to(device)
    model.eval()

    # Match on the real file extension (so a name that merely ends in "png"
    # without a dot is skipped) and sort, because os.listdir returns entries
    # in arbitrary order and the CSV should be reproducible.
    image_extensions = {'.png', '.jpg', '.jpeg'}
    test_images = sorted(
        f for f in os.listdir(test_folder)
        if os.path.splitext(f)[1].lower() in image_extensions
    )
    results = []

    with torch.no_grad():
        for img_name in test_images:
            img_path = os.path.join(test_folder, img_name)
            # Close the file handle as soon as the pixels have been converted.
            with Image.open(img_path) as raw_image:
                rgb_image = raw_image.convert('RGB')
            batch = transform(rgb_image).unsqueeze(0).to(device)  # add batch dim

            output = model(batch)
            if isinstance(output, torch.Tensor):
                output = output.item()  # single logit -> Python float

            # A logit above 0 corresponds to a sigmoid probability above 0.5.
            pred = 1 if output > 0 else 0
            results.append([os.path.splitext(img_name)[0], pred])

    df = pd.DataFrame(results, columns=["id", "answer"])
    df.to_csv(output_csv, index=False)
    print(f"Predictions saved to {output_csv}")

In [89]:
# Run inference on the competition test folder and write the submission file.
predict(model, "/kaggle/input/image-processing-house-recognition/test/test", "convnextv2_submission.csv")

Predictions saved to convnextv2_submission.csv
