In [29]:
import torch
import torchvision
import torchvision.transforms as transforms
from torchvision import models
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os


In [46]:
class LandUseDataset(Dataset):
    """Labelled image dataset described by a table with 'fn' and 'class' columns.

    Each row names an image file (column 'fn', relative to ``image_dir``) and its
    class label (column 'class'). Labels are mapped to integer indices and
    stored in an extra 'label_idx' column of ``self.data``.

    Args:
        csv_file: Path of a CSV file to read the table from. Used only when
            ``dataframe`` is None.
        image_dir: Directory that the 'fn' entries are joined onto.
        transform: Optional callable applied to the loaded RGB PIL image.
        dataframe: Table to use directly; takes precedence over ``csv_file``.
        classes: Optional explicit sequence of class labels. Its order defines
            the label indices. Pass the training set's ``classes`` when building
            a validation/subset dataset so that both share one mapping. When
            omitted, the sorted unique labels of the table are used.

    Raises:
        ValueError: If neither ``csv_file`` nor ``dataframe`` is given, or if
            the table contains a label that is missing from ``classes``.
    """

    def __init__(self, csv_file=None, image_dir=None, transform=None, dataframe=None, classes=None):
        if dataframe is not None:
            # reset_index returns a new frame, so adding 'label_idx' below does
            # not touch the caller's dataframe.
            self.data = dataframe.reset_index(drop=True)
        elif csv_file is not None:
            self.data = pd.read_csv(csv_file)
        else:
            raise ValueError("Entweder 'csv_file' oder 'dataframe' muss übergeben werden.")

        self.image_dir = image_dir
        self.transform = transform

        if classes is None:
            self.classes = sorted(self.data['class'].unique())
        else:
            self.classes = list(classes)
            # An unmapped label would become NaN in 'label_idx' and only fail
            # much later inside the loss function, so reject it here.
            unknown = sorted(set(self.data['class'].unique()) - set(self.classes))
            if unknown:
                raise ValueError(f"Labels missing from 'classes': {unknown}")

        self.class_to_idx = {label: idx for idx, label in enumerate(self.classes)}
        self.idx_to_class = {idx: label for label, idx in self.class_to_idx.items()}
        self.data['label_idx'] = self.data['class'].map(self.class_to_idx)

    def __len__(self):
        """Return the number of rows (samples) in the table."""
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for the row at position ``idx``."""
        row = self.data.iloc[idx]
        img_path = os.path.join(self.image_dir, row['fn'])
        # convert() yields an independent image, so the file can be closed
        # as soon as the block exits.
        with Image.open(img_path) as img:
            image = img.convert('RGB')
        label = int(row['label_idx'])
        if self.transform:
            image = self.transform(image)
        return image, label


In [63]:
# Mount the user's Google Drive at /content/drive; the zip archives read by the
# extraction cells are addressed below that mount point.
from google.colab import drive
drive.mount('/content/drive')


In [66]:
import zipfile
import os

# Source archive on the mounted Drive and the local directory it is unpacked into.
zip_path_train = '/content/drive/MyDrive/Hackerthon2/train.zip'
extract_path_train = '/content/imagesFinal/train'

# Unpack every member of the training archive below extract_path_train.
with zipfile.ZipFile(zip_path_train, 'r') as zip_ref:
    zip_ref.extractall(extract_path_train)


In [67]:
# Source archive on the mounted Drive and the local directory it is unpacked into.
# The Drive folder is spelled 'MyDrive' (no space), matching the path used for
# the training archive.
zip_path_test = '/content/drive/MyDrive/Hackerthon2/test.zip'
extract_path_test = '/content/imagesFinal/test'

# Unpack every member of the test archive below extract_path_test.
with zipfile.ZipFile(zip_path_test, 'r') as zip_ref:
    zip_ref.extractall(extract_path_test)


In [70]:
def _regular_files(folder):
    """Return the names of the regular files located directly inside ``folder``."""
    return [name for name in os.listdir(folder) if os.path.isfile(os.path.join(folder, name))]


# Show the first few top-level entries of each extraction target.
print("Train-Dateien:", os.listdir(extract_path_train)[:5])
print("Test-Dateien:", os.listdir(extract_path_test)[:5])

# The images are read from a sub-folder nested inside each extraction target.
train_dir = '/content/imagesFinal/train/train'
test_dir = '/content/imagesFinal/test/test'

train_files = _regular_files(train_dir)
test_files = _regular_files(test_dir)

# Report how many files each image folder holds.
print(f"Train-Bilder: {len(train_files)}")
print(f"Test-Bilder: {len(test_files)}")


In [71]:
import pandas as pd

# Load the training table and print its column names, first rows and row count.
df = pd.read_csv('/content/train.csv')
print(df.columns)
print(df.head())
print(len(df))

In [90]:
# Training-time preprocessing: resize to 224x224, apply random flips, rotation
# and colour jitter, then convert to a tensor and normalise each channel.
augmentation_steps = [
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(30),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.3),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
]
transform = transforms.Compose(augmentation_steps)

# Label table and the folder holding the extracted training images.
csv_path = '/content/train.csv'
image_dir = '/content/imagesFinal/train/train'

# Dataset over every row of the CSV, served in shuffled batches of 64.
train_dataset = LandUseDataset(csv_file=csv_path, image_dir=image_dir, transform=transform)
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)


In [91]:
from torchvision.models import resnet50

# Pick the accelerator once; everything else in this cell uses this handle.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# NOTE(review): newer torchvision releases deprecate `pretrained=` in favour of
# `weights=`; left unchanged to stay compatible with the installed version.
model = resnet50(pretrained=True)

# Size the classification head from the dataset instead of a hard-coded count,
# so the output layer always matches the label indices the dataset produces.
num_classes = len(train_dataset.classes)
model.fc = nn.Linear(model.fc.in_features, num_classes)

model = model.to(device)


In [92]:
# Cross-entropy loss for classification, Adam over all model parameters, and a
# step schedule that multiplies the learning rate by 0.1 every 7 scheduler steps.
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
scheduler = optim.lr_scheduler.StepLR(optimizer=optimizer, step_size=7, gamma=0.1)


In [93]:
# Re-reads the same training table that an earlier cell already loaded into
# `df` and prints its column names.
import pandas as pd
df = pd.read_csv('/content/train.csv')
print(df.columns)

In [94]:
# Move batches to wherever the model lives. The model cell falls back to the
# CPU when no GPU is available, so a hard-coded 'cuda' would crash there.
device = next(model.parameters()).device

# Train for 10 epochs, printing the mean batch loss after each one.
for epoch in range(10):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Advance the learning-rate schedule once per epoch.
    scheduler.step()
    print(f"Epoch {epoch+1}, Loss: {running_loss / len(train_loader)}")


In [82]:
class TestDataset(Dataset):
    """Unlabelled image folder that yields ``(image, file_name)`` pairs.

    Only entries of ``image_dir`` whose name ends in .jpg, .jpeg or .png
    (case-insensitive) are kept, in sorted order.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        image_suffixes = ('.jpg', '.jpeg', '.png')
        names = [
            entry for entry in os.listdir(image_dir)
            if entry.lower().endswith(image_suffixes)
        ]
        names.sort()
        self.image_filenames = names
        self.transform = transform

    def __len__(self):
        """Return the number of image files found."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Load image ``idx`` as RGB, apply the transform if set, and return it with its name."""
        img_name = self.image_filenames[idx]
        image = Image.open(os.path.join(self.image_dir, img_name)).convert('RGB')
        if not self.transform:
            return image, img_name
        return self.transform(image), img_name


In [84]:
# Deterministic preprocessing for inference: same resize and normalisation as
# training, but without the random flips, rotation or colour jitter, so that
# predictions are reproducible. Defined here so this cell is self-contained.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

# Folder the test archive is extracted into by the extraction cell.
test_dir = '/content/imagesFinal/test/test'
test_dataset = TestDataset(image_dir=test_dir, transform=eval_transform)
# shuffle=False keeps predictions aligned with the sorted file names.
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print("Anzahl Testbilder:", len(test_dataset))
print("Bildnamen:", test_dataset.image_filenames[:5])


In [95]:
from sklearn.model_selection import train_test_split

# Reload the label table and hold out 20% of the rows for validation.
# stratify keeps the class proportions equal in both parts; random_state fixes
# the split so it is reproducible.
df = pd.read_csv('/content/train.csv')
train_df, val_df = train_test_split(df, test_size=0.2, stratify=df['class'], random_state=42)


In [96]:
# Both splits come from the same label table, so they read images from the same
# folder: the one the training archive is extracted into.
train_image_dir = '/content/imagesFinal/train/train'

# Deterministic preprocessing for validation: same resize and normalisation as
# training, but without random augmentation, so the accuracy is reproducible.
# Defined here so this cell is self-contained.
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

train_dataset = LandUseDataset(
    dataframe=train_df,
    image_dir=train_image_dir,
    transform=transform
)

val_dataset = LandUseDataset(
    dataframe=val_df,
    image_dir=train_image_dir,
    transform=eval_transform
)

# Each dataset derives its own label mapping; fail loudly if they disagree,
# because a mismatch would silently corrupt the validation accuracy.
assert val_dataset.class_to_idx == train_dataset.class_to_idx, \
    "train and validation splits produced different class-to-index mappings"

train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)


In [97]:
# Sanity check: show the type of the training split and its first rows.
print(type(train_df))
print(train_df.head())


In [99]:
def evaluate(model, dataloader, class_names):
    """Run ``model`` over ``dataloader`` and report classification accuracy.

    The model is switched to eval mode and gradients are disabled. Batches are
    moved to the device the model's parameters live on, so the function works
    on both GPU and CPU runtimes.

    Args:
        model: Module mapping a batch of inputs to per-class scores.
        dataloader: Iterable yielding ``(inputs, labels)`` tensor batches.
        class_names: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(predictions, true_labels)``: two flat lists with one entry per
        sample, in iteration order. Both are empty if the dataloader is empty.
    """
    model.eval()
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device('cpu')

    correct = 0
    total = 0
    predictions = []
    true_labels = []

    with torch.no_grad():
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            # Index of the highest score along the class dimension.
            _, preds = torch.max(outputs, 1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            predictions.extend(preds.cpu().numpy())
            true_labels.extend(labels.cpu().numpy())

    if total == 0:
        # Avoid ZeroDivisionError when there is nothing to evaluate.
        print("Validation Accuracy: n/a (no samples)")
        return predictions, true_labels

    accuracy = correct / total
    print(f"Validation Accuracy: {accuracy:.4f}")
    return predictions, true_labels


In [100]:
# Score the current model on the validation split.
# NOTE(review): the only training loop visible in this notebook ran on a loader
# built from the full train.csv, before the train/validation split was created,
# so the validation images may already have been seen during training — confirm
# the model was re-trained on the split before trusting this accuracy.
preds, true = evaluate(model, val_loader, class_names=train_dataset.classes)


In [98]:
# Predict a class for every test image and write the submission file.
model.eval()
# Move batches to wherever the model lives. The model cell falls back to the
# CPU when no GPU is available, so a hard-coded 'cuda' would crash there.
device = next(model.parameters()).device
predictions = []
filenames = []

with torch.no_grad():
    for inputs, img_names in test_loader:
        inputs = inputs.to(device)
        outputs = model(inputs)
        # Index of the highest score along the class dimension.
        _, preds = torch.max(outputs, 1)
        predictions.extend(preds.cpu().numpy())
        filenames.extend(img_names)

# Translate predicted indices back to class labels via the dataset's mapping.
label_names = [train_dataset.idx_to_class[p] for p in predictions]

submission = pd.DataFrame({'fn': filenames, 'class': label_names})
submission = submission.sort_values('fn').reset_index(drop=True)
submission.to_csv('submission.csv', index=False)

# Offer the CSV as a browser download from the Colab runtime.
from google.colab import files
files.download('submission.csv')


In [81]:
import os

# Folder the test archive is extracted into by the extraction cell.
test_dir = '/content/imagesFinal/test/test'
# Named `test_entries` rather than `files` so it does not shadow the
# `google.colab.files` module imported by the submission cell.
test_entries = os.listdir(test_dir)

print("Anzahl Einträge im Testordner:", len(test_entries))
print("Beispiel-Dateien:", test_entries[:5])
