In [None]:
!pip install skorch

In [None]:
import os
import glob
import tqdm
import numpy as np
import pandas as pd
import pickle
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
import skorch
from PIL import Image
from torchvision import transforms
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score, accuracy_score, roc_auc_score

## Homework: EuroSAT: A land use and land cover classification dataset

<a href='https://arxiv.org/abs/1709.00029'>EuroSAT: A novel dataset and deep learning benchmark for land use and land cover classification.</a> Patrick Helber, Benjamin Bischke, Andreas Dengel, Damian Borth. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing, 2019.

<img src='https://raw.githubusercontent.com/phelber/EuroSAT/master/eurosat_overview_small.jpg'>




## Question 1: Use a pre-trained model

In class we achieved a classification accuracy of 90.6% on the test dataset of EuroSAT. Can you improve the performance of this model?

Ideas:
- Try to adjust the learning rate or number of epochs
- Try adding additional transforms to the preprocessing pipeline (see <a href='https://pytorch.org/docs/stable/torchvision/transforms.html'>torchvision.transforms</a>)
- Try using a different pre-trained model (see <a href='https://pytorch.org/docs/stable/torchvision/models.html#classification'>torchvision.models</a>)

### Download the dataset

In [None]:
# http://madm.dfki.de/files/sentinel/EuroSAT.zip
# Idempotent download + extraction: skip work whose result is already on disk
# (so "Restart & Run All" neither re-downloads nor stalls on unzip's overwrite
# prompt) and fail loudly when a shell command reports an error.
if not os.path.exists("EuroSAT.zip"):
  if os.system("wget http://madm.dfki.de/files/sentinel/EuroSAT.zip") != 0:
    # Drop a partial archive so the next run retries the download.
    if os.path.exists("EuroSAT.zip"):
      os.remove("EuroSAT.zip")
    raise RuntimeError("Downloading EuroSAT.zip failed (wget returned a non-zero status).")

# The images are read from the "2750" folder by the loading cell, so its
# presence is used as the marker that the archive was already extracted.
if not os.path.isdir("2750"):
  if os.system("unzip EuroSAT.zip") != 0:
    raise RuntimeError("Extracting EuroSAT.zip failed (unzip returned a non-zero status).")

### Load the dataset

In [None]:
def load_eurosat_dataset(root="2750"):
  """Load every EuroSAT JPEG found under ``root`` into two in-memory tensors.

  Every entry directly inside ``root`` is treated as one class folder; the
  class index of a folder is its position in the alphabetically sorted list
  of those entries.

  Args:
    root: Directory containing one sub-folder per class. Defaults to "2750",
      the folder name this notebook has always read from.

  Returns:
    A tuple ``(X, y)`` where ``X`` is a float tensor holding all preprocessed
    images stacked along a new leading dimension and ``y`` is a 1-D long
    tensor with the class index of each image, in the same order.

  Raises:
    FileNotFoundError: if ``root`` contains no class folders, or the class
      folders contain no ``.jpg`` files.
  """
  data_folders = sorted(glob.glob(os.path.join(root, "*")))
  # Fail early with an actionable message instead of letting torch.stack
  # choke on an empty list further down.
  if not data_folders:
    raise FileNotFoundError(
        "No class folders found under '%s'; download and unzip EuroSAT first." % root)

  # preprocessing steps for image
  # NOTE(review): the random flips are applied exactly once, here at load time,
  # to every image (including those that later land in the test split). They
  # are therefore not re-sampled per epoch and make the loaded tensors differ
  # between runs unless torch is seeded beforehand.
  # NOTE(review): the mean/std below differ from the ImageNet statistics that
  # usually accompany torchvision's pre-trained ResNet-50 (mean 0.485/0.456/0.406,
  # std 0.229/0.224/0.225) -- confirm which values are intended.
  preprocess = transforms.Compose([transforms.ToTensor(),
                                   transforms.RandomHorizontalFlip(),
                                   transforms.RandomVerticalFlip(),
                                   transforms.Normalize(mean=[0.7137, 0.6628, 0.6519],
                                                        std=[0.2970, 0.3017, 0.2979]),])
  X = []
  y = []
  for idx, folder in enumerate(data_folders):
    imgs = sorted(glob.glob(os.path.join(folder, "*.jpg")))
    for i in tqdm.tqdm(imgs):
      # The context manager releases the file handle as soon as the pixels
      # have been converted to a tensor.
      with Image.open(i) as img:
        X.append(preprocess(img))
      y.append(idx)

  if not X:
    raise FileNotFoundError(
        "No .jpg images found in the class folders under '%s'." % root)

  X = torch.stack(X, dim=0).float()
  y = torch.tensor(y, dtype=torch.long)
  return X, y

X, y = load_eurosat_dataset()
# Hold out 10% of the images for testing. The fixed random_state makes the
# split (and hence the reported test score) reproducible across kernel
# restarts; stratifying on the labels keeps the class proportions the same in
# both splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.1, random_state=42, stratify=y.numpy())

### Initialize pre-trained model

In [None]:
# Load ResNet-50 with ImageNet weights. Newer torchvision releases deprecate
# `pretrained=True` in favour of the `weights=` argument; IMAGENET1K_V1 is the
# checkpoint that `pretrained=True` used to select. Older releases that lack
# the weights enum keep using the legacy argument.
if hasattr(torchvision.models, "ResNet50_Weights"):
  model = torchvision.models.resnet50(
      weights=torchvision.models.ResNet50_Weights.IMAGENET1K_V1)
else:
  model = torchvision.models.resnet50(pretrained=True)
# Bare expression so the notebook displays the layer structure.
model

### Re-initialize final layer of pre-trained model

In [None]:
# Swap the network's final fully connected layer for a freshly initialised
# head: a linear map from 2048 features to 10 outputs (presumably one per
# EuroSAT class), followed by a softmax over dim 1 so the module emits
# per-sample class probabilities.
model.fc = nn.Sequential(
    nn.Linear(in_features=2048, out_features=10),
    nn.Softmax(dim=1),
)

### Fit the model

In [None]:
from skorch import NeuralNetClassifier

def optim(pgroups, **kwargs):
  """Optimizer factory handed to skorch that updates only the new head.

  The parameter groups skorch passes in (`pgroups`) are deliberately ignored;
  Adam is built over `model.fc.parameters()` instead, so optimizer steps touch
  only the replaced final layer. Remaining keyword arguments (e.g. `lr`) are
  forwarded to Adam unchanged.

  NOTE(review): the rest of the network is not frozen (`requires_grad` is left
  untouched), so gradients are still computed for it; only the update step is
  restricted to the head.
  """
  return torch.optim.Adam(model.fc.parameters(), **kwargs)

# Stage 1: train the new head. Fall back to the CPU when no CUDA device is
# available so the cell does not fail on machines without a GPU.
clf = NeuralNetClassifier(model, batch_size=512, max_epochs=10, lr=1e-3, optimizer=optim,
                          device='cuda' if torch.cuda.is_available() else 'cpu')
clf.fit(X_train, y_train)

In [None]:
# Stage 2: continue training the same `model` object, this time without a
# custom optimizer factory, so skorch's default optimizer is built over all
# of the module's parameters. Fall back to the CPU when no CUDA device is
# available so the cell does not fail on machines without a GPU.
clf = NeuralNetClassifier(model, batch_size=512, max_epochs=10, lr=1e-3,
                          device='cuda' if torch.cuda.is_available() else 'cpu')
clf.fit(X_train, y_train)

### Evaluate the model

In [None]:
# Score the most recently fitted classifier on the held-out test split; the
# bare expression lets the notebook display the resulting value.
clf.score(X_test, y_test)