In [1]:
!pip install geopandas
!pip install rasterio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.10.2-py2.py3-none-any.whl (1.0 MB)
[K     |████████████████████████████████| 1.0 MB 34.4 MB/s 
Collecting fiona>=1.8
  Downloading Fiona-1.8.21-cp37-cp37m-manylinux2014_x86_64.whl (16.7 MB)
[K     |████████████████████████████████| 16.7 MB 65.1 MB/s 
Collecting pyproj>=2.2.0
  Downloading pyproj-3.2.1-cp37-cp37m-manylinux2010_x86_64.whl (6.3 MB)
[K     |████████████████████████████████| 6.3 MB 55.5 MB/s 
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1.1-py2.py3-none-any.whl (7.5 kB)
Collecting munch
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Installing collected packages: munch, cligj, click-plugins, pyproj, fiona, geopandas
Successfully installed click-plugins-1.1.1 cligj-0.7.2 fiona-1.8.21 geopandas-0.10.2 munch-2.5.0 pyproj-3.2.1
Looking 

In [2]:
import torch
from torch.utils.data import DataLoader, Dataset
import cv2
from rasterio.features import rasterize
import numpy as np
import geopandas as gpd
from glob import glob
import rasterio
import matplotlib.pyplot as plt
from torch import nn
from tqdm import tqdm
from sklearn.model_selection import train_test_split

In [4]:
img_profile = rasterio.open('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2021/20210101/IMG_DATA/47PQS_20210101_B04.jp2').profile
label = gpd.read_file('/content/drive/MyDrive/VarunaHackathon2022/training_area/traindata.shp').to_crs(img_profile['crs'])
target_image = rasterize(
    shapes=[(row.geometry, int(row.crop_type)) for i, row in label.iterrows()], # Construct polygon and value tuples
    out_shape=(img_profile['width'], img_profile['height']),
    transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
)

main_path = glob('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2021/*')
ds = []
for i in tqdm(main_path):
  band_list = []
  for k in glob(i+'/IMG_DATA/*.jp2'):
    b = rasterio.open(k)
    b = b.read()[0]
    b = cv2.resize(b, dsize=(2051, 2051), interpolation=cv2.INTER_CUBIC)
    band_list.append(b[target_image!=0])
  ds.append(np.stack(band_list, axis=-1))
input = np.stack(ds,axis=-1)
label = target_image[target_image!=0]

100%|██████████| 71/71 [31:04<00:00, 26.27s/it]


In [5]:
np.save('input.npy',input)
np.save('labels.npy',label)

In [6]:
input.shape,label.shape

((236882, 15, 71), (236882,))

In [11]:
class MyDataset(Dataset):
  def __init__(self, input, label):
    self.inputs = input / 65535 #(int16)
    self.labels = np.stack([label==1,label==2,label==3,label==4], axis=-1)
  def __len__(self):
    return self.labels.shape[0]
  def __getitem__(self,idx):
    input = self.inputs[idx]
    label = self.labels[idx]
    return torch.tensor(input, dtype=torch.float32), torch.tensor(label, dtype=torch.float32)


In [15]:
np.bincount(label)

array([     0, 100162,  29069,  84342,  23309])

In [18]:
X_re = np.concatenate((input[label == 1][0:20000],
 input[label == 2][0:20000],
 input[label == 3][0:20000],
 input[label == 4][0:20000]))

Y_re = np.concatenate((label[label == 1][0:20000],
 label[label == 2][0:20000],
 label[label == 3][0:20000],
 label[label == 4][0:20000]))


In [20]:
X_train, X_test, y_train, y_test = train_test_split(X_re, Y_re, test_size=0.2, random_state=42)
train_ds = MyDataset(X_train,y_train)
val_ds = MyDataset(X_test,y_test)
train_loader = DataLoader(train_ds, batch_size = 256, shuffle=False)
val_loader = DataLoader(val_ds, batch_size=256, shuffle=False)

In [21]:
class Block1D(nn.Module):
  def __init__(self):
    super().__init__()
    self.conv1 = nn.Conv1d(15, 8, kernel_size=12, stride = 6)
    self.relu = nn.ReLU()

    self.conv2 = nn.Conv1d(8, 4, kernel_size=6, stride = 3)
    self.conv3 = nn.Conv1d(4, 4, kernel_size=2, stride = 1)
    self.softmax = nn.Softmax(dim=1)
  def forward(self, x):
    x = self.conv1(x)
    x = self.relu(x)
    x = self.conv2(x)
    x = self.relu(x)
    x = self.conv3(x)
    x = self.softmax(x)
    return x

class MyModel(nn.Module):
  def __init__(self):
    super().__init__()
    self.block1d = Block1D()
  def forward(self, x):
    B, Chan, T = x.shape
    x = self.block1d(x)
    return x.squeeze()

In [22]:
model = MyModel()
model(torch.rand(4,15,71))

tensor([[0.2060, 0.3276, 0.2675, 0.1989],
        [0.2049, 0.3286, 0.2629, 0.2036],
        [0.2075, 0.3250, 0.2645, 0.2030],
        [0.1929, 0.3283, 0.2764, 0.2024]], grad_fn=<SqueezeBackward0>)

In [23]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

MyModel(
  (block1d): Block1D(
    (conv1): Conv1d(15, 8, kernel_size=(12,), stride=(6,))
    (relu): ReLU()
    (conv2): Conv1d(8, 4, kernel_size=(6,), stride=(3,))
    (conv3): Conv1d(4, 4, kernel_size=(2,), stride=(1,))
    (softmax): Softmax(dim=1)
  )
)

In [26]:
from torch.utils.tensorboard import SummaryWriter
lossFunc = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
epoch = 10000
PATH = '/content/drive/MyDrive/TestSet'
writer = SummaryWriter(PATH)

In [30]:
from sklearn.metrics import f1_score

for i in range(epoch):
  model.train()
  running_loss = 0.0
  for img, labels in train_loader:
    img = img.to(device)
    labels = labels.to(device)
    optimizer.zero_grad()
    out = model(img)
    loss = lossFunc(out,labels)
    loss.backward()
    optimizer.step()

    running_loss += loss
    y_pred = torch.argmax(out, dim=1)
    y_gt = torch.argmax(labels,axis=1)
    f1_train = f1_score(y_pred.cpu(), y_gt.cpu(), average='macro')
  train_loss = running_loss / len(train_loader)
  print(f'train_loss:{train_loss} F1_score:{f1_train}')

  model.eval()
  running_loss = 0.0
  for img, labels in val_loader:
    img = img.to(device)
    labels = labels.to(device)
    out = model(img)
    loss = lossFunc(out,labels)

    running_loss += loss
    y_pred = torch.argmax(out, dim=1)
    y_gt = torch.argmax(labels,axis=1)
    f1_valid = f1_score(y_pred.cpu(), y_gt.cpu(), average='macro')
  valid_loss = running_loss / len(val_loader)
  print(f'valid_loss:{valid_loss} F1_score:{f1_valid}')
  writer.add_scalar('Loss_Epoch/train', train_loss, i)
  writer.add_scalar('Loss_Epoch/valid', valid_loss, i)
  writer.add_scalar('F1_macro/train', f1_train, i)
  writer.add_scalar('F1_macro/valid', f1_valid, i)

train_loss:1.3851642608642578 F1_score:0.09365079365079365
valid_loss:1.3832894563674927 F1_score:0.08169934640522876
train_loss:1.3808788061141968 F1_score:0.25626566416040103
valid_loss:1.377498984336853 F1_score:0.2299709724238026
train_loss:1.3725881576538086 F1_score:0.2678314467582592
valid_loss:1.3675308227539062 F1_score:0.3039093230924107
train_loss:1.359550952911377 F1_score:0.33137155420260445
valid_loss:1.3526406288146973 F1_score:0.29152096060669896
train_loss:1.3418328762054443 F1_score:0.32443635179345975
valid_loss:1.3344063758850098 F1_score:0.2911616161616162
train_loss:1.323171615600586 F1_score:0.3894831900680668
valid_loss:1.3173654079437256 F1_score:0.3646198830409357
train_loss:1.3070348501205444 F1_score:0.4225868477586061
valid_loss:1.303249478340149 F1_score:0.3931442850934377
train_loss:1.293852686882019 F1_score:0.43085633823924274
valid_loss:1.2915669679641724 F1_score:0.4069623678545227
train_loss:1.282753825187683 F1_score:0.4375669324131366
valid_loss:1.

KeyboardInterrupt: ignored

In [32]:
def save_checkpoint(checkpoint_name):
  torch.save({'model_state_dict':model.state_dict(),
              'optimizer_state_dict':optimizer.state_dict()
              }
             ,checkpoint_name)

In [34]:
save_checkpoint('/content/drive/MyDrive/TestSet/model_dict')

### Predict

In [74]:
img_profile = rasterio.open('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2020/20200102/IMG_DATA/47PQS_20200102_B04.jp2').profile
label = gpd.read_file('/content/drive/MyDrive/VarunaHackathon2022/testing_area/testdata.shp').to_crs(img_profile['crs'])
target_image = rasterize(
    shapes=[(row.geometry, 1) for i, row in label.iterrows()], # Construct polygon and value tuples
    out_shape=(img_profile['width'], img_profile['height']),
    transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
)

main_path = glob('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2021/*')
ds = []
for i in tqdm(main_path):
  band_list = []
  for k in glob(i+'/IMG_DATA/*.jp2'):
    b = rasterio.open(k)
    b = b.read()[0]
    b = cv2.resize(b, dsize=(2051, 2051), interpolation=cv2.INTER_CUBIC)
    band_list.append(b[target_image!=0])
  ds.append(np.stack(band_list, axis=-1))
input = np.stack(ds,axis=-1)
label = target_image[target_image!=0]

100%|██████████| 71/71 [03:45<00:00,  3.18s/it]


In [77]:
np.save('/content/drive/MyDrive/TestSet/input2.npy',input)
np.save('/content/drive/MyDrive/TestSet/labels2.npy',label)

In [78]:
y_pred_list = []
for i in tqdm(input):
  y_pred = model(torch.tensor(i / 65535,dtype=torch.float32).unsqueeze(0).to(device))
  y_pred = torch.argmax(y_pred, dim=0)
  y_pred_list.append(y_pred.cpu() + 1)
np.save('/content/drive/MyDrive/TestSet/test2020pred.npy', y_pred_list)

100%|██████████| 100013/100013 [00:51<00:00, 1947.67it/s]


In [80]:
target_image[target_image!=0] = np.load('/content/drive/MyDrive/TestSet/test2020pred.npy')
y_pred_img = target_image

In [81]:
np.bincount(y_pred_img[y_pred_img!=0])

array([    0, 14906, 13746, 19924, 51437])

In [82]:
img_profile = rasterio.open('/content/drive/MyDrive/VarunaHackathon2022/sentinel-2-image/2020/20200102/IMG_DATA/47PQS_20200102_B04.jp2').profile
label = gpd.read_file('/content/drive/MyDrive/VarunaHackathon2022/testing_area/testdata.shp').to_crs(img_profile['crs'])
crop_type = []
for i,row in tqdm(label.iterrows()):
  target_image = rasterize(
      shapes=[(row.geometry, 1)], # Construct polygon and value tuples
      out_shape=(img_profile['width'], img_profile['height']),
      transform=img_profile['transform']                                          # This will make every pixel in the target image align correctly with satellite image
  )
  pixel_predict = y_pred_img[target_image!=0]
  majority_pred = np.argmax(np.bincount(pixel_predict))
  crop_type.append(majority_pred)

565it [00:02, 194.81it/s]


In [83]:
import pandas as pd
df = pd.DataFrame(crop_type, columns=['crop_type'])
df.to_csv('/content/drive/MyDrive/TestSet/final_submission.csv')
df.value_counts()

crop_type
4            281
3            108
2             89
1             87
dtype: int64