## Dependencies

All the dependencies for this notebook are included in the `requirements.txt` file included in this folder.


In [1]:
from radiant_mlhub import Collection
import tarfile
import os
from pathlib import Path
import json

import geopandas as gpd
import datetime
import rasterio
import numpy as np
import pandas as pd
import elevation
import shapely

import matplotlib.pyplot as plt
from PIL import Image, ImageEnhance
from rasterio.plot import show

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import log_loss
from sklearn.model_selection import StratifiedShuffleSplit

from IPython.display import clear_output

In [2]:
import numpy as np
import os
import PIL
import PIL.Image
import tensorflow as tf
import tensorflow_datasets as tfds

In [42]:
def tile_dataset(tile_id, train):
    list_tile = tf.data.Dataset.list_files(f'/home/jupyter/NF-Capstone-Crop-Classification/stacked_files_CNN/{tile_id}/s2/*', shuffle=False)
    field_ids = rasterio.open(f'/home/jupyter/NF-Capstone-Crop-Classification/stacked_files_CNN/{tile_id}/field_ids.tif').read()
    
    if train == True:
        labels = rasterio.open(f'/home/jupyter/NF-Capstone-Crop-Classification/stacked_files_CNN/{tile_id}/labels.tif').read()
    
    days_tile = []
    
    for f in list_tile.take(-1):
        days_tile.append(int(str(f.numpy()).split("/")[-1].split("_")[-2]))
    print(sorted(days_tile))
    
    
    for field_id in np.unique(field_ids):
        field_mask = field_ids==field_id
        if train == True:
            field_label = round(np.mean(labels[field_mask]))
            
        idxx = np.where(field_mask)
        momentx = np.median(idxx[0]).astype(np.int)
        momenty = np.median(idxx[1]).astype(np.int)
        
        for day in days_tile:
            img_path = f'/home/jupyter/NF-Capstone-Crop-Classification/stacked_files_CNN/{tile_id}/s2/{tile_id}_s2_{day}_stacked.tif'
            
            img = rasterio.open(img_path).read()[1,:,:]
            print(type(img))
            img = tf.convert_to_tensor(img)
    print(img.shape)
    print(type(img))
    print(tf.shape(img))

In [43]:
tile_dataset(11, True)

[91, 111, 124, 144, 186, 194, 204, 216, 229, 244, 261, 279, 294, 309, 316, 334]
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class 'numpy.ndarray'>
<class '

In [92]:
"""
Cheap NN
"""
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.metrics import accuracy_score, classification_report, plot_confusion_matrix, log_loss



from sklearn.preprocessing import StandardScaler
from keras.utils import to_categorical

import tensorflow as tf
from tensorflow.keras.utils import plot_model

import keras 
from keras.models import Sequential # intitialize the ANN
from keras.layers import Dense, Activation, Dropout      # create layers

np.random.seed(42)
tf.random.set_seed(42)

fields_train_mean_var = pd.read_csv('data/mean_var_8days_train.csv')

In [93]:
import random

tile_ids = set(fields_train_mean_var.tile_id.unique())
tile_ids_test = set(random.sample(population=tile_ids, k=int(len(tile_ids)/3)))
tile_ids_train = tile_ids.difference(tile_ids_test)

In [94]:
train = fields_train_mean_var.query('tile_id in @tile_ids_train')
test = fields_train_mean_var.query('tile_id in @tile_ids_test')

X_train = train.drop(['field_id', 'label', 'geometry', 'tile_id', 'elevation'], axis=1)
y_train = train.label


X_test = test.drop(['field_id', 'label', 'geometry', 'tile_id', 'elevation'], axis=1)
y_test = test.label

#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.01, random_state=42, stratify=fields_train_mean_var.label)


scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


y_train_onehot = to_categorical(y_train)
y_test_onehot = to_categorical(y_test)

print(y_train_onehot.shape)
print(y_test_onehot.shape)

(57668, 10)
(29445, 10)


In [85]:
# Initialising the NN
model = Sequential()

# layers
model.add(Dense(units = 256, kernel_initializer = 'HeUniform', activation = 'relu', input_dim = 251))
model.add(Dropout(0.3))
model.add(Dense(units = 128, kernel_initializer = 'HeUniform', activation = 'relu'))
model.add(Dropout(0.3))
model.add(Dense(units = 64, kernel_initializer = 'HeUniform', activation = 'relu'))
model.add(Dropout(0.3))
model.add(Dense(units = 16, kernel_initializer = 'HeUniform', activation = 'relu'))
model.add(Dense(units = 10, kernel_initializer = 'HeUniform'))

# Compiling the ANN
model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy(from_logits=True), metrics = ['accuracy'])
print(model.summary())

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_35 (Dense)             (None, 256)               64512     
_________________________________________________________________
dropout_21 (Dropout)         (None, 256)               0         
_________________________________________________________________
dense_36 (Dense)             (None, 128)               32896     
_________________________________________________________________
dropout_22 (Dropout)         (None, 128)               0         
_________________________________________________________________
dense_37 (Dense)             (None, 64)                8256      
_________________________________________________________________
dropout_23 (Dropout)         (None, 64)                0         
_________________________________________________________________
dense_38 (Dense)             (None, 16)              

In [86]:
training = model.fit(X_train, y_train_onehot, batch_size = 500, validation_split=0.2, epochs = 50, verbose=2)

Epoch 1/50
92/92 - 2s - loss: 1.8194 - accuracy: 0.4103 - val_loss: 1.4037 - val_accuracy: 0.5345
Epoch 2/50
92/92 - 2s - loss: 1.3574 - accuracy: 0.5514 - val_loss: 1.2101 - val_accuracy: 0.5900
Epoch 3/50
92/92 - 2s - loss: 1.2259 - accuracy: 0.5833 - val_loss: 1.1350 - val_accuracy: 0.6143
Epoch 4/50
92/92 - 2s - loss: 1.1543 - accuracy: 0.6070 - val_loss: 1.0963 - val_accuracy: 0.6170
Epoch 5/50
92/92 - 2s - loss: 1.1124 - accuracy: 0.6169 - val_loss: 1.0715 - val_accuracy: 0.6261
Epoch 6/50
92/92 - 2s - loss: 1.0922 - accuracy: 0.6250 - val_loss: 1.0581 - val_accuracy: 0.6308
Epoch 7/50
92/92 - 2s - loss: 1.0575 - accuracy: 0.6342 - val_loss: 1.0397 - val_accuracy: 0.6348
Epoch 8/50
92/92 - 2s - loss: 1.0366 - accuracy: 0.6399 - val_loss: 1.0279 - val_accuracy: 0.6413
Epoch 9/50
92/92 - 2s - loss: 1.0143 - accuracy: 0.6511 - val_loss: 1.0201 - val_accuracy: 0.6475
Epoch 10/50
92/92 - 2s - loss: 1.0043 - accuracy: 0.6529 - val_loss: 1.0149 - val_accuracy: 0.6482
Epoch 11/50
92/92 -

In [87]:
model.evaluate(X_test,  y_test_onehot, verbose=2)

944/944 - 1s - loss: 0.9325 - accuracy: 0.6769


[0.9325171709060669, 0.676921546459198]

In [88]:
probability_model = tf.keras.Sequential([
  model,
  tf.keras.layers.Softmax()
])

probability_model.compile(optimizer = 'adam', loss = tf.keras.losses.CategoricalCrossentropy(), metrics = ['accuracy'])

In [89]:
probability_model.evaluate(X_test,  y_test_onehot, verbose=2)

944/944 - 2s - loss: 0.9325 - accuracy: 0.6769


[0.9325171709060669, 0.676921546459198]

In [36]:
y_test_pred_proba = probability_model.predict(X_test)

In [None]:
##### Build Submission

In [39]:
fields_test_mean_var = pd.read_csv('data/mean_var_8days_test.csv')

In [95]:
X_VAL = fields_test_mean_var.drop(['field_id', 'geometry', 'tile_id', 'elevation'], axis=1)

In [118]:
np.sum(fields_train_mean_var.B06_MEAN_4 == 0)

1140

In [55]:
X_VAL.shape

(35295, 251)

In [56]:
X_VAL = scaler.transform(X_VAL)

In [57]:
X_VAL_predict_proba = probability_model.predict(X_VAL)

In [58]:
X_VAL_predict_proba.shape

(35295, 10)

In [59]:
df_sol_proba = pd.DataFrame(X_VAL_predict_proba.round(2))

In [60]:
df_sol =  pd.concat([fields_test_mean_var['field_id'],df_sol_proba], axis =1)

In [61]:
df_sol

Unnamed: 0,field_id,0,1,2,3,4,5,6,7,8,9
0,62027,0.0,0.06,0.09,0.02,0.01,0.02,0.29,0.49,0.03,0.01
1,62071,0.0,0.24,0.65,0.01,0.00,0.04,0.03,0.01,0.01,0.00
2,85373,0.0,0.06,0.12,0.01,0.01,0.01,0.27,0.48,0.03,0.00
3,102896,0.0,0.02,0.13,0.00,0.00,0.01,0.21,0.57,0.06,0.00
4,3079,0.0,0.09,0.87,0.00,0.00,0.02,0.01,0.00,0.00,0.00
...,...,...,...,...,...,...,...,...,...,...,...
35290,117413,0.0,0.31,0.66,0.00,0.00,0.00,0.02,0.00,0.00,0.00
35291,16515,0.0,0.29,0.65,0.00,0.00,0.01,0.04,0.01,0.01,0.00
35292,59992,0.0,0.14,0.80,0.00,0.01,0.01,0.03,0.00,0.00,0.00
35293,61236,0.0,0.02,0.01,0.92,0.02,0.02,0.01,0.00,0.00,0.00


In [62]:
df_sol = df_sol[["field_id", 8, 3, 1,2,9,6,5,7,4]]
column_names = {"field_id": "Field ID",8:"Crop_Canola",3:"Crop_Fallow",1:"Crop_Lucerne/Medics",2:"Crop_Planted pastures (perennial)",9:"Crop_Rooibos",6:"Crop_Small grain grazing",5:"Crop_Weeds",7:"Crop_Wheat",4:"Crop_Wine grapes"}

df_sol_clean = df_sol.rename(columns = column_names)
df_sol_clean.head()

Unnamed: 0,Field ID,Crop_Canola,Crop_Fallow,Crop_Lucerne/Medics,Crop_Planted pastures (perennial),Crop_Rooibos,Crop_Small grain grazing,Crop_Weeds,Crop_Wheat,Crop_Wine grapes
0,62027,0.03,0.02,0.06,0.09,0.01,0.29,0.02,0.49,0.01
1,62071,0.01,0.01,0.24,0.65,0.0,0.03,0.04,0.01,0.0
2,85373,0.03,0.01,0.06,0.12,0.0,0.27,0.01,0.48,0.01
3,102896,0.06,0.0,0.02,0.13,0.0,0.21,0.01,0.57,0.0
4,3079,0.0,0.0,0.09,0.87,0.0,0.01,0.02,0.0,0.0


In [64]:
df_sol_clean.to_csv("Submission_2.csv", index=False)