# Description
This script outlines the steps to create a model that identifies cashew fields when given a sample of random fields containing various crops. The methodology is based on label data for Benin obtained from the Radiant Earth Foundation and satellite images obtained from Sentinel Hub.

In [None]:
import csv
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from os import getcwd
import pandas as pd

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix
from sklearn.metrics import average_precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

In [None]:
# Training split: read the CSV, report its dimensions and preview the first rows.
df = pd.read_csv(filepath_or_buffer='Cashews_training_data.csv')
print(df.shape)
df.head(n=5)

In [None]:
# Testing split: read the CSV, report its dimensions and preview the first rows.
df1 = pd.read_csv(filepath_or_buffer='Cashews_testing_data.csv')
print(df1.shape)
df1.head(n=5)

In [None]:
# Validation split: read the CSV, report its dimensions and preview the first rows.
df2 = pd.read_csv(filepath_or_buffer='Cashews_validation_data.csv')
print(df2.shape)
df2.head(n=5)

In [None]:
# Run this cell only when building a field-level model (rather than a
# pixel-level one): every column is averaged over the rows that share the same
# 'fid', and 'fid' is restored as an ordinary column afterwards.
train_grouped, test_grouped, val_grouped = (
    split.groupby('fid').mean().reset_index() for split in (df, df1, df2)
)

In [None]:
# Columns that are dropped before training: the target itself plus the
# identifier/location columns, leaving only the remaining columns as features.
non_feature_cols = ['label', 'fid', 'row_loc', 'col_loc', 'tile']

X_train = train_grouped.drop(columns=non_feature_cols)
X_test = test_grouped.drop(columns=non_feature_cols)
X_val = val_grouped.drop(columns=non_feature_cols)

# Labels are taken from the same frame as the matching features so that every
# X/y pair stays aligned row for row (test labels come from the test frame).
y_train = train_grouped["label"]
y_test = test_grouped["label"]
y_val = val_grouped["label"]


print(
    f"Train data shape is {X_train.shape}\n",
    f"testing data shape is {X_test.shape}\n",
    f"validation data shape is {X_val.shape}\n",
)

# Plain numpy copies of the feature tables, used later for normalisation and
# reshaping.
train_npy = X_train.to_numpy()
test_npy = X_test.to_numpy()
val_npy = X_val.to_numpy()

In [None]:
# Shift every label down by one for all three splits and print the distinct
# values before and after.
# NOTE(review): presumably the raw labels start at 1 and the shift makes them
# zero-based class indices for the sparse categorical loss -- confirm against
# the label CSVs.
print("The initial training labels are ", np.unique(y_train))
yTrain = y_train - 1
print("The edited training labels are ", np.unique(yTrain))

print("The initial testing labels are ", np.unique(y_test))
yTest = y_test - 1
print("The edited testing labels are ", np.unique(yTest))

# These messages describe the validation split, so they say "validation".
print("The initial validation labels are ", np.unique(y_val))
yVal = y_val - 1
print("The edited validation labels are ", np.unique(yVal))

In [None]:
# Sanity check: print the dimensions of every feature table and label vector.
shape_report = [
    ("xTrain shape: ", X_train),
    ("xTest shape: ", X_test),
    ("xVal shape: ", X_val),
    ("yTrain shape: ", yTrain),
    ("yTest shape: ", yTest),
    ("yVal shape: ", yVal),
]
for caption, data in shape_report:
    print(caption, data.shape)


In [None]:
# From here on the feature variables refer to the numpy arrays instead of the
# pandas tables; print the shapes again to confirm nothing changed.
X_train, X_test, X_val = train_npy, test_npy, val_npy
for caption, data in (
    ("xTrain shape: ", X_train),
    ("xTest shape: ", X_test),
    ("xVal shape: ", X_val),
):
    print(caption, data.shape)

In [None]:
max_value = 255
# Target layout of one sample: (number of dates with images, number of bands
# per image). The flat feature row of each sample is folded into this grid.
n_dates = 5
n_bands = 12

# Normalise the data
xTrain = X_train / max_value
xTest = X_test / max_value
xVal = X_val / max_value

# Reshape the data to (number of rows, n_dates, n_bands). The row count is
# inferred with -1 so the cell keeps working when the split sizes change;
# numpy still raises if a row does not hold exactly n_dates * n_bands values.
xTrain = np.reshape(xTrain, (-1, n_dates, n_bands))
xTest = np.reshape(xTest, (-1, n_dates, n_bands))
xVal = np.reshape(xVal, (-1, n_dates, n_bands))


# Print the shape of reshaped data
print("xTrain:",xTrain.shape)
print("xTest:",xTest.shape)
print("xVal:",xVal.shape)

In [None]:
import keras_tuner as kt
from tensorflow import keras
from keras_tuner import RandomSearch
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
# Name used as the prefix of every file this notebook writes for the model.
modelName = "CNN_cashew_model"

# The best weights are saved over the same file, named after the model; the
# relative path is resolved against the current working directory.
modelpath = f"{modelName}_bestweights.hdf5"

# Overwrite the file only when the monitored validation accuracy improves.
checkpoint = ModelCheckpoint(
    modelpath,
    monitor='val_accuracy',
    verbose=1,
    save_best_only=True,
    mode='max',
)
callbacks_list = [checkpoint]

In [None]:
# Adam optimizer with an explicit learning rate, passed to model.compile later.
myadam = keras.optimizers.Adam(learning_rate=1e-3)

In [None]:
# The parameters used here are obtained from the best performing model as per
# the keras tuner script output.
#
# Each sample is a (5, 12) sequence. The first convolution (kernel size 2,
# default 'valid' padding) shortens the sequence to 4 steps, so the second
# convolution (kernel size 7) must use padding='same'; with 'valid' padding its
# output length would be non-positive and the model could not be built.
# NOTE(review): 'same' padding keeps the tuned filter count and kernel size;
# confirm it matches the padding used in the tuner script.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(112, 2, activation='relu', input_shape=(5, 12)),
    tf.keras.layers.Conv1D(112, 7, activation='relu', padding='same'),
    tf.keras.layers.MaxPooling1D(pool_size=2, strides=2),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(80, activation='relu'),
    # One softmax output per class (8 classes).
    tf.keras.layers.Dense(8, activation='softmax'),
])

# The sparse loss takes integer class indices as targets, not one-hot vectors.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer=myadam,
    metrics=['accuracy'],
)
model.summary()

In [None]:
# Per-class weights for the loss. The weight of a class is
# n_samples / (n_classes * class_count), so classes with fewer training samples
# receive a larger weight.
# NOTE(review): no class_weight definition exists in this notebook, so the
# "balanced" heuristic is assumed here -- replace it if other weights were
# intended.
label_values, label_counts = np.unique(yTrain, return_counts=True)
class_weight = {
    int(label): len(yTrain) / (len(label_values) * count)
    for label, count in zip(label_values, label_counts)
}

# Train the model; the test split is passed as validation_data, so the
# 'val_accuracy' monitored by the checkpoint callback is computed on it.
history = model.fit(
    xTrain,
    yTrain,
    batch_size=30,
    epochs=200,
    validation_data=(xTest, yTest),
    class_weight=class_weight,
    callbacks=callbacks_list,
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend(loc=0)
plt.figure()
plt.show()

In [None]:
# Serialise the model architecture to JSON and store it next to the notebook
# under the model name.
model_json = model.to_json()
with open(f"{modelName}.json", "w") as json_file:
    json_file.write(model_json)

In [None]:
import tensorflow_hub as hub
from tensorflow.keras.models import model_from_json

# Read the saved architecture JSON back in; the context manager closes the file
# even if reading fails.
with open(f"{modelName}.json", "r") as json_file:
    loaded_model_json = json_file.read()

In [None]:
# Rebuild the network from its JSON description, then restore the weights
# written by the checkpoint callback.
weights_file = f"{modelName}_bestweights.hdf5"
model = model_from_json(
    loaded_model_json,
    custom_objects={'KerasLayer': hub.KerasLayer},
)
model.load_weights(weights_file)

In [None]:
# Score the validation samples; the cell output is the number of predictions.
predictions = model.predict(x=xVal, verbose=1)
len(predictions)

In [None]:
# For every sample, pick the index of the highest-scoring output as the
# predicted class. The argmax runs over axis 1 (one row of scores per sample)
# and the result is kept as a plain list, as before.
classification = np.argmax(predictions, axis=1).tolist()

In [None]:
# Predicted class indices as a numpy array, ready for the sklearn metrics.
y_predicted = np.asarray(classification)

In [None]:
# Confusion matrix on the validation split: rows are the true classes, columns
# the predicted classes.
cMatrix = confusion_matrix(y_true=yVal, y_pred=y_predicted)
print(cMatrix)

In [None]:
# Overall fraction of validation samples that were classified correctly.
accuracy_score(y_true=yVal, y_pred=y_predicted)

In [None]:
from sklearn.metrics import f1_score

# A notebook cell only displays its last expression, so both results are
# printed explicitly; otherwise the per-class scores would be computed and then
# silently discarded.
per_class_f1 = f1_score(yVal, y_predicted, average=None)
weighted_f1 = f1_score(yVal, y_predicted, average='weighted')
print("F1 score per class:", per_class_f1)
print("Weighted F1 score:", weighted_f1)