# Introduction:
The "Eyes on the Ground" project is a collaboration between ACRE Africa, the International Food Policy Research Institute (IFPRI), and the Lacuna Fund to create a large machine learning (ML) dataset that provides a close-up view of smallholder farmers' fields, building on previous work within the Picture Based Insurance framework.

In order to help farmers across Africa manage agricultural risk, ACRE Africa utilizes image data to settle insurance claims and carry out loss assessment. ACRE reviews smartphone pictures of insured crops sent in by farmers to verify whether a farmer’s crops were damaged, and whether this damage was related to weather, pests and diseases, or man-made factors such as fire, in order to evaluate an insurance claim and determine appropriate compensation.

Evaluating images for thousands of insured smallholder farmers to verify insurance claims is however time-consuming, and this often slows down claims settlement. ACRE Africa is therefore looking at artificial intelligence to automate claims settlement, building on the training data that ACRE Africa and IFPRI produced with support from the Lacuna Fund.

Since most claims are related to drought, this challenge will ask participants to predict drought damage from smartphone images of crops taken in the past. The Eyes-on-the-Ground project has already successfully trained models to predict drought damage in the first two seasons, but those models did not transfer well to the third season for which data are available. (https://zindi.africa/competitions/cgiar-eyes-on-the-ground-challenge)

# Imports

In [None]:
import tensorflow as tf
tf.random.set_seed(42)
import numpy as np
np.random.seed(42)
import os
import pandas as pd
import zipfile
import random
import shutil
import tensorflow_hub as hub
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import InverseTimeDecay
from sklearn.model_selection import KFold
from shutil import copyfile
import matplotlib.pyplot as plt
from tensorflow.keras import models , layers
from tensorflow.keras.preprocessing import image_dataset_from_directory
from sklearn.model_selection import train_test_split

# Data Cleaning

In [None]:
# Unpack the test and train archives from Google Drive into /content.
# The context manager closes each archive even if extraction raises.
local_zip = '/content/drive/MyDrive/Data/test.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/content/test')
local_zip = '/content/drive/MyDrive/Data/train.zip'
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
  zip_ref.extractall('/content/train')


In [None]:
# Report how many entries each extracted directory contains.
source_path_train = '/content/train'
source_path_test = '/content/test'
_train_entries = os.listdir(source_path_train)
print(f"There are {len(_train_entries)} train images.")
_test_entries = os.listdir(source_path_test)
print(f"There are {len(_test_entries)} test images .")

There are 26068 train images.
There are 8663 test images .


In [None]:
# Load the metadata tables that accompany the images.
test = pd.read_csv("/content/Test.csv")
train = pd.read_csv("/content/Train.csv")
# Training subset: rows with a non-zero "extent", without missing values,
# and without the "damage" column.
trainDR = (
    train.loc[train["extent"] != 0]
    .dropna()
    .drop(columns="damage")
)
# Test subset: rows whose "damage" value is 'DR'.
testDR = test.loc[test["damage"] == 'DR']

# Preprocessing

In [None]:
# Side length (pixels) that every image is resized to.
IMG_SIZE = 224
# Number of samples per dataset batch.
BATCH_SIZE = 128
# Image batch shape: (batch, height, width, channels).
INPUT_SHAPE = [None, IMG_SIZE, IMG_SIZE, 3]
# Shape of the tabular input batch (8 columns per sample).
feature = [None, 8]

In [None]:
def process_image(image_path):
  """Read a JPEG file and return it as a float32 IMG_SIZE x IMG_SIZE image.

  Args:
    image_path: path of the JPEG file to load.

  Returns:
    The 3-channel image converted to float32 and resized to
    [IMG_SIZE, IMG_SIZE].
  """
  raw_bytes = tf.io.read_file(image_path)
  decoded = tf.image.decode_jpeg(raw_bytes, 3)
  as_float = tf.image.convert_image_dtype(decoded, tf.float32)
  return tf.image.resize(as_float, size=[IMG_SIZE, IMG_SIZE])
def process_image1(image):
  """Convert an already-decoded image to float32 and resize it.

  Args:
    image: image tensor (no file reading or JPEG decoding happens here).

  Returns:
    The image converted to float32 and resized to [IMG_SIZE, IMG_SIZE].
  """
  as_float = tf.image.convert_image_dtype(image, tf.float32)
  return tf.image.resize(as_float, size=[IMG_SIZE, IMG_SIZE])
def get_image_label1(image,data):
  """Dataset map function for in-memory images (no label).

  Args:
    image: already-decoded image tensor; preprocessed with process_image1.
    data: extra per-sample features, passed through unchanged.

  Returns:
    A 1-tuple whose only item is the pair (preprocessed image, data).
    NOTE(review): the outer 1-tuple presumably marks the element as
    inputs-only for Keras predict; confirm.
  """
  model_inputs = (process_image1(image), data)
  return (model_inputs,)
def get_image_label2(image,data):
  """Dataset map function for image files (no label).

  Args:
    image: path of a JPEG file; loaded and preprocessed with process_image.
    data: extra per-sample features, passed through unchanged.

  Returns:
    A 1-tuple whose only item is the pair (preprocessed image, data).
    NOTE(review): the outer 1-tuple presumably marks the element as
    inputs-only for Keras predict; confirm.
  """
  model_inputs = (process_image(image), data)
  return (model_inputs,)
def get_image_label(data,label=None):
  """Dataset map function for labelled samples.

  Args:
    data: pair whose first item is an image file path and whose second
      item holds the extra per-sample features.
    label: target value for the sample (None when absent).

  Returns:
    ((preprocessed image, features), label).
  """
  image_path, extra_features = data[0], data[1]
  model_inputs = (process_image(image_path), extra_features)
  return (model_inputs, label)

In [None]:
def create_data_batches(x, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
  """Build a batched tf.data.Dataset for training, validation or crop prediction.

  Args:
    x: for training/validation, a DataFrame with 'filename', 'growth_stage'
      and 'season' columns; for test_data=True, a sequence of feature rows.
    y: for training/validation, the targets aligned with ``x``; for
      test_data=True, a sequence of already-decoded images aligned with ``x``.
    batch_size: number of samples per batch. This is now honoured; earlier
      the global BATCH_SIZE was always used regardless of the argument.
    valid_data: only changes the progress message; the validation pipeline
      is otherwise built exactly like the training one.
    test_data: build an unlabelled dataset from in-memory images.

  Returns:
    A batched tf.data.Dataset. Training/validation elements are
    ((image, one-hot features), target); test elements are
    ((image, features),).
  """
  if test_data:
    # Here ``y`` carries the images and ``x`` the matching feature rows.
    data = tf.data.Dataset.from_tensor_slices((y, tf.constant(x)))
    return data.map(get_image_label1).batch(batch_size)

  if valid_data:
    print("Creating validation data batches...")
  else:
    print("Creating training data batches...")

  # One-hot encode the categorical columns that feed the tabular branch.
  train2 = pd.get_dummies(x[['growth_stage', 'season']])
  inputs = (tf.constant(x['filename']), tf.constant(train2))
  data = tf.data.Dataset.from_tensor_slices((inputs, tf.constant(y)))
  return data.map(get_image_label).batch(batch_size)

In [None]:
def create_data_batches_test(x, y=None, batch_size=BATCH_SIZE, valid_data=False, test_data=False):
  """Build a batched tf.data.Dataset from a metadata DataFrame.

  Unlike the crop-based test path of create_data_batches, the test branch
  here loads whole images from the paths in ``x['filename']``.

  Args:
    x: DataFrame with 'filename', 'growth_stage' and 'season' columns.
    y: targets aligned with ``x``; unused when test_data=True.
    batch_size: number of samples per batch. This is now honoured; earlier
      the global BATCH_SIZE was always used regardless of the argument.
    valid_data: only changes the progress message for labelled data.
    test_data: build an unlabelled dataset.

  Returns:
    A batched tf.data.Dataset. Labelled elements are
    ((image, one-hot features), target); test elements are
    ((image, one-hot features),).
  """
  if test_data:
    print("Creating test data batches...")
  elif valid_data:
    print("Creating validation data batches...")
  else:
    print("Creating training data batches...")

  # One-hot encode the categorical columns that feed the tabular branch.
  train2 = pd.get_dummies(x[['growth_stage', 'season']])
  inputs = (tf.constant(x['filename']), tf.constant(train2))

  if test_data:
    data = tf.data.Dataset.from_tensor_slices(inputs)
    return data.map(get_image_label2).batch(batch_size)

  data = tf.data.Dataset.from_tensor_slices((inputs, tf.constant(y)))
  return data.map(get_image_label).batch(batch_size)

In [None]:
# Shuffle the training rows reproducibly and renumber them from 0.
train1 = trainDR.sample(frac=1, random_state=42).reset_index(drop=True)
# Prefix the image directory onto the value of the second column and store it
# in 'filename'. One vectorized assignment replaces the per-row chained
# assignment, which raised SettingWithCopyWarning, was hard-coded to 4510
# rows, and is not guaranteed to write back to the frame.
train1['filename'] = 'train/' + train1.iloc[:, 1]
# Features and regression target.
X = train1.drop("extent", axis=1)
y = train1["extent"]

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  train1['filename'][i]='train/' + train1.iloc[i,1]


# Model

In [None]:
# TF-Hub handle of the image backbone loaded by create_model(). The
# commented-out lines are alternative handles; exactly one is active.
#MODEL_URL = "https://tfhub.dev/google/imagenet/resnet_v2_50/classification/5"
#MODEL_URL = "https://tfhub.dev/google/tf2-preview/mobilenet_v2/classification/4"
#MODEL_URL = "https://tfhub.dev/google/imagenet/efficientnet_v2_imagenet21k_ft1k_b0/classification/2"
MODEL_URL = "https://tfhub.dev/sayakpaul/swin_base_patch4_window7_224_fe/1"
#MODEL_URL ="https://tfhub.dev/sayakpaul/convnext_xlarge_21k_224/1"
#MODEL_URL = "https://tfhub.dev/sayakpaul/swin_base_patch4_window12_384/1"

In [None]:
# Parameters of the InverseTimeDecay learning-rate schedule used below.
initial_learning_rate = 0.001
decay_steps = 1000
decay_rate = 0.5
def create_model(input_shape=INPUT_SHAPE, model_url=MODEL_URL):
    """Build and compile the two-input regression model.

    An image branch (TF-Hub layer) and a tabular branch (two Dense layers)
    are concatenated, passed through two more Dense layers, and reduced to
    one linear output named 'y1_output'.

    Args:
        input_shape: image batch shape with the batch dimension first, e.g.
            [None, 224, 224, 3]. Everything after the batch dimension is used
            as the image input shape (it was ignored before).
        model_url: TF-Hub handle of the image backbone (it was ignored
            before; the global MODEL_URL was always used).

    Returns:
        A compiled tf.keras.Model taking [image, 8-column features], trained
        with mean squared error and reporting RootMeanSquaredError.
    """
    print("Building model with:", model_url)
    # Image branch.
    x_input = tf.keras.Input(shape=tuple(input_shape[1:]))
    x = hub.KerasLayer(model_url)(x_input)
    # Tabular branch.
    x1_input = tf.keras.Input(shape=(8,))
    x1 = tf.keras.layers.Dense(512, activation='relu')(x1_input)
    x1 = tf.keras.layers.Dense(512, activation='relu')(x1)
    # Fusion head.
    combined0 = tf.keras.layers.concatenate([x, x1])
    combined = tf.keras.layers.Dense(512, activation='relu')(combined0)
    combined = tf.keras.layers.Dense(512, activation='relu')(combined)
    y1_output = tf.keras.layers.Dense(1, name='y1_output')(combined)
    lr_schedule = InverseTimeDecay(initial_learning_rate, decay_steps, decay_rate)
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)
    model = tf.keras.Model(inputs=[x_input, x1_input], outputs=[y1_output])
    model.compile(
        optimizer=optimizer,
        loss={'y1_output': 'mean_squared_error'},
        metrics={'y1_output': tf.keras.metrics.RootMeanSquaredError()})
    return model


In [None]:
# Early-stopping callback: watches validation RMSE (lower is better), waits
# 5 epochs without improvement, then restores the best weights.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_root_mean_squared_error',
    mode='min',
    patience=5,
    restore_best_weights=True,
)
class DetectOverfittingCallback(tf.keras.callbacks.Callback):
    """Stop training when validation RMSE exceeds training RMSE by too much.

    After each epoch the callback remembers the weights of the epoch with the
    lowest validation RMSE, and requests a stop once
    (validation RMSE - training RMSE) is greater than ``threshold``.
    """

    def __init__(self, threshold=2):
        """Create the callback.

        Args:
            threshold: largest tolerated gap (validation RMSE minus training
                RMSE) before training is stopped.
        """
        super().__init__()
        self.threshold = threshold
        self.best_val_loss = float('inf')
        self.best_weights = None

    def on_epoch_end(self, epoch, logs=None):
        """Track the best weights and stop training if the gap is too large.

        Epochs whose logs lack either RMSE metric (for example when no
        validation ran that epoch) are skipped instead of raising.
        """
        logs = logs or {}
        val_loss = logs.get("val_root_mean_squared_error")
        train_loss = logs.get("root_mean_squared_error")
        if val_loss is None or train_loss is None:
            return
        if val_loss < self.best_val_loss:
            self.best_val_loss = val_loss
            self.best_weights = self.model.get_weights()
        # This is a difference, not a ratio, despite the original variable name.
        gap = val_loss - train_loss
        print("Epoch: {}, Val-Train: {:.2f}".format(epoch, gap))
        if gap > self.threshold:
            print("Stopping training...")
            self.model.stop_training = True

    def get_best_weights(self):
        """Return the weights of the best epoch seen, or None if none yet."""
        return self.best_weights

In [None]:
# Number of cross-validation folds and training epochs per fold.
num_folds = 3
NUM_EPOCHS = 30
def cross_validation():
  """Train a fresh model on each K-fold split of the global X / y.

  Each fold builds its own datasets and model, trains for NUM_EPOCHS, and is
  scored on its validation dataset. The mean loss and mean RMSE over the
  folds are printed.

  Returns:
    (model, history) of the LAST fold only; earlier folds' models are
    discarded.
  """
  kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)
  fold_results = []
  for fold, (train_index, val_index) in enumerate(kf.split(X)):
    print(f"Training fold {fold+1}/{num_folds}")
    X_fold_train, X_fold_val = X.iloc[train_index], X.iloc[val_index]
    y_fold_train, y_fold_val = y.iloc[train_index], y.iloc[val_index]
    train_data = create_data_batches(X_fold_train, y_fold_train)
    val_data = create_data_batches(X_fold_val, y_fold_val, valid_data=True)
    model = create_model()
    # NOTE: Keras ignores `shuffle` when the input is a tf.data.Dataset.
    history = model.fit(train_data,
                        epochs=NUM_EPOCHS,
                        validation_data=val_data,
                        validation_freq=1,
                        shuffle=True)
    # Score on the prepared validation dataset. The raw DataFrame passed
    # before is not a valid input for this two-input model and has no targets.
    fold_results.append(model.evaluate(val_data, verbose=0))
  # evaluate() returns [loss, rmse] for the compiled loss and metric.
  avg_loss = np.mean([result[0] for result in fold_results])
  avg_rmse = np.mean([result[1] for result in fold_results])
  # The second value is RMSE, so the message no longer calls it accuracy.
  print(f"Average Loss: {avg_loss:.4f}, Average RMSE: {avg_rmse:.4f}")
  return model, history
model,history=cross_validation()

In [None]:
# Print the summary of the model returned by cross_validation().
model.summary()

In [None]:
import matplotlib.pyplot as plt
def plot_history(history):
    """Plot training vs. validation curves for the loss and the RMSE metric.

    Args:
        history: object whose ``history`` attribute maps 'loss', 'val_loss',
            'root_mean_squared_error' and 'val_root_mean_squared_error' to
            per-epoch values (as returned by ``model.fit``).
    """
    # Each panel: (y-axis label, title, ((history key, legend label), ...)).
    panels = (
        ('Loss', 'Training and Validation Loss',
         (('loss', 'Training Loss'),
          ('val_loss', 'Validation Loss'))),
        ('Metric', 'Training and Validation Metric',
         (('root_mean_squared_error', 'root_mean_squared_error'),
          ('val_root_mean_squared_error', 'val_root_mean_squared_error'))),
    )

    plt.figure(figsize=(12, 6))
    for position, (y_label, title, curves) in enumerate(panels, start=1):
        plt.subplot(1, 2, position)
        for key, legend_label in curves:
            plt.plot(history.history[key], label=legend_label)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
    plt.tight_layout()
    plt.show()
plot_history(history)

# Submission

In [None]:
# Renumber the filtered test rows from 0.
test1 = testDR.reset_index(drop=True)
# Prefix the image directory onto the value of the second column and store it
# in 'filename'. One vectorized assignment replaces the per-row chained
# assignment, which was hard-coded to 1503 rows and is not guaranteed to
# write back to the frame.
test1['filename'] = 'test/' + test1.iloc[:, 1]

In [None]:
def test_augmentation(pas,pas1):
  """Predict every row of the global ``test1`` by averaging over image crops.

  For each image, a grid of ``pas1`` x ``pas1`` crops is taken. Crop offsets
  advance by 1/``pas`` of the image height/width, and each crop is
  (pas1 * pas1) strides smaller than the full image in each dimension. Every
  crop is predicted with the global ``model`` and the predictions are averaged.

  Args:
    pas: divisor giving the stride (stride = int(dimension / pas)).
    pas1: number of crop offsets along each axis.

  Returns:
    A list with one averaged prediction array per row of ``test1``.
  """
  test2 = pd.get_dummies(test1[['growth_stage', 'season']])
  pred = []
  # Cover every row of test1 instead of a hard-coded 1503.
  for i in range(len(test1)):
    print(i)  # progress indicator
    image = tf.io.read_file(test1["filename"][i])
    image = tf.image.decode_jpeg(image, 3)
    height, width = image.shape[0], image.shape[1]
    stride1 = int(height / pas)
    stride2 = int(width / pas)
    # Exclusive upper bounds for the crop offsets along each axis.
    c = stride1 * pas1
    d = stride2 * pas1
    crop_size = (height - pas1 * c, width - pas1 * d)
    predictions1 = []
    for top in range(0, c, stride1):
      for left in range(0, d, stride2):
        crop = image[top:top + crop_size[0], left:left + crop_size[1]]
        # One-sample dataset: the crop plus this row's one-hot features.
        batch = create_data_batches([test2.iloc[i]], [crop], test_data=True)
        predictions1.append(model.predict(batch))
    pred.append(np.mean(predictions1, axis=0))
  return pred

In [None]:
# Crop-averaged predictions for every row of test1.
pred = test_augmentation(10, 2)
sub = pd.read_csv("/content/SampleSubmission.csv")
# The prediction goes into the second column of the submission table.
target_column = sub.columns[1]
# Pair each test ID with its prediction (no hard-coded row count) and write
# it into the matching submission row(s) through a boolean mask. IDs missing
# from the sample file are left untouched.
for sample_id, sample_pred in zip(test1["ID"], pred):
  sub.loc[sub["ID"] == sample_id, target_column] = sample_pred[0][0]
# NOTE: this overwrites the sample submission file in place.
sub.to_csv("/content/SampleSubmission.csv", index=False)