In [13]:
import ee
import numpy as np
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV
from sklearn.model_selection import StratifiedKFold
from xgboost import XGBClassifier


# Seed shared by the CV splitter and the randomized searches further down.
SEED = 42

# Seed NumPy's global RNG and Python's `random` module so repeated runs
# draw the same pseudo-random numbers.
np.random.seed(SEED)
random.seed(SEED)

In [14]:
from google.colab import drive

# Mount Google Drive and derive the data folder from the same absolute mount
# point, so that reading/writing files does not depend on the kernel's
# current working directory.
DRIVE_MOUNT_POINT = '/content/drive/'
drive.mount(DRIVE_MOUNT_POINT)
path_drive = DRIVE_MOUNT_POINT + 'MyDrive/CropLand/'

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [15]:
# Read the training and test tables from the folder stored in `path_drive`.
train_data, test_data = (
    pd.read_csv(path_drive + csv_name)
    for csv_name in ('train_sen1_sen2.csv', 'test_sen1_sen2.csv')
)

In [16]:
# Display the column names of the training table.
train_data.columns

Index(['ID', 'Lat', 'Lon', 'Target', 'country', 'NDVI_change', 'NDWI_change',
       'RNDVI_change', 'BI_change', 'NDMI_change', 'SR_change', 'NDVI', 'NDWI',
       'RNDVI', 'BI', 'NDMI', 'SR', 'B2', 'B3', 'B4', 'VV_change_asc',
       'VH_change_asc', 'VH/VH_change_asc', 'VV+VH_change_asc', 'VV_asc',
       'VH_asc', 'VH/VH_asc', 'VV+VH_asc', 'VV_change_des', 'VH_change_des',
       'VH/VH_change_des', 'VV+VH_change_des', 'VV_des', 'VH_des', 'VH/VH_des',
       'VV+VH_des'],
      dtype='object')

In [54]:
def create_X_y_country(pd, country):
  """Return the scaled feature matrix and the labels of one country's rows.

  NOTE: the first parameter is named ``pd`` and therefore hides the pandas
  module inside this function; here it is the DataFrame to filter.

  Args:
    pd: DataFrame holding the 'ID', 'Lat', 'Lon', 'Target' and 'country'
      columns in addition to the feature columns.
    country: Value of the 'country' column whose rows are selected.

  Returns:
    A ``(X, y)`` tuple: ``X`` is the result of ``scaler.transform`` applied
    to the selected rows' feature columns, ``y`` is a copy of their 'Target'
    column.
  """
  is_country = pd['country'] == country
  rows = pd.loc[is_country]
  labels = rows['Target'].copy()
  features = rows.drop(columns = ['ID', 'Lat', 'Lon', 'Target', 'country'])

  # `scaler` is not a parameter: it is the notebook-level object assigned
  # outside this function and must exist before the first call.
  return scaler.transform(features), labels

from sklearn.preprocessing import StandardScaler

# Identifier / location columns that are never used as model inputs.
META_COLS = ['ID', 'Lat', 'Lon', 'country']

y_train = train_data['Target']
X_train = train_data.drop(columns = META_COLS + ['Target'])

# Fit the scaler once on the full training feature table; the same fitted
# scaler is reused for every per-country subset and for the test features.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)

X_train_afg, y_train_afg    = create_X_y_country(train_data, 'Afghanistan')
X_train_iran, y_train_iran  = create_X_y_country(train_data, 'Iran')
X_train_sudan, y_train_sudan = create_X_y_country(train_data, 'Sudan')


def _scaled_test_features(country):
  """Return the scaled feature matrix of the test rows belonging to `country`."""
  features = test_data[test_data['country'] == country].drop(columns = META_COLS)
  if features.empty:
    # The scaler rejects zero-row input; return an empty matrix instead.
    return features.to_numpy(dtype=float)
  return scaler.transform(features)


# The per-country test features go through the same scaler as the training
# features; feeding unscaled values to models trained on standardized inputs
# would produce meaningless predictions.
X_test_afg    = _scaled_test_features('Afghanistan')
X_test_iran   = _scaled_test_features('Iran')
X_test_sudan  = _scaled_test_features('Sudan')

In [38]:
def create_xgb_model(X_now,y_now, parameters, folds, param_comb):
  """Run a randomized hyper-parameter search for an XGBoost binary classifier.

  Args:
    X_now: Feature matrix to train on.
    y_now: Class labels aligned with `X_now`.
    parameters: Mapping of XGBClassifier parameter names to candidate values.
    folds: Number of stratified cross-validation folds.
    param_comb: Number of parameter combinations to sample.

  Returns:
    The fitted RandomizedSearchCV object. Its best mean cross-validated
    accuracy is printed as a side effect.
  """
  model = XGBClassifier(objective= 'binary:logistic', nthread = 1)

  # Build the splitter from `folds` instead of relying on a global splitter:
  # previously the `folds` argument was accepted but never used.
  cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)

  random_search = RandomizedSearchCV(model, param_distributions=parameters,
                                   n_iter=param_comb, scoring='accuracy', cv=cv, verbose=1, random_state=SEED )

  random_search.fit(X_now, y_now)

  print(random_search.best_score_)

  return random_search




# Candidate hyper-parameter values sampled by the XGBoost randomized search.
params = {
    'n_estimators': list(range(400, 801, 100)),
    'learning_rate': [0.01, 0.005],
    'gamma': [0.8, 1, 1.2, 1.5],
    'colsample_bytree': [0.2, 0.4, 0.6, 0.8, 1],
    'max_depth': [3, 5, 7, 10],
}

# Number of cross-validation folds used by the shared stratified splitter.
folds = 3
skf = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)


In [62]:
# XGBoost randomized search for each country and for the pooled training set
# (each call passes folds=3 and samples 20 parameter combinations).
xgb_training_sets = [
    (X_train_afg, y_train_afg),
    (X_train_iran, y_train_iran),
    (X_train_sudan, y_train_sudan),
    (X_train, y_train),
]
r_s_afg, r_s_iran, r_s_sudan, r_s_all = [
    create_xgb_model(X_set, y_set, params, 3, 20)
    for X_set, y_set in xgb_training_sets
]

Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.848026837890484
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9579996633239545
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9760238559026525
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9126666666666666


In [55]:
# Sanity check: the per-country features come out of `scaler.transform`, so
# they are expected to be a NumPy array rather than a DataFrame.
print(type(X_train_afg))

<class 'numpy.ndarray'>


In [56]:
# "Ascending only" variant: drop the last 8 feature columns, which per
# `train_data.columns` are the `*_des` ones; every other feature is kept.
asc_cols = slice(None, -8)
r_s_all_asc, r_s_afg_asc, r_s_iran_asc, r_s_sudan_asc = [
    create_xgb_model(X_set[:, asc_cols], y_set, params, 3, 20)
    for X_set, y_set in [
        (X_train, y_train),
        (X_train_afg, y_train_afg),
        (X_train_iran, y_train_iran),
        (X_train_sudan, y_train_sudan),
    ]
]

Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9093333333333334
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.831998653295818
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9600076954524686
Fitting 3 folds for each of 20 candidates, totalling 60 fits
0.9760238559026525


In [None]:
import lightgbm
!pip install catboost
import catboost

In [None]:
def create_lgbm_model(X_now,y_now, parameters, folds, param_comb):
  """Run a randomized hyper-parameter search for a LightGBM binary classifier.

  Args:
    X_now: Feature matrix to train on.
    y_now: Class labels aligned with `X_now`.
    parameters: Mapping of LGBMClassifier parameter names to candidate values.
    folds: Number of stratified cross-validation folds.
    param_comb: Number of parameter combinations to sample.

  Returns:
    The fitted RandomizedSearchCV object. Its best mean cross-validated
    accuracy is printed as a side effect.
  """
  model = lightgbm.LGBMClassifier(boosting_type='gbdt',  objective='binary', metric='auc')

  # Build the splitter from `folds` instead of relying on a global splitter:
  # previously the `folds` argument was accepted but never used.
  cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)

  # Candidates are ranked by accuracy (the `scoring` argument), independently
  # of the `metric` given to the classifier above.
  random_search = RandomizedSearchCV(model, param_distributions=parameters,
                                   n_iter=param_comb, scoring='accuracy', cv=cv, verbose=0, random_state=SEED )

  random_search.fit(X_now, y_now)

  print(random_search.best_score_)

  return random_search


# Candidate hyper-parameter values for the LightGBM randomized search.
# `reg_alpha` is intentionally absent: it is an alias of `lambda_l1`, so
# listing both made the search sample two conflicting values for the same
# L1 penalty, only one of which LightGBM actually applies.
param_grid_lgbm = {
    'num_leaves': [18, 31, 127],
    'min_data_in_leaf': [30, 50, 100, 300, 400],
    'lambda_l1': [0, 1, 1.5],
    'lambda_l2': [0, 1],
    'learning_rate': [0.1, 0.05, 0.01],
    'max_bin': [255, 400, 500]
    }

# LightGBM randomized search for each country and for the pooled training set
# (each call passes folds=3 and samples 20 parameter combinations).
r_s_lgbm_afg, r_s_lgbm_iran, r_s_lgbm_sudan, r_s_lgbm_all = [
    create_lgbm_model(X_set, y_set, param_grid_lgbm, 3, 20)
    for X_set, y_set in [
        (X_train_afg, y_train_afg),
        (X_train_iran, y_train_iran),
        (X_train_sudan, y_train_sudan),
        (X_train, y_train),
    ]
]

In [None]:
# Same LightGBM searches with the last 8 feature columns removed (per
# `train_data.columns` these are the `*_des` features).
lgbm_asc_cols = slice(None, -8)
r_s_lgbm_afg_asc, r_s_lgbm_iran_asc, r_s_lgbm_sudan_asc, r_s_lgbm_all_asc = [
    create_lgbm_model(X_set[:, lgbm_asc_cols], y_set, param_grid_lgbm, 3, 20)
    for X_set, y_set in [
        (X_train_afg, y_train_afg),
        (X_train_iran, y_train_iran),
        (X_train_sudan, y_train_sudan),
        (X_train, y_train),
    ]
]

In [None]:
from catboost import CatBoostClassifier


def create_cat_model(X_now,y_now, parameters, folds, param_comb):
  """Run a randomized hyper-parameter search for a CatBoost classifier.

  Args:
    X_now: Feature matrix to train on.
    y_now: Class labels aligned with `X_now`.
    parameters: Mapping of CatBoostClassifier parameter names to candidate values.
    folds: Number of stratified cross-validation folds.
    param_comb: Number of parameter combinations to sample.

  Returns:
    The fitted RandomizedSearchCV object. Its best mean cross-validated
    accuracy is printed as a side effect.
  """
  # verbose=0 silences CatBoost's own per-iteration training log, which the
  # search's `verbose=0` does not control.
  model = CatBoostClassifier(verbose=0)

  # Build the splitter from `folds` instead of relying on a global splitter:
  # previously the `folds` argument was accepted but never used.
  cv = StratifiedKFold(n_splits=folds, shuffle=True, random_state=SEED)

  random_search = RandomizedSearchCV(model, param_distributions=parameters,
                                   n_iter=param_comb, scoring='accuracy', cv=cv, verbose=0, random_state=SEED )

  random_search.fit(X_now, y_now)

  print(random_search.best_score_)

  return random_search


# Candidate hyper-parameter values for the CatBoost randomized search.
params_cat = {
    'iterations': [2, 4, 6, 20, 50],
    'learning_rate': [1, 0.1, 0.01],
    'depth': [2, 4, 8, 10],
    'l2_leaf_reg': [0.1, 0.8, 1],
}

# CatBoost randomized search for each country and for the pooled training set
# (each call passes folds=3 and samples 20 parameter combinations).
r_s_cat_afg, r_s_cat_iran, r_s_cat_sudan, r_s_cat_all = [
    create_cat_model(X_set, y_set, params_cat, 3, 20)
    for X_set, y_set in [
        (X_train_afg, y_train_afg),
        (X_train_iran, y_train_iran),
        (X_train_sudan, y_train_sudan),
        (X_train, y_train),
    ]
]

In [None]:
# Country-specific searches that vote in addition to the three pooled models.
model_dict = {
    'Afghanistan': [r_s_afg, r_s_lgbm_afg, r_s_cat_afg],
    'Iran': [r_s_iran, r_s_lgbm_iran, r_s_cat_iran],
    'Sudan': [r_s_sudan, r_s_lgbm_sudan, r_s_cat_sudan]
}

country_test = test_data['country'].copy()
ID = test_data['ID'].copy()

X_test = test_data.drop(columns = ['ID', 'Lat', 'Lon',  'country'])
X_test = scaler.transform(X_test)

# Report a test country without dedicated models up front, with a clear
# message, instead of failing part-way through the predictions.
countries_without_models = set(country_test.unique()) - set(model_dict)
if countries_without_models:
  raise KeyError(
      'no country-specific models for: '
      + ', '.join(sorted(map(str, countries_without_models))))


def _class_votes(search, features):
  """Return the fitted search's predicted labels as a flat float array."""
  return np.asarray(search.predict(features), dtype=float).ravel()


# Every row receives one vote from each model trained on the pooled data ...
pooled_models = (r_s_cat_all, r_s_all, r_s_lgbm_all)
vote_sum = np.zeros(len(X_test))
for pooled_model in pooled_models:
  vote_sum += _class_votes(pooled_model, X_test)
vote_count = np.full(len(X_test), float(len(pooled_models)))

# ... plus one vote from each model trained on the row's own country. Rows are
# selected with a positional boolean mask, so the result does not depend on
# the index labels of `test_data`, and each model predicts once per country
# instead of once per row.
for country, country_models in model_dict.items():
  in_country = (country_test == country).to_numpy()
  if not in_country.any():
    continue
  for country_model in country_models:
    vote_sum[in_country] += _class_votes(country_model, X_test[in_country])
  vote_count[in_country] += len(country_models)

# Label a row 1 when at least half of its votes are 1 (ties resolve to 1).
predictions = (vote_sum / vote_count >= 0.5).astype(int).tolist()





In [74]:
# Assemble the submission table: one predicted label per test ID.
submission_columns = {'ID': test_data['ID'], 'Target': predictions}
sub_file = pd.DataFrame(submission_columns)
sub_file.head()

Unnamed: 0,ID,Target
0,ID_9ZLHTVF6NSU7,1
1,ID_LNN7BFCVEZKA,0
2,ID_SOYSG7W04UH3,1
3,ID_EAP7EXXV8ZDE,1
4,ID_QPRX1TUQVGHU,0


In [75]:
# Write the submission table as 'sub4.csv' into the `path_drive` folder,
# omitting the DataFrame index column.
sub_file.to_csv(path_drive + 'sub4.csv', index = False)