# Environment Setup for Reproducing the Same Final Result

In [25]:
!pip install catboost==1.1.1
!pip install xgboost==1.7.6
!pip install lightgbm==3.3.2
!pip install scikit-learn==1.3.0

Collecting catboost==1.1.1
  Downloading catboost-1.1.1-cp310-none-manylinux1_x86_64.whl (76.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m76.6/76.6 MB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: catboost
  Attempting uninstall: catboost
    Found existing installation: catboost 1.2.2
    Uninstalling catboost-1.2.2:
      Successfully uninstalled catboost-1.2.2
Successfully installed catboost-1.1.1


Collecting xgboost==1.7.6
  Downloading xgboost-1.7.6-py3-none-manylinux2014_x86_64.whl (200.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.3/200.3 MB[0m [31m6.2 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: xgboost
  Attempting uninstall: xgboost
    Found existing installation: xgboost 2.0.0
    Uninstalling xgboost-2.0.0:
      Successfully uninstalled xgboost-2.0.0
Successfully installed xgboost-1.7.6


Collecting lightgbm==3.3.2
  Downloading lightgbm-3.3.2-py3-none-manylinux1_x86_64.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m6.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: lightgbm
  Attempting uninstall: lightgbm
    Found existing installation: lightgbm 4.0.0
    Uninstalling lightgbm-4.0.0:
      Successfully uninstalled lightgbm-4.0.0
Successfully installed lightgbm-3.3.2


Collecting scikit-learn==1.3.0
  Downloading scikit_learn-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (10.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.8/10.8 MB[0m [31m69.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: scikit-learn
  Attempting uninstall: scikit-learn
    Found existing installation: scikit-learn 1.2.2
    Uninstalling scikit-learn-1.2.2:
      Successfully uninstalled scikit-learn-1.2.2
Successfully installed scikit-learn-1.3.0


In [1]:
import xgboost
import catboost
import lightgbm
import sklearn
# Confirm that the pinned versions are the ones actually loaded by the kernel
# (printed in the order: xgboost, catboost, lightgbm, scikit-learn).
for package in (xgboost, catboost, lightgbm, sklearn):
  print(package.__version__)

1.7.6
1.1.1
3.3.2
1.3.0


# 1. Import necessary packages

In [2]:
import pandas as pd
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split,KFold,cross_val_score
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier,HistGradientBoostingClassifier,VotingClassifier
from catboost import CatBoostClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier
from sklearn.pipeline import Pipeline


# 2. Import Data

In [4]:
# Load the training table, the test table and the submission template
# from the working directory.
train, test, sample = (
    pd.read_csv(csv_name)
    for csv_name in ('Train.csv', 'Test.csv', 'SampleSubmission.csv')
)

# 3. Download Data from Google Earth Engine

In [5]:
import ee

In [6]:
# Get an authentication token and sign in to Google Earth Engine.
# Authenticate() starts the interactive authorization flow (it prompts for a
# verification code); Initialize() is then called before any other ee usage.
ee.Authenticate()
ee.Initialize()

To authorize access needed by Earth Engine, open the following URL in a web browser and follow the instructions. If the web browser does not start automatically, please manually browse the URL below.

    https://code.earthengine.google.com/client-auth?scopes=https%3A//www.googleapis.com/auth/earthengine%20https%3A//www.googleapis.com/auth/devstorage.full_control&request_id=a0SEqGUsm9VxEbtohRdUNVN9kenCbYGddW-OF2oIb24&tc=h1hHq1brrRfAvedvBD5aaWWjxRVIb7EYGsGGBFUZ0rI&cc=hKLmAosIvSb9zUC5Q0ca-qSM6VpQToufkD_ZBUZPA4c

The authorization workflow will generate a code, which you should paste in the box below.
Enter verification code: <redacted>

Successfully saved authorization token.


In [7]:
def get_country_name(latitude, longitude):
  """Return the name of the country that contains a coordinate.

  The lookup uses the FAO GAUL 2015 level-0 boundaries
  ("FAO/GAUL/2015/level0") hosted on Earth Engine and reads the
  'ADM0_NAME' property of the first boundary feature intersecting the point.

  Args:
    latitude: Latitude of the location.
    longitude: Longitude of the location.

  Returns:
    The 'ADM0_NAME' value fetched from Earth Engine via getInfo().
  """
  # Earth Engine points are given as [longitude, latitude]
  location = ee.Geometry.Point([longitude, latitude])

  # Keep only the boundary features that intersect the point and take the first one
  gaul_level0 = ee.FeatureCollection("FAO/GAUL/2015/level0")
  containing_country = ee.Feature(gaul_level0.filterBounds(location).first())

  # getInfo() performs the request and returns the property as a Python value
  return containing_country.get('ADM0_NAME').getInfo()

## Train Data Download

In [8]:
%%time

# Earth Engine image collections sampled at every labelled location
s2_collection = ee.ImageCollection('COPERNICUS/S2_SR_HARMONIZED')
s1_collection = ee.ImageCollection('COPERNICUS/S1_GRD')
dynamic_collection = ee.ImageCollection('GOOGLE/DYNAMICWORLD/V1')

# Bands read from each collection; their concatenation (S2, S1, Dynamic World)
# defines the order of the extracted feature columns
s2_bands = [
    'B2', 'B3', 'B4', 'B8',
    'AOT', 'WVP',
    'TCI_R', 'TCI_G', 'TCI_B',
    'MSK_SNWPRB',
]
s1_bands = ['VV', 'VH']
dynamic_bands = [
    'trees', 'grass', 'crops', 'water', 'flooded_vegetation',
    'shrub_and_scrub', 'built', 'bare', 'snow_and_ice',
]

# Imagery windows: one for points located in Afghanistan, one for every other country.
# NOTE(review): Earth Engine date ranges exclude the end date, so 2022-04-30 and
# 2020-06-30 themselves are presumably not covered — confirm this is intended.
date_range_afghanistan = ee.DateRange('2022-04-01', '2022-04-30')
date_range_other = ee.DateRange('2019-07-01', '2020-06-30')

# Work on a copy so the raw training frame stays untouched
csv_data = train.copy()

# Function to extract mean pixel values for a labeled location
def extract_mean_pixel_values(row):
  """Fetch mean band values at one labelled location from Earth Engine.

  The country containing the point (FAO GAUL 2015 level-0 boundaries) selects
  the imagery window: `date_range_afghanistan` when the country name is
  'Afghanistan', `date_range_other` otherwise. Each collection is filtered to
  the point and window, averaged over time with `.mean()`, and reduced at the
  point with a mean reducer at scale=10.

  Each reduced dictionary is downloaded with a single getInfo() call, so a row
  costs one request for the country plus one per collection, instead of one
  request per band.

  Args:
    row: Mapping (e.g. a DataFrame row) with 'Lat' and 'Lon' entries.

  Returns:
    A list of values ordered as s2_bands + s1_bands + dynamic_bands. A band
    that is absent from the reduction result (for example when no image
    matches the filters) or that is null is returned as None.
  """
  lat = row['Lat']
  lon = row['Lon']

  # Point geometry for the labelled location (longitude first)
  point = ee.Geometry.Point(lon, lat)

  # Country lookup: first GAUL level-0 boundary intersecting the point
  countries = ee.FeatureCollection("FAO/GAUL/2015/level0")
  country = ee.Feature(countries.filterBounds(point).first())
  country_name = country.get('ADM0_NAME').getInfo()

  # The window depends only on the country, so choose it once for all collections
  date_range = date_range_afghanistan if country_name == 'Afghanistan' else date_range_other

  def _point_means(collection, bands):
    # Temporal mean of the filtered images, sampled at the point, fetched in one request
    stats = (
        collection
        .filterBounds(point)
        .filterDate(date_range)
        .mean()
        .reduceRegion(
            reducer=ee.Reducer.mean(),
            geometry=point,
            scale=10
        )
        .getInfo()
    ) or {}
    # dict.get keeps a missing band as None rather than failing the whole download
    return [stats.get(band) for band in bands]

  return (
      _point_means(s2_collection, s2_bands)
      + _point_means(s1_collection, s1_bands)
      + _point_means(dynamic_collection, dynamic_bands)
  )

# Query Earth Engine once per training row; every returned list is expanded
# into one row of band-feature columns
mean_pixel_values = csv_data.apply(
    extract_mean_pixel_values,
    axis='columns',
    result_type='expand',
)
mean_pixel_values.columns = [*s2_bands, *s1_bands, *dynamic_bands]

# Attach the extracted features next to the original training columns
train_data = pd.concat((csv_data, mean_pixel_values), axis='columns')

# Show the first rows of the enriched training frame
train_data.head()


p: 0
CPU times: user 11.6 s, sys: 177 ms, total: 11.8 s
Wall time: 1min 13s


Unnamed: 0,ID,Lat,Lon,Target,B2,B3,B4,B8,AOT,WVP,...,VH,trees,grass,crops,water,flooded_vegetation,shrub_and_scrub,built,bare,snow_and_ice
0,ID_SJ098E7S2SY9,34.162491,70.763668,0,1236.0,1721.833333,2106.833333,2643.5,449.166667,1297.333333,...,-15.952387,0.023125,0.024985,0.0884,0.045123,0.032309,0.199199,0.048041,0.500652,0.038129
1,ID_CWCD60FGJJYY,32.075695,48.492047,0,2049.457143,2181.942857,2155.442857,3555.885714,204.157143,1267.128571,...,-16.374092,0.372387,0.069338,0.152259,0.035795,0.038634,0.200534,0.038115,0.052283,0.040615
2,ID_R1XF70RMVGL3,14.542826,33.313483,1,1396.208333,1625.236111,1769.208333,2836.666667,185.902778,1974.541667,...,-20.256233,0.079381,0.085005,0.509064,0.045968,0.03834,0.068565,0.033445,0.11031,0.029883
3,ID_0ZBIDY0PEBVO,14.35948,33.284108,1,1955.930556,2143.680556,2327.236111,3099.180556,192.875,2061.527778,...,-21.615505,0.042639,0.078207,0.528659,0.043161,0.036575,0.075171,0.033495,0.130665,0.03139
4,ID_C20R2C0AYIT0,14.419128,33.52845,0,2249.902778,2590.541667,2921.097222,3415.423611,189.111111,2122.5625,...,-19.542158,0.033241,0.030526,0.063424,0.028514,0.030459,0.063523,0.638202,0.08001,0.032065


## Test Data Download

In [9]:
%%time
# Repeat the extraction for the test locations, working on a copy of the raw frame
csv_data = test.copy()

# Query Earth Engine once per test row; every returned list is expanded
# into one row of band-feature columns
mean_pixel_values = csv_data.apply(
    extract_mean_pixel_values,
    axis='columns',
    result_type='expand',
)
mean_pixel_values.columns = [*s2_bands, *s1_bands, *dynamic_bands]

# Attach the extracted features next to the original test columns
test_data = pd.concat((csv_data, mean_pixel_values), axis='columns')

# Show the first rows of the enriched test frame
test_data.head()

CPU times: user 11.6 s, sys: 174 ms, total: 11.8 s
Wall time: 1min 11s


Unnamed: 0,ID,Lat,Lon,B2,B3,B4,B8,AOT,WVP,TCI_R,...,VH,trees,grass,crops,water,flooded_vegetation,shrub_and_scrub,built,bare,snow_and_ice
0,ID_9ZLHTVF6NSU7,34.254835,70.348699,3153.666667,3165.0,2861.833333,4458.666667,449.25,1191.75,159.916667,...,-18.519317,0.06087,0.070218,0.395157,0.030207,0.0416,0.082271,0.235463,0.044023,0.040147
1,ID_LNN7BFCVEZKA,32.009669,48.535526,2631.871429,2935.5,3246.328571,3951.728571,203.828571,1301.371429,227.171429,...,-22.621158,0.055985,0.032174,0.178837,0.032886,0.034625,0.319738,0.07478,0.224709,0.046225
2,ID_SOYSG7W04UH3,14.431884,33.399991,1414.291667,1625.180556,1795.104167,2487.694444,189.118056,2051.111111,145.597222,...,-20.67852,0.054424,0.072925,0.587744,0.048327,0.042666,0.058704,0.03119,0.073744,0.030238
3,ID_EAP7EXXV8ZDE,14.281866,33.441224,1750.763889,1902.777778,1967.25,2836.430556,192.694444,2086.388889,132.125,...,-19.673184,0.069731,0.060598,0.58739,0.03951,0.039778,0.085229,0.034345,0.054782,0.0286
4,ID_QPRX1TUQVGHU,14.399365,33.109566,2142.798611,2299.506944,2578.125,3111.479167,189.201389,2053.694444,187.013889,...,-19.96418,0.025692,0.03023,0.069138,0.031451,0.02311,0.06611,0.647564,0.070085,0.036582


# 4. Model Training

In [13]:
# Keyword arguments unpacked into CatBoostClassifier when the voting pipeline is built.
# NOTE(review): this looks like a full parameter dump of a fitted model rather than a
# hand-tuned subset — confirm before removing any entry.
CatBoost_Parameters = dict(
    nan_mode='Min',
    eval_metric='Logloss',
    iterations=1000,
    sampling_frequency='PerTree',
    leaf_estimation_method='Newton',
    grow_policy='SymmetricTree',
    penalties_coefficient=1,
    boosting_type='Plain',
    model_shrink_mode='Constant',
    feature_border_type='GreedyLogSum',
    eval_fraction=0,
    l2_leaf_reg=3,
    random_strength=1,
    rsm=1,
    boost_from_average=False,
    model_size_reg=0.5,
    subsample=0.800000011920929,
    use_best_model=False,
    class_names=[0, 1],
    random_state=42,  # fixed seed
    depth=6,
    posterior_sampling=False,
    border_count=254,
    classes_count=0,
    auto_class_weights='None',  # the string 'None', not the Python None object
    sparse_features_conflict_fraction=0,
    leaf_estimation_backtracking='AnyImprovement',
    best_model_min_trees=1,
    model_shrink_rate=0,
    min_data_in_leaf=1,
    loss_function='Logloss',
    learning_rate=0.012249999679625034,
    score_function='Cosine',
    task_type='CPU',
    leaf_estimation_iterations=10,
    bootstrap_type='MVS',
    max_leaves=64,
    verbose=False,
)


# Keyword arguments unpacked into XGBClassifier when the voting pipeline is built.
# Entries left as None are passed through as None.
#
# NOTE: the original literal listed 'learning_rate' twice (0.1 first, None later).
# In a dict literal the later entry wins, so the value actually passed to the
# model was None. The dead 0.1 entry is removed and None is kept so the fitted
# model is unchanged. If 0.1 was the intended rate, set it here deliberately —
# that will change the predictions.
XGBoost_Parameters = {
    'objective': 'binary:logistic',
    'learning_rate': None,
    'use_label_encoder': None,
    'base_score': None,
    'booster': None,
    'callbacks': None,
    'colsample_bylevel': None,
    'colsample_bynode': None,
    'colsample_bytree': None,
    'early_stopping_rounds': None,
    'enable_categorical': False,
    'eval_metric': None,
    'feature_types': None,
    'gamma': None,
    'gpu_id': None,
    'grow_policy': None,
    'importance_type': None,
    'interaction_constraints': None,
    'max_bin': None,
    'max_cat_threshold': None,
    'max_cat_to_onehot': None,
    'max_delta_step': None,
    'max_depth': None,
    'max_leaves': None,
    'min_child_weight': None,
    'monotone_constraints': None,
    'n_estimators': 100,
    'n_jobs': None,
    'num_parallel_tree': None,
    'predictor': None,
    'random_state': 42,  # fixed seed
    'reg_alpha': None,
    'reg_lambda': None,
    'sampling_method': None,
    'scale_pos_weight': None,
    'subsample': None,
    'tree_method': None,
    'validate_parameters': None,
    'verbosity': None,
}

# Keyword arguments unpacked into LGBMClassifier when the voting pipeline is built.
LightGBM_Parameters = dict(
    boosting_type='gbdt',
    class_weight=None,
    colsample_bytree=1.0,
    importance_type='split',
    learning_rate=0.1,
    max_depth=-1,
    min_child_samples=20,
    min_child_weight=0.001,
    min_split_gain=0.0,
    n_estimators=100,
    n_jobs=-1,
    num_leaves=31,
    objective=None,
    random_state=42,  # fixed seed
    reg_alpha=0.0,
    reg_lambda=0.0,
    # NOTE(review): 'silent' is presumably only accepted by the pinned
    # lightgbm==3.3.2 — verify before upgrading the library
    silent='warn',
    subsample=1.0,
    subsample_for_bin=200000,
    subsample_freq=0,
)

In [14]:
# Pipeline: standardise the features, then take a hard (majority) vote
# over the CatBoost, XGBoost and LightGBM classifiers
model = Pipeline(
    steps=[
        ('Scaler', StandardScaler()),
        (
            'voting',
            VotingClassifier(
                estimators=[
                    ('cat', CatBoostClassifier(**CatBoost_Parameters)),
                    ('xgb', XGBClassifier(**XGBoost_Parameters)),
                    ('lgbm', LGBMClassifier(**LightGBM_Parameters)),
                ],
                voting='hard',
            ),
        ),
    ]
)

In [15]:
# Fit on every column except the row identifier and the label
model.fit(
    train_data.drop(columns=['ID', 'Target']),
    train_data['Target'],
)

In [None]:
# Predict on the test features (everything except the row identifier)
pred = model.predict(test_data.drop(columns=['ID']))

# Predictions are written into the template by position.
# NOTE(review): this assumes the rows of SampleSubmission.csv are in the same
# order as the rows of Test.csv — verify.
sample['Target'] = pred
sample



Unnamed: 0,ID,Target
0,ID_9ZLHTVF6NSU7,1
1,ID_LNN7BFCVEZKA,0
2,ID_SOYSG7W04UH3,1
3,ID_EAP7EXXV8ZDE,1
4,ID_QPRX1TUQVGHU,0
...,...,...
1495,ID_6LVHE89NN5VE,0
1496,ID_M51GDSUBKS8Q,1
1497,ID_469MTLRKJC64,0
1498,ID_DMH9P3N6O3DK,0


In [None]:
# Write the filled-in template to disk without the DataFrame index column
sample.to_csv(path_or_buf='submission.csv', index=False)