In [1]:
# Supress Warnings
import warnings
warnings.filterwarnings('ignore')

# Visualization
import ipyleaflet
import matplotlib.pyplot as plt
from IPython.display import Image
import seaborn as sns

# Data Science
import numpy as np
import pandas as pd

# Feature Engineering
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Machine Learning
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import f1_score, accuracy_score,classification_report,confusion_matrix

# Planetary Computer Tools
import pystac
import pystac_client
#import odc
from pystac_client import Client
from pystac.extensions.eo import EOExtension as eo
from odc.stac import stac_load
import planetary_computer as pc
pc.settings.set_subscription_key('bb9d87ff6c574a01afa1dc640736eb22')

# Others
import requests
import rich.table
from itertools import cycle
from tqdm import tqdm
from time import sleep
tqdm.pandas()

In [2]:
crop_presence_data = pd.read_csv(r"Downloads/Crop_Location_Data_20221201.csv")
crop_presence_data.head()

Unnamed: 0,Latitude and Longitude,Class of Land
0,"(10.323727047081501, 105.2516346045924)",Rice
1,"(10.322364360592521, 105.27843410554115)",Rice
2,"(10.321455902933202, 105.25254306225168)",Rice
3,"(10.324181275911162, 105.25118037576274)",Rice
4,"(10.324635504740822, 105.27389181724476)",Rice


In [3]:
def get_sentinel_data(latlong,time_slice,assests):
    '''
    Returns VV and VH values for a given latitude and longitude 
    Attributes:
    latlong - A tuple with 2 elements - latitude and longitude
    time_slice - Timeframe for which the VV and VH values have to be extracted
    '''

    latlong=latlong.replace('(','').replace(')','').replace(' ','').split(',')
    
    lat_long = list(map(float,latlong))
    time_of_interest = time_slice
    
    box_size_deg = 0.0004 # yields approximately 5x5 pixel region

    min_lon = lat_long[1]-box_size_deg/2
    min_lat = lat_long[0]-box_size_deg/2
    max_lon = lat_long[1]+box_size_deg/2
    max_lat = lat_long[0]+box_size_deg/2

    bbox_of_interest = (min_lon, min_lat, max_lon, max_lat)
    resolution = 10  
    scale = resolution / 111320.0 

    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1"
    )
    search = catalog.search(
        collections=["sentinel-1-rtc"], bbox=bbox_of_interest, datetime=time_of_interest,
    )
    items = list(search.get_all_items())
    

    bands_of_interest = assests
    data = stac_load(items, patch_url=pc.sign, bbox=bbox_of_interest, resolution=scale,crs="EPSG:4326")
    vh = data['vh'].mean().item()
    vv = data['vv'].mean().item()
    
    return vh,vv

In [4]:
vh_vv_first = []
vh_vv_sec = []
vh_vv_third = []
vh_vv_first_2 = []
vh_vv_sec_2 = []
vh_vv_third_2 = []

In [6]:
#We define the time slices of our interest
time_slice_first = "2021-11-01/2021-12-31"
time_slice_sec = "2022-01-01/2022-02-28"
time_slice_third = "2022-03-01/2022-04-30"

time_slice_first_2 = "2022-04-01/2022-05-31"
time_slice_sec_2 = "2022-05-01/2022-06-30"
time_slice_third_2 = "2022-07-01/2022-08-31"

In [None]:
'''
We start extracting the data, we will not reexecute this code because it takes hours to extract and we already have extracted and stored it as csv file. 
We intend to load these csv files instead.
'''

In [None]:

assests = ['vh','vv']

for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_first.append(get_sentinel_data(coordinates,time_slice_first, assests))
        
vh_vv_data = pd.DataFrame(vh_vv_first,columns =['vh_s','vv_s'])
vh_vv_data.to_csv('crop_data_1.csv')


In [None]:
assests = ['vh','vv']
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_sec.append(get_sentinel_data(coordinates,time_slice_sec, assests))
        
vh_vv_data_2 = pd.DataFrame(vh_vv_sec,columns =['vh','vv'])
vh_vv_data_2.to_csv('crop_data_2.csv')

In [None]:
assests = ['vh','vv']
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_third.append(get_sentinel_data(coordinates,time_slice_third, assests))
        
vh_vv_data_3 = pd.DataFrame(vh_vv_third,columns =['vh_i','vv_i'])
vh_vv_data_3.to_csv('crop_data_3.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_first_2.append(get_sentinel_data(coordinates,time_slice_first_2, assests))
       
vh_vv_data_4 = pd.DataFrame(vh_vv_first_2,columns =['vh2_s','vv2_s'])
vh_vv_data_4.to_csv('crop_data_4.csv')

In [None]:
assests = ['vh','vv']
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_sec_2.append(get_sentinel_data(coordinates,time_slice_sec_2, assests))
        
vh_vv_data_5 = pd.DataFrame(vh_vv_sec_2,columns =['vh2','vv2'])
vh_vv_data_5.to_csv('crop_data_5.csv')

In [None]:
assests = ['vh','vv']
for coordinates in tqdm(crop_presence_data['Latitude and Longitude']):
    vh_vv_third_2.append(get_sentinel_data(coordinates,time_slice_third_2, assests))
       
vh_vv_data_6 = pd.DataFrame(vh_vv_third_2,columns =['vh2_i','vv2_i'])
vh_vv_data_6.to_csv('crop_data_6.csv')

In [None]:
#We read the data through the csv files we generated earlier
vh_vv_first = pd.read_csv("crop_data_1.csv")
vh_vv_first = vh_vv_first.iloc[: , 1:]
vh_vv_first = vh_vv_first.values.tolist()
vh_vv_first = list(vh_vv_first)


vh_vv_sec = pd.read_csv("crop_data_2.csv")
vh_vv_sec = vh_vv_sec.iloc[: , 1:]
vh_vv_sec = vh_vv_sec.values.tolist()
vh_vv_sec = list(vh_vv_sec)


vh_vv_third = pd.read_csv("crop_data_3.csv")
vh_vv_third = vh_vv_third.iloc[: , 1:]
vh_vv_third = vh_vv_third.values.tolist()
vh_vv_third = list(vh_vv_third)


vh_vv_first_2 = pd.read_csv("crop_data_4.csv")
vh_vv_first_2 = vh_vv_first_2.iloc[: , 1:]
vh_vv_first_2 = vh_vv_first_2.values.tolist()
vh_vv_first_2 = list(vh_vv_first_2)


vh_vv_sec_2 = pd.read_csv("crop_data_5.csv")
vh_vv_sec_2 = vh_vv_sec_2.iloc[: , 1:]
vh_vv_sec_2 = vh_vv_sec_2.values.tolist()
vh_vv_sec_2 = list(vh_vv_sec_2)


vh_vv_third_2 = pd.read_csv("crop_data_6.csv")
vh_vv_third_2 = vh_vv_third_2.iloc[: , 1:]
vh_vv_third_2 = vh_vv_third_2.values.tolist()
vh_vv_third_2 = list(vh_vv_third_2)

In [21]:
def combine_two_datasets(dataset1,dataset2):
    '''
    Returns a  vertically concatenated dataset.
    Attributes:
    dataset1 - Dataset 1 to be combined 
    dataset2 - Dataset 2 to be combined
    '''
    data = pd.concat([dataset1,dataset2], axis=1)
    return data

In [None]:
# Converting to dataframes to combine into a csv file later
vh_vv_data = pd.DataFrame(vh_vv_sec,columns =['vh','vv'])
vh_vv_data_2 = pd.DataFrame(vh_vv_third,columns =['vh_i','vv_i'])
vh_vv_data_3 = pd.DataFrame(vh_vv_sec_2,columns =['vh_2','vv_2'])
vh_vv_data_4 = pd.DataFrame(vh_vv_third_2,columns =['vh2_i','vv2_i'])
vh_vv_data_5 = pd.DataFrame(vh_vv_first,columns =['vh_s','vv_s'])
vh_vv_data_6 = pd.DataFrame(vh_vv_first_2,columns =['vh2_s','vv2_s'])

In [None]:
#Combining all the dataframes
crop_data = combine_two_datasets(crop_presence_data,vh_vv_data_5)
crop_data = combine_two_datasets(crop_data,vh_vv_data)
crop_data = combine_two_datasets(crop_data,vh_vv_data_2)
crop_data = combine_two_datasets(crop_data,vh_vv_data_6)
crop_data = combine_two_datasets(crop_data,vh_vv_data_3)
crop_data = combine_two_datasets(crop_data,vh_vv_data_4)

In [7]:
'''
Until the above comment we will not execute the code again because we already have done this and stored it in a csv file.
'''

'\nUntil the above comment we will not execute the code again because we already have done this and stored it in a csv file.\n'

In [8]:
'''
Below we load the csv file for our merged datasset.
'''

'\nBelow we load the csv file for our merged datasset.\n'

In [11]:
crop_data = pd.read_csv(r'C:\Users\nayan\Desktop\crop_data_vv_vh_periodized_collated_new.csv')
crop_data.head()

Unnamed: 0.1,Unnamed: 0,Latitude and Longitude,Class of Land,vh_s,vv_s,vh,vv,vh_i,vv_i,vh2_s,vv2_s,vh_2,vv_2,vh2_i,vv2_i
0,0,"(10.323727047081501, 105.2516346045924)",Rice,0.052673,0.077158,0.021167,0.12161,0.031363,0.151444,0.025569,0.174023,0.028246,0.117162,0.020695,0.134561
1,1,"(10.322364360592521, 105.27843410554115)",Rice,0.038756,0.050612,0.032217,0.166822,0.02986,0.203029,0.017261,0.180266,0.029887,0.123415,0.018732,0.13514
2,2,"(10.321455902933202, 105.25254306225168)",Rice,0.042426,0.080974,0.028568,0.163661,0.020866,0.102974,0.015354,0.123397,0.028247,0.099574,0.018033,0.111584
3,3,"(10.324181275911162, 105.25118037576274)",Rice,0.030646,0.088907,0.023148,0.10731,0.027045,0.164621,0.025066,0.225846,0.027644,0.13748,0.017812,0.135902
4,4,"(10.324635504740822, 105.27389181724476)",Rice,0.033537,0.06321,0.023297,0.181832,0.034959,0.175523,0.023205,0.211035,0.028823,0.146306,0.025503,0.124346


In [12]:
import math

# Convert columns to float type
crop_data[['vh_s', 'vv_s', 'vh', 'vv', 'vh_i', 'vv_i', 'vh2_s', 'vv2_s', 'vh_2', 'vv_2', 'vh2_i', 'vv2_i']] = crop_data[['vh_s', 'vv_s', 'vh', 'vv', 'vh_i', 'vv_i', 'vh2_s', 'vv2_s', 'vh_2', 'vv_2', 'vh2_i', 'vv2_i']].astype(float)


In [13]:
#Creating lists to store rvi data
rvi_data = []
rvi_data_2 = []
rvi_data_3 = []
rvi_data_4 = []
rvi_data_5 = []
rvi_data_6 = []

In [14]:
'''
Below we generate rvi data for all the 6 sections of times we had extracted.
'''

'\nBelow we generate rvi data for all the 6 sections of times we had extracted.\n'

In [15]:
for row in crop_data.itertuples():
    vh_s = row[4]
    vv_s = row[5]
    vh_1 = row[6]
    vv_1 = row[7]
    vh_1i = row[8]
    vv_1i = row[9]
    vh_2s = row[10]
    vv_2s = row[11]
    vh_2 = row[12]
    vv_2 = row[13]
    vh_2i = row[14]
    vv_2i = row[15]
    
    rvi = math.sqrt(1- vv_s / (vv_s+vh_s)) * 4 * (vh_s / (vv_s + vh_s))
    rvi_2 = math.sqrt(1- vv_1 / (vv_1+vh_1)) * 4 * (vh_1 / (vv_1 + vh_1))
    rvi_3 = math.sqrt(1- vv_1i / (vv_1i+vh_1i)) * 4 * (vh_1i / (vv_1i + vh_1i))
    rvi_4 = math.sqrt(1- vv_2s / (vv_2s+vh_2s)) * 4 * (vh_2s / (vv_2s + vh_2s))
    rvi_5 = math.sqrt(1- vv_2 / (vv_2+vh_2)) * 4 * (vh_2 / (vv_2 + vh_2))
    rvi_6 = math.sqrt(1- vv_2i / (vv_2i+vh_2i)) * 4 * (vh_2i / (vv_2i + vh_2i))
    
    rvi_data.append(rvi)
    rvi_data_2.append(rvi_2)
    rvi_data_3.append(rvi_3)
    rvi_data_4.append(rvi_4)
    rvi_data_5.append(rvi_5)
    rvi_data_6.append(rvi_6)

In [16]:
crop_data.drop(columns=['Latitude and Longitude', 'Unnamed: 0'], inplace=True)

In [17]:
crop_data.head()

Unnamed: 0,Class of Land,vh_s,vv_s,vh,vv,vh_i,vv_i,vh2_s,vv2_s,vh_2,vv_2,vh2_i,vv2_i
0,Rice,0.052673,0.077158,0.021167,0.12161,0.031363,0.151444,0.025569,0.174023,0.028246,0.117162,0.020695,0.134561
1,Rice,0.038756,0.050612,0.032217,0.166822,0.02986,0.203029,0.017261,0.180266,0.029887,0.123415,0.018732,0.13514
2,Rice,0.042426,0.080974,0.028568,0.163661,0.020866,0.102974,0.015354,0.123397,0.028247,0.099574,0.018033,0.111584
3,Rice,0.030646,0.088907,0.023148,0.10731,0.027045,0.164621,0.025066,0.225846,0.027644,0.13748,0.017812,0.135902
4,Rice,0.033537,0.06321,0.023297,0.181832,0.034959,0.175523,0.023205,0.211035,0.028823,0.146306,0.025503,0.124346


In [18]:
'''
We combine the rvi values into the crop_data that we had.
'''

'\nWe combine the rvi values into the crop_data that we had.\n'

In [28]:
rvi_combined = pd.DataFrame(rvi_data,columns =['rvi'])
rvi_dataframe_2 = pd.DataFrame(rvi_data_2,columns =['rvi_2'])
rvi_dataframe_3 = pd.DataFrame(rvi_data_3,columns =['rvi_3'])
rvi_dataframe_4 = pd.DataFrame(rvi_data_4,columns =['rvi_4'])
rvi_dataframe_5 = pd.DataFrame(rvi_data_5,columns =['rvi_5'])
rvi_dataframe_6 = pd.DataFrame(rvi_data_6,columns =['rvi_6'])
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_2)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_3)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_4)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_5)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_6)

In [29]:
crop_data_rvi = combine_two_datasets(crop_data,rvi_combined)

In [30]:
crop_data_rvi

Unnamed: 0,Class of Land,vh_s,vv_s,vh,vv,vh_i,vv_i,vh2_s,vv2_s,vh_2,vv_2,vh2_i,vv2_i,rvi,rvi_2,rvi_3,rvi_4,rvi_5,rvi_6
0,Rice,0.052673,0.077158,0.021167,0.121610,0.031363,0.151444,0.025569,0.174023,0.028246,0.117162,0.020695,0.134561,1.033651,0.228331,0.284245,0.183406,0.342470,0.194665
1,Rice,0.038756,0.050612,0.032217,0.166822,0.029860,0.203029,0.017261,0.180266,0.029887,0.123415,0.018732,0.135140,1.142334,0.260488,0.183639,0.103330,0.344321,0.169896
2,Rice,0.042426,0.080974,0.028568,0.163661,0.020866,0.102974,0.015354,0.123397,0.028247,0.099574,0.018033,0.111584,0.806372,0.229163,0.276645,0.147250,0.415545,0.207571
3,Rice,0.030646,0.088907,0.023148,0.107310,0.027045,0.164621,0.025066,0.225846,0.027644,0.137480,0.017812,0.135902,0.519139,0.298967,0.212022,0.126301,0.273997,0.157784
4,Rice,0.033537,0.063210,0.023297,0.181832,0.034959,0.175523,0.023205,0.211035,0.028823,0.146306,0.025503,0.124346,0.816383,0.153097,0.270757,0.124718,0.267074,0.280839
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
595,Non Rice,0.085700,0.329533,0.058252,0.280634,0.067516,0.270628,0.067140,0.276306,0.067665,0.298165,0.066186,0.245990,0.375053,0.285066,0.356876,0.345735,0.318189,0.390492
596,Non Rice,0.132381,0.454205,0.069278,0.260582,0.073605,0.234795,0.073309,0.244816,0.068977,0.280298,0.075501,0.268088,0.428846,0.384998,0.466386,0.442487,0.351045,0.412033
597,Non Rice,0.046308,0.349162,0.057577,0.251251,0.061614,0.249543,0.063255,0.262141,0.070496,0.293176,0.069843,0.316960,0.160275,0.322001,0.352462,0.342834,0.341386,0.306909
598,Non Rice,0.069254,0.355037,0.066002,0.280890,0.074445,0.301951,0.073775,0.296859,0.068442,0.285206,0.081603,0.313238,0.263775,0.331976,0.351840,0.355227,0.340556,0.375823


In [31]:
#We initialize Standard Scaler
sc = StandardScaler()
training_data = crop_data_rvi.drop(columns = ['Class of Land'])

In [32]:
#We will now split the data for traiing and testing purpose

In [33]:
X = training_data
y = crop_data_rvi ['Class of Land'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3,stratify=y,random_state=40)

In [34]:
#We apply standard scaler on the X_train and X_test
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [None]:
#Below we generate our Voting Classifier Model

In [16]:
import numpy as np
import pandas as pd
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, VotingClassifier
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report


# Define base classifiers
dt_classifier = DecisionTreeClassifier()
rf_classifier = RandomForestClassifier()
svm_classifier = SVC(probability=True)  # probability=True is needed for soft voting

# Create a VotingClassifier
voting_classifier = VotingClassifier(
    estimators=[('dt', dt_classifier), ('rf', rf_classifier), ('svm', svm_classifier)],
    voting='soft'  # 'soft' for soft voting (probability-weighted), 'hard' for majority voting
)

# Define hyperparameters to search over
param_grid = {
    'dt__max_depth': [None, 5, 10],
    'rf__n_estimators': [50, 100, 200],
    'svm__C': [0.1, 1, 10]
}

# Use GridSearchCV to find the best combination of hyperparameters
grid_search = GridSearchCV(voting_classifier, param_grid, scoring='accuracy', cv=5)
grid_search.fit(X_train, y_train)

print('Best model', grid_search.best_estimator_)
# Get the best model from the grid search
best_voting_model = grid_search.best_estimator_




Best model VotingClassifier(estimators=[('dt', DecisionTreeClassifier()),
                             ('rf', RandomForestClassifier(n_estimators=50)),
                             ('svm', SVC(C=10, probability=True))],
                 voting='soft')


In [34]:
'''
The parameters for best voting classifier are as below:
Best model VotingClassifier(estimators=[('dt', DecisionTreeClassifier()),
                             ('rf', RandomForestClassifier(n_estimators=50)),
                             ('svm', SVC(C=10, probability=True))],
                 voting='soft')
'''

"\nThe parameters for best voting classifier are as below:\nBest model VotingClassifier(estimators=[('dt', DecisionTreeClassifier()),\n                             ('rf', RandomForestClassifier(n_estimators=50)),\n                             ('svm', SVC(C=10, probability=True))],\n                 voting='soft')\n"

In [1]:
'''
We might even notice small change in accuracy of model because the above code makes model chose random paratmeters so their is always
slight difference in accuracy. We obtained the best accuracy of 93% till now.
'''

'\nWe might even notice small change in accuracy of model because the above code makes model chose random paratmeters so their is always\nslight difference in accuracy. We obtained the best accuracy of 93% till now.\n'

In [35]:
#We create the voting model with best parameters
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier

# Define individual classifiers
dt_classifier = DecisionTreeClassifier()
rf_classifier = RandomForestClassifier(n_estimators=50)
svm_classifier = SVC(C=10, probability=True)

# Create VotingClassifier with best parameters
voting_classifier = VotingClassifier(
    estimators=[
        ('dt', dt_classifier),
        ('rf', rf_classifier),
        ('svm', svm_classifier)
    ],
    voting='soft'
)



In [37]:
voting_classifier.fit(X_train, y_train)

In [38]:
insample_predictions = voting_classifier.predict(X_train)

In [39]:
print("Insample Accuracy {0:.2f}%".format(100*accuracy_score(insample_predictions,y_train)))
print(classification_report(insample_predictions,y_train))

Insample Accuracy 100.00%
              precision    recall  f1-score   support

    Non Rice       1.00      1.00      1.00       210
        Rice       1.00      1.00      1.00       210

    accuracy                           1.00       420
   macro avg       1.00      1.00      1.00       420
weighted avg       1.00      1.00      1.00       420



In [40]:
outsample_predictions = voting_classifier.predict(X_test)

In [41]:
print("Outsample Accuracy {0:.2f}%".format(100*accuracy_score(outsample_predictions,y_test)))
print(classification_report(outsample_predictions,y_test))

Outsample Accuracy 98.89%
              precision    recall  f1-score   support

    Non Rice       0.99      0.99      0.99        90
        Rice       0.99      0.99      0.99        90

    accuracy                           0.99       180
   macro avg       0.99      0.99      0.99       180
weighted avg       0.99      0.99      0.99       180



In [43]:
#We load the testing file as before we will not execute this as we have already stored a csv file after extracting all the values

In [None]:
test_file = pd.read_csv(r'Downloads\challenge_1_submission_template.csv')
test_file.head()

In [None]:
test_vh_vv_first = []
test_vh_vv_sec = []
test_vh_vv_third = []
test_vh_vv_first_2 = []
test_vh_vv_sec_2 = []
test_vh_vv_third_2 = []

In [None]:
time_slice_first = "2021-11-01/2021-12-31"
time_slice_sec = "2022-01-01/2022-02-28"
time_slice_third = "2022-03-01/2022-04-30"

time_slice_first_2 = "2022-04-01/2022-05-31"
time_slice_sec_2 = "2022-05-01/2022-06-30"
time_slice_third_2 = "2022-07-01/2022-08-31"

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_first.append(get_sentinel_data(coordinates,time_slice_first, assests))
        
test_vh_vv_first_data = pd.DataFrame(test_vh_vv_first,columns =['vh_s','vv_s'])
test_vh_vv_first_data.to_csv('crop_data_test_1.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_sec.append(get_sentinel_data(coordinates,time_slice_sec, assests))
      
test_vh_vv_sec_data = pd.DataFrame(test_vh_vv_sec,columns =['vh','vv'])
test_vh_vv_sec_data.to_csv('crop_data_test_2.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_third.append(get_sentinel_data(coordinates,time_slice_third, assests))
    
test_vh_vv_third_data = pd.DataFrame(test_vh_vv_third,columns =['vh_i','vv_i'])
test_vh_vv_third_data.to_csv('crop_data_test_3.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_first_2.append(get_sentinel_data(coordinates,time_slice_first_2, assests))
        
test_vh_vv_first_2_data = pd.DataFrame(test_vh_vv_first_2,columns =['vh2_s','vv2_s'])
test_vh_vv_first_2_data.to_csv('crop_data_test_4.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_sec_2.append(get_sentinel_data(coordinates,time_slice_sec_2, assests))
     
test_vh_vv_sec_2_data = pd.DataFrame(test_vh_vv_sec_2,columns =['vh2','vv2'])
test_vh_vv_sec_2_data.to_csv('crop_data_test_5.csv')

In [None]:
assests = ['vh', 'vv']
for coordinates in tqdm(test_file['Latitude and Longitude']):
    test_vh_vv_third_2.append(get_sentinel_data(coordinates,time_slice_third_2, assests))
        
test_vh_vv_third_2_data = pd.DataFrame(test_vh_vv_third_2,columns =['vh2_i','vv2_i'])
test_vh_vv_third_2_data.to_csv('crop_data_test_6.csv')

In [None]:
test_vh_vv_data = pd.DataFrame(test_vh_vv_sec,columns =['vh','vv'])
test_vh_vv_data_2 = pd.DataFrame(test_vh_vv_third,columns =['vh_i','vv_i'])
test_vh_vv_data_3 = pd.DataFrame(test_vh_vv_sec_2,columns =['vh_2','vv_2'])
test_vh_vv_data_4 = pd.DataFrame(test_vh_vv_third_2,columns =['vh2_i','vv2_i'])
test_vh_vv_data_5 = pd.DataFrame(test_vh_vv_first,columns =['vh_s','vv_s'])
test_vh_vv_data_6 = pd.DataFrame(test_vh_vv_first_2,columns =['vh2_s','vv2_s'])

In [None]:
test_crop_data = combine_two_datasets(test_file,test_vh_vv_data_5)
test_crop_data = combine_two_datasets(test_crop_data,test_vh_vv_data)
test_crop_data = combine_two_datasets(test_crop_data,test_vh_vv_data_2)
test_crop_data = combine_two_datasets(test_crop_data,test_vh_vv_data_6)
test_crop_data = combine_two_datasets(test_crop_data,test_vh_vv_data_3)
test_crop_data = combine_two_datasets(test_crop_data,test_vh_vv_data_4)

test_crop_data.to_csv(r'C:\Users\nayan\Desktop\crop_data_vv_vh_periodized_test_new.csv')

In [44]:
#We load the data that we had already stored as csv file

In [45]:
test_crop_data = pd.read_csv(r'C:\Users\nayan\Desktop\crop_data_vv_vh_periodized_test_new.csv')

In [46]:
test_crop_data.head()

Unnamed: 0.1,Unnamed: 0,Latitude and Longitude,Class of Land,vh_s,vv_s,vh,vv,vh_i,vv_i,vh2_s,vv2_s,vh_2,vv_2,vh2_i,vv2_i
0,0,"(10.18019073690894, 105.32022315786804)",,0.005727,0.030416,0.019493,0.1118,0.026121,0.130829,0.022544,0.154875,0.030419,0.115565,0.025715,0.144311
1,1,"(10.561107033461816, 105.12772097986661)",,0.005012,0.020915,0.026987,0.095377,0.028386,0.105415,0.027241,0.1504,0.032485,0.145144,0.028722,0.124136
2,2,"(10.623790611954897, 105.13771401411867)",,0.00671,0.07656,0.017792,0.122356,0.037807,0.148186,0.023368,0.171789,0.023858,0.16654,0.035935,0.141052
3,3,"(10.583364246115156, 105.23946127195805)",,0.003873,0.011879,0.004403,0.015108,0.004632,0.016826,0.006874,0.029732,0.006855,0.02915,0.004395,0.016426
4,4,"(10.20744446668854, 105.26844107128906)",,0.008515,0.088285,0.021408,0.066899,0.021497,0.166486,0.018607,0.137132,0.023161,0.070661,0.02574,0.163289


In [47]:
test_crop_data = test_crop_data[['vh_s','vv_s','vh','vv','vh_i','vv_i','vh2_s','vv2_s','vh_2','vv_2','vh2_i','vv2_i']]

rvi_data = []
rvi_data_2 = []
rvi_data_3 = []
rvi_data_4 = []
rvi_data_5 = []
rvi_data_6 = []

for row in test_crop_data.itertuples():
    vh_s = row[1]
    vv_s = row[2]
    vh_1 = row[3]
    vv_1 = row[4]
    vh_1i = row[5]
    vv_1i = row[6]
    vh_2s = row[7]
    vv_2s = row[8]
    vh_2 = row[9]
    vv_2 = row[10]
    vh_2i = row[11]
    vv_2i = row[12]
    
    rvi = math.sqrt(1- vv_s / (vv_s+vh_s)) * 4 * (vh_s / (vv_s + vh_s))
    rvi_2 = math.sqrt(1- vv_1 / (vv_1+vh_1)) * 4 * (vh_1 / (vv_1 + vh_1))
    rvi_3 = math.sqrt(1- vv_1i / (vv_1i+vh_1i)) * 4 * (vh_1i / (vv_1i + vh_1i))
    rvi_4 = math.sqrt(1- vv_2s / (vv_2s+vh_2s)) * 4 * (vh_2s / (vv_2s + vh_2s))
    rvi_5 = math.sqrt(1- vv_2 / (vv_2+vh_2)) * 4 * (vh_2 / (vv_2 + vh_2))
    rvi_6 = math.sqrt(1- vv_2i / (vv_2i+vh_2i)) * 4 * (vh_2i / (vv_2i + vh_2i))
    
    rvi_data.append(rvi)
    rvi_data_2.append(rvi_2)
    rvi_data_3.append(rvi_3)
    rvi_data_4.append(rvi_4)
    rvi_data_5.append(rvi_5)
    rvi_data_6.append(rvi_6)

rvi_combined = pd.DataFrame(rvi_data,columns =['rvi'])
rvi_dataframe_2 = pd.DataFrame(rvi_data_2,columns =['rvi_2'])
rvi_dataframe_3 = pd.DataFrame(rvi_data_3,columns =['rvi_3'])
rvi_dataframe_4 = pd.DataFrame(rvi_data_4,columns =['rvi_4'])
rvi_dataframe_5 = pd.DataFrame(rvi_data_5,columns =['rvi_5'])
rvi_dataframe_6 = pd.DataFrame(rvi_data_6,columns =['rvi_6'])
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_2)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_3)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_4)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_5)
rvi_combined = combine_two_datasets(rvi_combined,rvi_dataframe_6)

test_crop_data_rvi = combine_two_datasets(test_crop_data,rvi_combined)

In [48]:
test_data = sc.transform(test_crop_data_rvi)

In [49]:
test_predictions = voting_classifier.predict(test_data)

test_predictions

test_predictions = pd.DataFrame(test_predictions,columns =['target'])

test_predictions['target'].value_counts()['Rice']

116

In [50]:
submission_template = pd.read_csv(r'C:\Users\nayan\Downloads\challenge_1_submission_template.csv')

In [51]:
submission_template.head()

Unnamed: 0,Latitude and Longitude,Class of Land
0,"(10.18019073690894, 105.32022315786804)",
1,"(10.561107033461816, 105.12772097986661)",
2,"(10.623790611954897, 105.13771401411867)",
3,"(10.583364246115156, 105.23946127195805)",
4,"(10.20744446668854, 105.26844107128906)",


In [52]:
submission_template = submission_template.rename(columns={"Latitude and Longitude": "id"})

In [53]:

submission_template = combine_two_datasets(submission_template,test_predictions)


In [54]:
submission_template = submission_template.drop('Class of Land', axis=1)

In [55]:
submission_template.head()

Unnamed: 0,id,target
0,"(10.18019073690894, 105.32022315786804)",Rice
1,"(10.561107033461816, 105.12772097986661)",Rice
2,"(10.623790611954897, 105.13771401411867)",Rice
3,"(10.583364246115156, 105.23946127195805)",Non Rice
4,"(10.20744446668854, 105.26844107128906)",Rice


In [33]:
#Dumping the predictions into a csv file.
submission_template.to_csv(r"C:\Users\nayan\Downloads\VotingCLassifier_Final_attempt_Submission.csv",index = False)