In [None]:
# Mount Google Drive at /content/drive; every data and model path in this notebook lives under it.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
from glob import glob
import joblib
import json
import time
from copy import deepcopy
import os
import re
import numpy as np
import ee
import ast

In [None]:
# Acronym of each agroclimatic zone. The acronym is the first component of the
# per-district data folder names, which look like '<ACRONYM>_<district>_<year>'.
agroclimaticZone_acronym_dict = {
    'Eastern Plateau & Hills Region': 'EPAHR',
    'Southern Plateau and Hills Region': 'SPAHR',
    'East Coast Plains & Hills Region': 'ECPHR',
    'Western Plateau and Hills Region': 'WPAHR',
    'Central Plateau & Hills Region': 'CPAHR',
    'Lower Gangetic Plain Region': 'LGPR',
    'Middle Gangetic Plain Region': 'MGPR',
    'Eastern Himalayan Region': 'EHR',
    'Western Himalayan Region': 'WHR',
    'Upper Gangetic Plain Region': 'UGPR',
    'Trans Gangetic Plain Region': 'TGPR',
    'West Coast Plains & Ghat Region': 'WCPGR',
    'Gujarat Plains & Hills Region': 'GPHR',
    'Western Dry Region': 'WDR',
}

In [None]:
# Per-zone 'cc_<month>' key (month number 1-12).
# NOTE(review): presumably the monthly canopy-cover column preferred for each
# zone; it is not referenced by any other cell visible in this notebook - confirm.
best_month_dict = {
    'Eastern Plateau & Hills Region': 'cc_12',
    'Middle Gangetic Plain Region': 'cc_10',
    'Lower Gangetic Plain Region': 'cc_9',
    'Western Himalayan Region': 'cc_8',
    'Eastern Himalayan Region': 'cc_10',
    'Upper Gangetic Plain Region': 'cc_9',
    'Trans Gangetic Plain Region': 'cc_9',
    'Central Plateau & Hills Region': 'cc_7',
    'Western Plateau and Hills Region': 'cc_11',
    'Southern Plateau and Hills Region': 'cc_8',
    'East Coast Plains & Hills Region': 'cc_12',
}

Rename the folders correctly before predicting. A bug in the GEE-to-Google-Drive export sometimes creates multiple copies of the same folder and splits the exported files between them.

## Rename

In [None]:
# Zone whose export folders are renamed in this section.
# Exactly one assignment should be active; the commented lines are the other zones that can be selected.
agroclimatic_zone = 'Southern Plateau and Hills Region'
# agroclimatic_zone = 'Eastern Plateau & Hills Region'
# agroclimatic_zone = "East Coast Plains & Hills Region"
# agroclimatic_zone = 'Western Plateau and Hills Region'
# agroclimatic_zone = "Central Plateau & Hills Region"
# agroclimatic_zone = 'Lower Gangetic Plain Region'
# agroclimatic_zone = 'Middle Gangetic Plain Region'
# agroclimatic_zone = 'Eastern Himalayan Region'
# agroclimatic_zone = 'Western Himalayan Region'
# agroclimatic_zone = 'Trans Gangetic Plain Region'
# agroclimatic_zone = 'Gujarat Plains & Hills Region'
# agroclimatic_zone = 'Western Dry Region'

In [None]:
# Set the list of years for which to rename folders
# (kept as strings because each year is concatenated into the folder-name pattern)
years = ['2023']

In [None]:
# Load the district table of the selected zone and collect its 'Name' column.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv(f'drive/MyDrive/harsh/{agroclimatic_zone}.csv')
dist_list = list(df['Name'])

In [None]:
# Set the root directory to the specified folder
# (the rename cells below list and rename folders relative to the working directory)
root_dir = '/content/drive/MyDrive/'
os.chdir(root_dir)

In [None]:
# Show the working directory to confirm the chdir to root_dir took effect.
! pwd

In [None]:
# Names of all sub-directories of the current working directory
# (plain files are filtered out).
current_directory = os.getcwd()

folders = [
    name
    for name in os.listdir(current_directory)
    if os.path.isdir(os.path.join(current_directory, name))
]

# To inspect the result, print the entries of `folders` one per line.

In [None]:
# Keep only the folders whose name begins with the acronym of the selected zone.
zone_acronym = agroclimaticZone_acronym_dict[agroclimatic_zone]

# pattern.match anchors at the start of the string; '^' makes that explicit.
pattern = re.compile('^' + zone_acronym)
matching_folders = list(filter(pattern.match, folders))

# Show what was found, one folder per line.
print(f"Folders starting with '{zone_acronym}':")
for folder_name in matching_folders:
    print(folder_name)

In [None]:
# Transfer files from duplicate folders to the original folder.
#
# For every district/year, all folders whose name starts with
# '<ACRONYM>_<district>_<year>' are treated as copies of the same export.
# Files are moved from each copy into the next one until a single folder is
# left, and that folder is then given exactly the name '<ACRONYM>_<district>_<year>'.
#
# District names are compared as plain text (str.startswith), so names that
# contain regex metacharacters such as 'Leh (Ladakh)' need no special handling.

zone_acronym = agroclimaticZone_acronym_dict[agroclimatic_zone]

for dist_num, district in enumerate(dist_list):
    print(dist_num)
    for year in years:
        print(district, year)

        new_folder_name = f'{zone_acronym}_{district}_{year}'

        # os.path.isdir drops names that no longer exist (e.g. renamed by an
        # earlier run of this cell), which keeps the cell safe to re-run.
        district_year_folders = [
            folder for folder in matching_folders
            if folder.startswith(new_folder_name) and os.path.isdir(folder)
        ]

        if not district_year_folders:
            # Nothing was exported for this district/year; indexing [0] below would fail.
            print(f"No folder found for '{new_folder_name}', skipping")
            continue

        # Stable sort that puts the correctly named folder (if any) last, so the
        # files end up in it and the final rename never targets an existing folder.
        district_year_folders.sort(key=lambda folder: folder == new_folder_name)

        while len(district_year_folders) > 1:
            source_folder = district_year_folders[0]
            destination_folder = district_year_folders[1]

            # NOTE: a file with the same name in the destination is replaced.
            for file_name in os.listdir(source_folder):
                source_path = os.path.join(source_folder, file_name)
                destination_path = os.path.join(destination_folder, file_name)
                os.rename(source_path, destination_path)

            del district_year_folders[0]

        current_folder_name = district_year_folders[0]
        if current_folder_name != new_folder_name:
            os.rename(current_folder_name, new_folder_name)

In [None]:
# Check all folders with more than 0 files.
#
# The working directory is listed again here instead of reusing
# `matching_folders`: that list was built before the rename step, so it can
# contain folder names that no longer exist, and os.listdir would raise
# FileNotFoundError on them.
#
# District names are compared as plain text (str.startswith), so names that
# contain regex metacharacters such as 'Leh (Ladakh)' need no special handling.

zone_acronym = agroclimaticZone_acronym_dict[agroclimatic_zone]
existing_folders = [name for name in os.listdir(os.getcwd()) if os.path.isdir(name)]

for dist_num, district in enumerate(dist_list):
    print(dist_num)
    for year in years:
        folder_prefix = f'{zone_acronym}_{district}_{year}'

        for folder in existing_folders:
            if folder.startswith(folder_prefix) and len(os.listdir(folder)) > 0:
                print(folder)

In [None]:
# Change back to the directory that contains the Drive mount point, so that the
# relative 'drive/MyDrive/...' paths used by the following sections resolve.
# root_dir has the form '<base>/drive/MyDrive/', so <base> is two levels above
# it; going up a single level from the working directory stops at '<base>/drive'.
# Building the target from root_dir also makes this cell safe to run twice.
os.chdir(os.path.abspath(os.path.join(root_dir, os.pardir, os.pardir)))

In [None]:
# Show the working directory; the relative 'drive/MyDrive/...' paths used below are resolved against it.
! pwd

## CHM

In [None]:
# Zone processed by the canopy-height (CHM) section.
# Exactly one assignment should be active; the commented lines are the other zones that can be selected.
# agroclimatic_zone = 'Eastern Plateau & Hills Region'
agroclimatic_zone = 'Southern Plateau and Hills Region'
# agroclimatic_zone = 'East Coast Plains & Hills Region'
# agroclimatic_zone = 'Western Plateau and Hills Region'
# agroclimatic_zone = 'Central Plateau & Hills Region'
# agroclimatic_zone = 'Lower Gangetic Plain Region'
# agroclimatic_zone = 'Middle Gangetic Plain Region'
# agroclimatic_zone = 'Trans Gangetic Plain Region'
# agroclimatic_zone = 'Upper Gangetic Plain Region'
# agroclimatic_zone = 'Western Himalayan Region'
# agroclimatic_zone = 'Eastern Himalayan Region'

In [None]:
# Set the list of years for which to compute result
# (strings: each year is inserted into the input folder name and the output path)
years = ['2021', '2022', '2023']

In [None]:
# District -> agroclimatic zone mapping table. The cells below use its
# 'District', 'AgroclimaticZone' and 'IntersectingZones' columns.
df = pd.read_csv('drive/MyDrive/harsh/district_to_agroclimaticZone_mapping.csv')

In [None]:
# Function to convert string representation of list to an actual list
def convert_to_list(string):
    """Parse the string form of a Python list (e.g. "['a', 'b']") into a list.

    A value that is already a list is returned unchanged. Without this, applying
    the conversion to a column a second time (re-running the cell) would pass
    lists to ast.literal_eval, which raises ValueError for non-string input.

    Parameters:
        string: text holding a Python literal, or an already parsed list.

    Returns:
        The parsed literal, or the given list itself.

    Raises:
        ValueError / SyntaxError: if the text is not a valid Python literal.
    """
    if isinstance(string, list):
        return string
    return ast.literal_eval(string)

# Replace the string values of the 'IntersectingZones' column by real lists (overwrites the column of df).
df['IntersectingZones'] = df['IntersectingZones'].apply(convert_to_list)

In [None]:
# Rows of the mapping table assigned to the selected zone, reduced to the district name
# and the list of zones stored in 'IntersectingZones'.
district_mapping_df = df[df['AgroclimaticZone'] == agroclimatic_zone][['District', 'IntersectingZones']]

In [None]:
# Print a running number, the district name and its intersecting zones for every
# district of the selected zone.
# (A useful extra check while debugging: report any zone that has no entry in
# agroclimaticZone_acronym_dict.)
district_zone_pairs = zip(district_mapping_df['District'], district_mapping_df['IntersectingZones'])
for position, (district, zones) in enumerate(district_zone_pairs):
    print(position, district, zones)

In [None]:
# Saved canopy-height models: one joblib file per agroclimatic zone and per
# metric (rh98 / rh75 / rh50). Every file name has the form
#   <zone name without ' & ', spaces replaced by '_'>_correct_toa_<metric>_<number>.joblib
# so the three lookup tables are generated from the zone name and the number.
_CHM_MODEL_DIR = 'drive/MyDrive/dhruvi/best_models/chm_final/'

# zone -> trailing number of the file name for (rh98, rh75, rh50)
_CHM_MODEL_SUFFIXES = {
    'Eastern Plateau & Hills Region': (24, 17, 12),
    'East Coast Plains & Hills Region': (23, 16, 12),
    'Western Himalayan Region': (30, 20, 14),
    'Eastern Himalayan Region': (25, 18, 13),
    'Central Plateau & Hills Region': (23, 16, 11),
    'Southern Plateau and Hills Region': (23, 16, 12),
    'Western Plateau and Hills Region': (24, 17, 12),
    'Upper Gangetic Plain Region': (29, 22, 17),
    'Middle Gangetic Plain Region': (24, 17, 12),
    'Trans Gangetic Plain Region': (21, 15, 11),
    'Lower Gangetic Plain Region': (17, 12, 9),
}


def _chm_model_paths(metric, position):
    """Return the zone -> model file mapping for one metric.

    `position` selects the entry of the (rh98, rh75, rh50) suffix tuple.
    """
    return {
        zone: (_CHM_MODEL_DIR
               + zone.replace(' & ', ' ').replace(' ', '_')
               + f'_correct_toa_{metric}_{suffixes[position]}.joblib')
        for zone, suffixes in _CHM_MODEL_SUFFIXES.items()
    }


agroclimatic_zone_model_path_mapping_rh98 = _chm_model_paths('rh98', 0)

agroclimatic_zone_model_path_mapping_rh75 = _chm_model_paths('rh75', 1)

agroclimatic_zone_model_path_mapping_rh50 = _chm_model_paths('rh50', 2)

In [None]:
# Look up and load the three canopy-height models (rh98, rh75, rh50) of the selected zone.
# A zone without an entry in the path tables raises KeyError here.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted location.
MODEL_PATH_rh98 = agroclimatic_zone_model_path_mapping_rh98[agroclimatic_zone]
model_rh98 = joblib.load(MODEL_PATH_rh98)

MODEL_PATH_rh75 = agroclimatic_zone_model_path_mapping_rh75[agroclimatic_zone]
model_rh75 = joblib.load(MODEL_PATH_rh75)

MODEL_PATH_rh50 = agroclimatic_zone_model_path_mapping_rh50[agroclimatic_zone]
model_rh50 = joblib.load(MODEL_PATH_rh50)

In [None]:
# Input column names stored on each model (`feature_names_in_`); pipeline() uses
# them to select the columns passed to predict().
#
# Fail immediately when a model does not carry the attribute. Skipping the
# assignment silently would leave features_* undefined - or, worse, left over
# from an earlier run with a different model - when pipeline() is called.
for _metric, _model in (('rh98', model_rh98), ('rh75', model_rh75), ('rh50', model_rh50)):
    if not hasattr(_model, 'feature_names_in_'):
        raise AttributeError(
            f"The {_metric} model loaded for '{agroclimatic_zone}' has no "
            "'feature_names_in_' attribute, so its input columns are unknown."
        )

features_rh98 = model_rh98.feature_names_in_

features_rh75 = model_rh75.feature_names_in_

features_rh50 = model_rh50.feature_names_in_

In [None]:
# Season suffixes of the input columns (e.g. 'VV_kharif'); add_s1_indices and add_s2_indices loop over them.
seasons = ['kharif', 'rabi', 'zaid']

In [None]:
def add_s1_indices(df):
    """Add the derived SAR index columns of every season to `df` in place.

    For each entry of the module-level `seasons` list the columns
    `VV_<season>` and `VH_<season>` are read and eight columns named
    `<index>_<season>` are written. Nothing is returned.
    """
    for season in seasons:
        vv = df[f'VV_{season}']
        vh = df[f'VH_{season}']

        # Both ratios are reused by several indices below.
        vv_over_vh = vv / vh
        vh_over_vv = vh / vv

        # Insertion order of this dict is the order in which the columns are added.
        derived = {
            'VV_VH_Ratio': vv_over_vh,
            'VH_VV_Ratio': vh_over_vv,
            'SAR_NDVI': (vh - vv) / (vh + vv),
            'SAR_DVI': vh - vv,
            'SAR_SVI': vh + vv,
            'SAR_RDVI': vh_over_vv - vv_over_vh,
            'SAR_NRDVI': (vh_over_vv - vv_over_vh) / (vh_over_vv + vv_over_vh),
            'SAR_SSDVI': vh**2 - vv**2,
        }
        for index_name, values in derived.items():
            df[f'{index_name}_{season}'] = values

def add_s2_indices(df):
    """Add the derived optical index columns of every season to `df` in place.

    For each entry of the module-level `seasons` list the band columns
    `B2_`, `B3_`, `B4_`, `B8_` and `B12_<season>` are read and the columns
    NDVI, NDWI, EVI, OSAVI, ARVI and VARI (each suffixed with the season) are
    written. Nothing is returned.

    NOTE(review): the ARVI denominator adds B2, whereas the commonly cited
    definition subtracts it. It is left as is because the saved models were
    presumably trained on features computed this way - confirm before changing.
    """
    for season in seasons:
        b2 = df[f'B2_{season}']
        b3 = df[f'B3_{season}']
        b4 = df[f'B4_{season}']
        b8 = df[f'B8_{season}']
        b12 = df[f'B12_{season}']

        # Insertion order of this dict is the order in which the columns are added.
        derived = {
            'NDVI': (b8 - b4) / (b8 + b4),
            'NDWI': (b8 - b12) / (b8 + b12),
            'EVI': 2.5 * ((b8 - b4) / (b8 + 6*b4 - 7.5*b2 + 1)),
            'OSAVI': (b8 - b4) / (b8 + b4 + 0.16),
            'ARVI': (b8 - 2*b4 + b2) / (b8 + 2*b4 + b2),
            'VARI': (b3 - b4) / (b3 + b4 - b2),
        }
        for index_name, values in derived.items():
            df[f'{index_name}_{season}'] = values

In [None]:
def get_csv_data(fileName):
    """Read a CSV file into a DataFrame, tolerating unreadable files.

    Parameters:
        fileName: path of the CSV file.

    Returns:
        The parsed DataFrame, or an empty DataFrame (no rows, no columns) when
        reading fails; in that case the error is printed instead of raised.
    """
    try:
        return pd.read_csv(fileName)
    except Exception as read_error:
        print("Error reading file ", fileName, " - ", read_error)
        return pd.DataFrame()

In [None]:
# For Canopy Height
def pipeline(fileName):
    """Run the three canopy-height models on one exported CSV file.

    The file is read with get_csv_data, the SAR and optical index columns are
    added, and each model predicts from the columns listed in its features_*
    variable.

    Parameters:
        fileName: path of the CSV file; it must contain a '.geo' column and the
            band columns needed by add_s1_indices / add_s2_indices.

    Returns:
        A DataFrame with the columns '.geo', 'rh98_class', 'rh75_class' and
        'rh50_class'. If the file has no rows or could not be read, the empty
        frame returned by get_csv_data is passed through unchanged.
    """
    tile_df = get_csv_data(fileName)
    if len(tile_df) == 0:
        return tile_df

    add_s1_indices(tile_df)
    add_s2_indices(tile_df)

    res_df = pd.DataFrame()
    res_df['.geo'] = list(tile_df['.geo'])

    # (output column, model, input columns) in the order the columns are written
    model_specs = (
        ('rh98_class', model_rh98, features_rh98),
        ('rh75_class', model_rh75, features_rh75),
        ('rh50_class', model_rh50, features_rh50),
    )
    predictions = {
        column: list(model.predict(tile_df[feature_names]))
        for column, model, feature_names in model_specs
    }
    for column, values in predictions.items():
        res_df[column] = values

    return res_df

In [None]:
# Run the canopy-height pipeline for every year and every district of the
# selected zone, and write one result_chm.csv per district and year.
# A district's input folders are looked up for each zone in its 'IntersectingZones' list.
for year in years:
    for dist_num, ind in enumerate(district_mapping_df.index):
        # To resume an interrupted run, skip the districts already done, e.g.:
        # if dist_num < 14:
        #     continue
        district = district_mapping_df.loc[ind, 'District']
        zones = district_mapping_df.loc[ind, 'IntersectingZones']

        # Per-file results are collected and concatenated once at the end
        # instead of re-copying a growing DataFrame for every file.
        tile_results = []
        for zone in zones:
            print(f'\n{dist_num} District: {district}, Zone: {zone}, Year: {year}')
            dist_data_path = f'drive/MyDrive/{agroclimaticZone_acronym_dict[zone]}_{district}_{year}/'
            files = glob(dist_data_path + "*.csv")
            print("no. of files:", len(files), '\n')
            for fileName in files:
                # Named tile_df (not df) so the mapping table loaded into `df` is not overwritten.
                tile_df = pipeline(fileName)
                # pipeline() hands back the raw frame for files without rows;
                # dropping it keeps its input columns out of the result.
                if len(tile_df) > 0:
                    tile_results.append(tile_df)

        # pd.concat raises on an empty list, so fall back to an empty frame.
        merged_df = pd.concat(tile_results) if tile_results else pd.DataFrame()

        # to_csv does not create missing directories.
        output_dir = f'drive/MyDrive/{agroclimatic_zone}/{district}/{year}'
        os.makedirs(output_dir, exist_ok=True)
        merged_df.to_csv(f'{output_dir}/result_chm.csv', index=False)

## CCD

In [None]:
# Zone processed by the canopy-cover (CCD) section.
# Exactly one assignment should be active; the commented lines are the other zones that can be selected.
# agroclimatic_zone = 'Eastern Plateau & Hills Region'
agroclimatic_zone = 'Southern Plateau and Hills Region'
# agroclimatic_zone = 'East Coast Plains & Hills Region'
# agroclimatic_zone = 'Western Plateau and Hills Region'
# agroclimatic_zone = 'Central Plateau & Hills Region'
# agroclimatic_zone = 'Lower Gangetic Plain Region'
# agroclimatic_zone = 'Middle Gangetic Plain Region'
# agroclimatic_zone = 'Eastern Himalayan Region'
# agroclimatic_zone = 'Western Himalayan Region'
# agroclimatic_zone = 'Upper Gangetic Plain Region'
# agroclimatic_zone = 'Trans Gangetic Plain Region'

In [None]:
# Load the district table of the selected zone and collect its 'Name' column.
# The path is relative, so it is resolved against the current working directory.
df = pd.read_csv(f'drive/MyDrive/harsh/{agroclimatic_zone}.csv')
dist_list = list(df['Name'])

In [None]:
# Display the district names that the canopy-cover loop will process.
dist_list

In [None]:
# Saved canopy-cover models: one joblib file per agroclimatic zone. Every file
# name has the form
#   <zone name without ' & ', spaces replaced by '_'>_toa_monthly_cover_<number>.joblib
# so the lookup table is generated from the zone name and the number.
_COVER_MODEL_DIR = 'drive/MyDrive/dhruvi/best_models/corrected/'

# zone -> trailing number of the model file name
_COVER_MODEL_SUFFIXES = {
    'Central Plateau & Hills Region': 51,
    'Lower Gangetic Plain Region': 48,
    'Middle Gangetic Plain Region': 50,
    'Eastern Himalayan Region': 86,
    'Western Himalayan Region': 78,
    'Upper Gangetic Plain Region': 67,
    'Trans Gangetic Plain Region': 55,
    'East Coast Plains & Hills Region': 67,
    'Eastern Plateau & Hills Region': 60,
    'Western Plateau and Hills Region': 57,
    'Southern Plateau and Hills Region': 62,
}

agroclimatic_zone_model_path_mapping = {
    zone: (_COVER_MODEL_DIR
           + zone.replace(' & ', ' ').replace(' ', '_')
           + f'_toa_monthly_cover_{suffix}.joblib')
    for zone, suffix in _COVER_MODEL_SUFFIXES.items()
}

In [None]:
# Look up and load the canopy-cover model of the selected zone.
# A zone without an entry in agroclimatic_zone_model_path_mapping raises KeyError here.
# NOTE: joblib.load unpickles the file, so only load model files from a trusted location.
MODEL_PATH_cc = agroclimatic_zone_model_path_mapping[agroclimatic_zone]
model_cc = joblib.load(MODEL_PATH_cc)

In [None]:
# Input column names stored on the model (`feature_names_in_`); pipeline() uses
# them to select the columns passed to predict().
#
# Fail immediately when the model does not carry the attribute. Skipping the
# assignment silently would leave features_cc undefined - or left over from an
# earlier run with a different model - when pipeline() is called.
if not hasattr(model_cc, 'feature_names_in_'):
    raise AttributeError(
        f"The canopy-cover model loaded for '{agroclimatic_zone}' has no "
        "'feature_names_in_' attribute, so its input columns are unknown."
    )

features_cc = model_cc.feature_names_in_

In [None]:
# Season suffixes of the input columns (e.g. 'VV_kharif'); add_s1_indices and add_s2_indices loop over them.
seasons = ['kharif', 'rabi', 'zaid']

In [None]:
def add_s1_indices(df):
    """Add the derived SAR index columns of every season to `df` in place.

    For each entry of the module-level `seasons` list the columns
    `VV_<season>` and `VH_<season>` are read and eight columns named
    `<index>_<season>` are written. Nothing is returned.

    This computes the same columns as the function of the same name in the
    CHM section, which it redefines.
    """
    for season in seasons:
        vv = df[f'VV_{season}']
        vh = df[f'VH_{season}']

        # Both ratios are reused by several indices below.
        vv_over_vh = vv / vh
        vh_over_vv = vh / vv

        # Insertion order of this dict is the order in which the columns are added.
        derived = {
            'VV_VH_Ratio': vv_over_vh,
            'VH_VV_Ratio': vh_over_vv,
            'SAR_NDVI': (vh - vv) / (vh + vv),
            'SAR_DVI': vh - vv,
            'SAR_SVI': vh + vv,
            'SAR_RDVI': vh_over_vv - vv_over_vh,
            'SAR_NRDVI': (vh_over_vv - vv_over_vh) / (vh_over_vv + vv_over_vh),
            'SAR_SSDVI': vh**2 - vv**2,
        }
        for index_name, values in derived.items():
            df[f'{index_name}_{season}'] = values

def add_s2_indices(df):
    """Add the derived optical index columns of every season to `df` in place.

    For each entry of the module-level `seasons` list the band columns
    `B2_`, `B3_`, `B4_`, `B8_` and `B12_<season>` are read and the columns
    NDVI, NDWI, EVI, OSAVI, ARVI and VARI (each suffixed with the season) are
    written. Nothing is returned.

    This computes the same columns as the function of the same name in the
    CHM section, which it redefines.

    NOTE(review): the ARVI denominator adds B2, whereas the commonly cited
    definition subtracts it. It is left as is because the saved models were
    presumably trained on features computed this way - confirm before changing.
    """
    for season in seasons:
        b2 = df[f'B2_{season}']
        b3 = df[f'B3_{season}']
        b4 = df[f'B4_{season}']
        b8 = df[f'B8_{season}']
        b12 = df[f'B12_{season}']

        # Insertion order of this dict is the order in which the columns are added.
        derived = {
            'NDVI': (b8 - b4) / (b8 + b4),
            'NDWI': (b8 - b12) / (b8 + b12),
            'EVI': 2.5 * ((b8 - b4) / (b8 + 6*b4 - 7.5*b2 + 1)),
            'OSAVI': (b8 - b4) / (b8 + b4 + 0.16),
            'ARVI': (b8 - 2*b4 + b2) / (b8 + 2*b4 + b2),
            'VARI': (b3 - b4) / (b3 + b4 - b2),
        }
        for index_name, values in derived.items():
            df[f'{index_name}_{season}'] = values

In [None]:
def get_csv_data(fileName):
    """Read a CSV file into a DataFrame, tolerating unreadable files.

    Behaves the same as the function of the same name in the CHM section, which
    it redefines.

    Parameters:
        fileName: path of the CSV file.

    Returns:
        The parsed DataFrame, or an empty DataFrame (no rows, no columns) when
        reading fails; in that case the error is printed instead of raised.
    """
    try:
        return pd.read_csv(fileName)
    except Exception as read_error:
        print("Error reading file ", fileName, " - ", read_error)
        return pd.DataFrame()

In [None]:
# For Canopy Cover
def pipeline(fileName):
    """Run the canopy-cover model on one exported CSV file for all 12 months.

    NOTE: this replaces the canopy-height `pipeline` defined in the CHM section;
    after this cell has run, `pipeline` refers to the canopy-cover version.

    The file name is printed, the file is read with get_csv_data and the SAR and
    optical index columns are added. For every month 1..12 the columns
    'month_sin' and 'month_cos' are set to sin/cos of 2*pi*month/12 and the
    model predicts from the columns listed in features_cc.

    Parameters:
        fileName: path of the CSV file; it must contain a '.geo' column and the
            band columns needed by add_s1_indices / add_s2_indices.

    Returns:
        A DataFrame with the column '.geo' followed by 'cc_1' ... 'cc_12'. If
        the file has no rows or could not be read, the empty frame returned by
        get_csv_data is passed through unchanged.
    """
    print(fileName)

    tile_df = get_csv_data(fileName)
    if len(tile_df) == 0:
        return tile_df

    add_s1_indices(tile_df)
    add_s2_indices(tile_df)

    res_df = pd.DataFrame()
    res_df['.geo'] = list(tile_df['.geo'])

    for month in range(1, 13):
        # The same value is used for every row of the file.
        tile_df['month_sin'] = np.sin(2 * np.pi * month / 12)
        tile_df['month_cos'] = np.cos(2 * np.pi * month / 12)

        res_df[f'cc_{month}'] = list(model_cc.predict(tile_df[features_cc]))

    return res_df

In [None]:
# Run the canopy-cover pipeline for every year and every district in dist_list,
# and write one result_monthly_cc.csv per district and year.
# NOTE: `years` is not set in this section; the value assigned in an earlier
# section is used.
for year in years:
    for dist_num, district in enumerate(dist_list):
        # To resume an interrupted run, skip the districts already done, e.g.:
        # if dist_num < 53:
        #     continue
        print('\n', dist_num, district, year)
        dist_data_path = f'drive/MyDrive/{agroclimaticZone_acronym_dict[agroclimatic_zone]}_{district}_{year}/'
        files = glob(dist_data_path + "*.csv")
        print("no. of files:", len(files), '\n')

        # Per-file results are collected and concatenated once at the end
        # instead of re-copying a growing DataFrame for every file.
        tile_results = []
        for fileName in files:
            # Named tile_df (not df) so the district table loaded into `df` is not overwritten.
            tile_df = pipeline(fileName)
            # pipeline() hands back the raw frame for files without rows;
            # dropping it keeps its input columns out of the result.
            if len(tile_df) > 0:
                tile_results.append(tile_df)

        # pd.concat raises on an empty list, so fall back to an empty frame.
        merged_df = pd.concat(tile_results) if tile_results else pd.DataFrame()

        # to_csv does not create missing directories.
        output_dir = f'drive/MyDrive/{agroclimatic_zone}/{district}/{year}'
        os.makedirs(output_dir, exist_ok=True)
        merged_df.to_csv(f'{output_dir}/result_monthly_cc.csv', index=False)