In [3]:
import ee
ee.Authenticate()



Successfully saved authorization token.


In [2]:
import ee
import pandas as pd

In [3]:
ee.Initialize()

## Rainfall

In [207]:
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize()

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point(80.53, 6.1667).buffer(20000),
    'kandy': ee.Geometry.Point(80.6371, 7.2906).buffer(20000),
    'kegalle': ee.Geometry.Point(80.3485, 7.2530).buffer(20000)
}

# Define the date range
start_date = '2011-01-01'
end_date = '2011-05-31'

# Function to extract rainfall data for each image and region
def extract_rainfall(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    rainfall = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('precipitation')
    return ee.Feature(None, {'date': date, 'rainfall': rainfall})

# Function to get data for each district
def get_district_data(district, region):
    chirps = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') \
                .filterDate(start_date, end_date) \
                .filterBounds(region)
    
    rainfall_features = chirps.map(lambda image: extract_rainfall(image, region)).getInfo()
    
    rainfall_data = [{'date': feature['properties']['date'], 
                      'district': district,
                      'rainfall': feature['properties']['rainfall']} 
                     for feature in rainfall_features['features']]
    
    return rainfall_data

# Collect data for all districts
all_data = []
for district, region in districts.items():
    district_data = get_district_data(district, region)
    all_data.extend(district_data)

# Create a DataFrame
df = pd.DataFrame(all_data)

# Ensure the 'Date' column is in datetime format
df['date'] = pd.to_datetime(df['date'])

# Set 'Date' as the index
df.set_index('date', inplace=True)



In [208]:
df = df.groupby('district').resample('ME').sum()

df.drop(columns=['district'],inplace=True)
df.reset_index(inplace=True)
df = df[['date', 'district', 'rainfall']]
df_rainfall = df.sort_values(by=['date', 'district']).reset_index(drop=True)

In [209]:
df_rainfall

Unnamed: 0,date,district,rainfall
0,2011-01-31,kandy,349.679082
1,2011-01-31,kegalle,190.847129
2,2011-01-31,matara,336.414355
3,2011-02-28,kandy,334.853869
4,2011-02-28,kegalle,255.528496
5,2011-02-28,matara,304.910505
6,2011-03-31,kandy,125.319876
7,2011-03-31,kegalle,137.933144
8,2011-03-31,matara,243.574135
9,2011-04-30,kandy,465.714407


## Temperature

In [210]:
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize()

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (extended to include the last day)
start_date = '2011-01-01'
end_date = '2011-06-01'  # Extend by one day to include May 31

# Function to extract temperature data for each image and region
def extract_temperature(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    min_temp = image.select('minimum_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('minimum_2m_air_temperature')
    mean_temp = image.select('mean_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('mean_2m_air_temperature')
    max_temp = image.select('maximum_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('maximum_2m_air_temperature')
    return ee.Feature(None, {'date': date, 'min_temp': min_temp, 'mean_temp': mean_temp, 'max_temp': max_temp})

# Function to get temperature data for each district
def get_district_temperature(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    temperature_features = era5.map(lambda image: extract_temperature(image, region)).getInfo()
    
    temperature_data = [{'date': feature['properties']['date'], 
                         'district': district,
                         'min_temp': feature['properties']['min_temp'],
                         'mean_temp': feature['properties']['mean_temp'],
                         'max_temp': feature['properties']['max_temp']} 
                        for feature in temperature_features['features']]
    
    return temperature_data

# Collect temperature data for all districts
all_temperature_data = []
for district, region in districts.items():
    district_temperature_data = get_district_temperature(district, region)
    all_temperature_data.extend(district_temperature_data)

# Create a DataFrame
df_temperature = pd.DataFrame(all_temperature_data)

# Convert temperatures from Kelvin to Celsius
df_temperature['min_temp'] = df_temperature['min_temp'] - 273.15
df_temperature['mean_temp'] = df_temperature['mean_temp'] - 273.15
df_temperature['max_temp'] = df_temperature['max_temp'] - 273.15

# Ensure the 'date' column is in datetime format
df_temperature['date'] = pd.to_datetime(df_temperature['date'])

# Set 'date' as the index
# df_temperature.set_index('date', inplace=True)

# Save the DataFrame to a CSV file
# df_temperature.to_csv('district_temperature_2011.csv', index=True)

# Display the DataFrame
# print(df_temperature)


In [211]:
df_temperature.set_index('date', inplace=True)
df_temp = df_temperature.groupby('district').resample('ME').mean()
df_temp.reset_index(inplace=True)
df_temp = df_temp.sort_values(by=['date', 'district']).reset_index(drop=True)
df_temp = df_temp[['date', 'district', 'min_temp','max_temp','mean_temp']]
df_temp=df_temp.rename(columns={
    'min_temp':'min temp',
    'max_temp':'max temp',
    'mean_temp':'mean temp'
})


In [212]:
df_temp

Unnamed: 0,date,district,min temp,max temp,mean temp
0,2011-01-31,kandy,20.335188,25.971066,22.770838
1,2011-01-31,kegalle,21.882489,28.097871,24.610605
2,2011-01-31,matara,22.83583,27.804574,25.125435
3,2011-02-28,kandy,20.758498,27.625948,23.815408
4,2011-02-28,kegalle,22.412496,29.707987,25.66743
5,2011-02-28,matara,23.639027,28.285401,25.744503
6,2011-03-31,kandy,21.054354,30.365637,25.17655
7,2011-03-31,kegalle,23.297107,32.822048,27.330123
8,2011-03-31,matara,24.162716,29.262588,26.434777
9,2011-04-30,kandy,21.873159,30.106917,25.205729


In [213]:
df_combined = pd.merge(df_rainfall, df_temp, on=['date', 'district'])

In [214]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729


## wind

In [215]:
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize()

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (extended to include the last day)
start_date = '2011-01-01'
end_date = '2011-06-01'  # Extend by one day to include May 31

# Function to calculate wind speed from u and v components
def calculate_wind_speed(image):
    u10 = image.select('u_component_of_wind_10m')
    v10 = image.select('v_component_of_wind_10m')
    wind_speed = u10.pow(2).add(v10.pow(2)).sqrt().rename('wind_speed')
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    wind_speed = wind_speed.set('date', date)
    return wind_speed

# Function to extract wind speed data for each image and region
def extract_wind_speed(image, region):
    date = ee.Date(image.get('date')).format('YYYY-MM-dd')
    wind_speed = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('wind_speed')
    return ee.Feature(None, {'date': date, 'wind_speed': wind_speed})

# Function to get wind speed data for each district
def get_district_wind_speed(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region) \
            .map(calculate_wind_speed)
    
    wind_speed_features = era5.map(lambda image: extract_wind_speed(image, region)).getInfo()
    
    wind_speed_data = [{'date': feature['properties']['date'], 
                        'district': district,
                        'wind_speed': feature['properties']['wind_speed']} 
                       for feature in wind_speed_features['features']]
    
    return wind_speed_data

# Collect wind speed data for all districts
all_wind_speed_data = []
for district, region in districts.items():
    district_wind_speed_data = get_district_wind_speed(district, region)
    all_wind_speed_data.extend(district_wind_speed_data)

# Create a DataFrame
df_wind_speed = pd.DataFrame(all_wind_speed_data)

# Convert the 'date' column to datetime
df_wind_speed['date'] = pd.to_datetime(df_wind_speed['date'])

# Save the DataFrame to a CSV file (if needed)
# df_wind_speed.to_csv('district_wind_speed_2011.csv', index=False)

# Display the DataFrame
print(df_wind_speed)


          date district  wind_speed
0   2011-01-01   matara    1.875536
1   2011-01-02   matara    0.959702
2   2011-01-03   matara    0.722737
3   2011-01-04   matara    1.380615
4   2011-01-05   matara    1.256677
..         ...      ...         ...
448 2011-05-27  kegalle    3.470319
449 2011-05-28  kegalle    3.655297
450 2011-05-29  kegalle    3.372837
451 2011-05-30  kegalle    3.952858
452 2011-05-31  kegalle    4.047127

[453 rows x 3 columns]


In [216]:
df_wind_speed.set_index('date', inplace=True)
df_wind = df_wind_speed.groupby('district').resample('ME').mean()
df_wind.reset_index(inplace=True)
df_wind = df_wind.sort_values(by=['date', 'district']).reset_index(drop=True)
df_wind = df_wind[['date', 'district', 'wind_speed']]
df_wind=df_wind.rename(columns={
    'wind_speed':'wind speed',
    
})


In [217]:
df_wind

Unnamed: 0,date,district,wind speed
0,2011-01-31,kandy,1.952006
1,2011-01-31,kegalle,2.57787
2,2011-01-31,matara,2.059799
3,2011-02-28,kandy,1.535339
4,2011-02-28,kegalle,2.011685
5,2011-02-28,matara,1.321727
6,2011-03-31,kandy,1.219052
7,2011-03-31,kegalle,1.43497
8,2011-03-31,matara,1.615059
9,2011-04-30,kandy,1.044821


In [218]:
df_combined = pd.merge(df_combined,df_wind, on=['date', 'district'])

In [219]:
df_combined


Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821


In [None]:
#[mean_2m_air_temperature, minimum_2m_air_temperature, maximum_2m_air_temperature, dewpoint_2m_temperature, total_precipitation, surface_pressure, mean_sea_level_pressure, u_component_of_wind_10m, v_component_of_wind_10m]

## Dew point

In [220]:


# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (extended to include the last day)
start_date = '2011-01-01'
end_date = '2011-05-31'  # Extend by one day to include May 31

# Function to extract dew point temperature data for each image and region
def extract_dew_point(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    dew_point_temp = image.select('dewpoint_2m_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('dewpoint_2m_temperature')
    return ee.Feature(None, {'date': date, 'dew_point_temp': dew_point_temp})

# Function to get dew point temperature data for each district
def get_district_dew_point(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    dew_point_features = era5.map(lambda image: extract_dew_point(image, region)).getInfo()
    
    dew_point_data = [{'date': feature['properties']['date'], 
                       'district': district,
                       'dew_point_temp': feature['properties']['dew_point_temp']} 
                      for feature in dew_point_features['features']]
    
    return dew_point_data

# Collect dew point temperature data for all districts
all_dew_point_data = []
for district, region in districts.items():
    district_dew_point_data = get_district_dew_point(district, region)
    all_dew_point_data.extend(district_dew_point_data)

# Create a DataFrame
df_dew_point = pd.DataFrame(all_dew_point_data)

# Convert temperatures from Kelvin to Celsius
df_dew_point['dew_point_temp'] = df_dew_point['dew_point_temp'] - 273.15

# Convert the 'date' column to datetime
df_dew_point['date'] = pd.to_datetime(df_dew_point['date'])

# Save the DataFrame to a CSV file (if needed)
# df_dew_point.to_csv('district_dew_point_2011.csv', index=False)

# Display the DataFrame
print(df_dew_point)


          date district  dew_point_temp
0   2011-01-01   matara       23.183843
1   2011-01-02   matara       22.946827
2   2011-01-03   matara       23.472810
3   2011-01-04   matara       22.955106
4   2011-01-05   matara       22.905005
..         ...      ...             ...
445 2011-05-26  kegalle       24.817451
446 2011-05-27  kegalle       24.458998
447 2011-05-28  kegalle       24.403747
448 2011-05-29  kegalle       24.213333
449 2011-05-30  kegalle       24.299549

[450 rows x 3 columns]


In [221]:
df_dew_point.set_index('date', inplace=True)
df_dew = df_dew_point.groupby('district').resample('ME').mean()
df_dew.reset_index(inplace=True)
df_dew = df_dew.sort_values(by=['date', 'district']).reset_index(drop=True)
df_dew = df_dew[['date', 'district', 'dew_point_temp']]
df_dew=df_dew.rename(columns={
    'dew_point_temp':'dew point',
    
})

In [222]:
df_combined = pd.merge(df_combined,df_dew, on=['date', 'district'])

In [223]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824


## surface pressure

In [224]:
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize()

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (extended to include the last day)
start_date = '2011-01-01'
end_date = '2011-05-31'  # Extend by one day to include May 31

# Function to extract surface pressure data for each image and region
def extract_surface_pressure(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    surface_pressure = image.select('surface_pressure').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('surface_pressure')
    return ee.Feature(None, {'date': date, 'surface_pressure': surface_pressure})

# Function to get surface pressure data for each district
def get_district_surface_pressure(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    surface_pressure_features = era5.map(lambda image: extract_surface_pressure(image, region)).getInfo()
    
    surface_pressure_data = [{'date': feature['properties']['date'], 
                              'district': district,
                              'surface_pressure': feature['properties']['surface_pressure']} 
                             for feature in surface_pressure_features['features']]
    
    return surface_pressure_data

# Collect surface pressure data for all districts
all_surface_pressure_data = []
for district, region in districts.items():
    district_surface_pressure_data = get_district_surface_pressure(district, region)
    all_surface_pressure_data.extend(district_surface_pressure_data)

# Create a DataFrame
df_surface_pressure = pd.DataFrame(all_surface_pressure_data)

# Convert the 'date' column to datetime
df_surface_pressure['date'] = pd.to_datetime(df_surface_pressure['date'])

# Save the DataFrame to a CSV file (if needed)
# df_surface_pressure.to_csv('district_surface_pressure_2011.csv', index=False)

# Display the DataFrame
print(df_surface_pressure)


          date district  surface_pressure
0   2011-01-01   matara      99814.952952
1   2011-01-02   matara      99848.706313
2   2011-01-03   matara      99840.342046
3   2011-01-04   matara      99820.513603
4   2011-01-05   matara      99755.531346
..         ...      ...               ...
445 2011-05-26  kegalle      99489.766379
446 2011-05-27  kegalle      99541.546897
447 2011-05-28  kegalle      99566.826585
448 2011-05-29  kegalle      99584.574828
449 2011-05-30  kegalle      99567.785985

[450 rows x 3 columns]


In [225]:
df_surface_pressure.set_index('date', inplace=True)
df_pressure = df_surface_pressure.groupby('district').resample('ME').mean()
df_pressure.reset_index(inplace=True)
df_pressure = df_pressure.sort_values(by=['date', 'district']).reset_index(drop=True)
df_pressure = df_pressure[['date', 'district', 'surface_pressure']]
df_pressure=df_pressure.rename(columns={
    'surface_pressure':'surface pressure',
    
})

In [226]:
df_combined = pd.merge(df_combined,df_pressure, on=['date', 'district'])

In [227]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585


## relative humidity

In [228]:
import numpy as np
def saturation_vapor_pressure(temp):
    return 6.112 * np.exp((17.67 * temp) / (temp + 243.5))

# Calculate e_s and e_d for each row
df_combined['e_s'] = df_combined['mean temp'].apply(saturation_vapor_pressure)
df_combined['e_d'] = df_combined['dew point'].apply(saturation_vapor_pressure)

# Calculate relative humidity
df_combined['relative humidity'] = 100 * (df_combined['e_d'] / df_combined['e_s'])

# Drop intermediate columns
df_combined.drop(columns=['e_s', 'e_d'], inplace=True)

# Display the DataFrame with relative humidity
print(df_combined)

         date district    rainfall   min temp   max temp  mean temp  \
0  2011-01-31    kandy  349.679082  20.335188  25.971066  22.770838   
1  2011-01-31  kegalle  190.847129  21.882489  28.097871  24.610605   
2  2011-01-31   matara  336.414355  22.835830  27.804574  25.125435   
3  2011-02-28    kandy  334.853869  20.758498  27.625948  23.815408   
4  2011-02-28  kegalle  255.528496  22.412496  29.707987  25.667430   
5  2011-02-28   matara  304.910505  23.639027  28.285401  25.744503   
6  2011-03-31    kandy  125.319876  21.054354  30.365637  25.176550   
7  2011-03-31  kegalle  137.933144  23.297107  32.822048  27.330123   
8  2011-03-31   matara  243.574135  24.162716  29.262588  26.434777   
9  2011-04-30    kandy  465.714407  21.873159  30.106917  25.205729   
10 2011-04-30  kegalle  508.344547  23.900379  31.725733  26.921379   
11 2011-04-30   matara  318.994323  24.641685  28.977356  26.515309   
12 2011-05-31    kandy  137.730178  23.115136  29.702915  26.079827   
13 201

## cloud coverage

In [229]:
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize()

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range
start_date = '2011-01-01'
end_date = '2011-06-01'

# Function to calculate cloud coverage for each image and region
def extract_cloud_coverage(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    # Extract the QA band and calculate cloud coverage
    QA = image.select('state_1km')
    cloud_mask = QA.bitwiseAnd(1 << 10).neq(0) # Use bit 10 for clouds in state_1km band
    cloud_coverage = cloud_mask.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('state_1km')
    
    return ee.Feature(None, {
        'date': date, 
        'cloud_coverage': cloud_coverage if cloud_coverage is not None else 0
    })

# Function to get cloud coverage data for each district
def get_district_cloud_coverage(district, region):
    modis = ee.ImageCollection('MODIS/061/MOD09GA') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    cloud_features = modis.map(lambda image: extract_cloud_coverage(image, region)).getInfo()
    
    cloud_data = [{'date': feature['properties']['date'], 
                   'district': district,
                   'cloud_coverage': feature['properties'].get('cloud_coverage', 0)} 
                  for feature in cloud_features['features']]
    
    return cloud_data

# Collect cloud coverage data for all districts
all_cloud_coverage_data = []
for district, region in districts.items():
    district_cloud_coverage_data = get_district_cloud_coverage(district, region)
    all_cloud_coverage_data.extend(district_cloud_coverage_data)

# Create a DataFrame
df_cloud_coverage = pd.DataFrame(all_cloud_coverage_data)

# Convert the 'date' column to datetime
df_cloud_coverage['date'] = pd.to_datetime(df_cloud_coverage['date'])

# Display the DataFrame
print(df_cloud_coverage)


          date district  cloud_coverage
0   2011-01-01   matara        0.261440
1   2011-01-02   matara        0.658974
2   2011-01-03   matara        1.000000
3   2011-01-04   matara        1.000000
4   2011-01-05   matara        1.000000
..         ...      ...             ...
448 2011-05-27  kegalle        0.862516
449 2011-05-28  kegalle        0.000000
450 2011-05-29  kegalle        1.000000
451 2011-05-30  kegalle        0.959703
452 2011-05-31  kegalle        0.899257

[453 rows x 3 columns]


In [230]:
df_cloud_coverage.set_index('date', inplace=True)
df_cloud = df_cloud_coverage.groupby('district').resample('ME').mean()
df_cloud.reset_index(inplace=True)
df_cloud = df_cloud.sort_values(by=['date', 'district']).reset_index(drop=True)
df_cloud = df_cloud[['date', 'district', 'cloud_coverage']]
df_cloud=df_cloud.rename(columns={
    'cloud_coverage':'cloud coverage',
    
})

In [231]:
df_combined = pd.merge(df_combined,df_cloud, on=['date', 'district'])



In [232]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813


## solar radiation

In [234]:
# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (extended to include the last day)
start_date = '2011-01-01'
end_date = '2011-05-31'  # Extend by one day to include May 31

# Function to extract solar radiation data for each image and region
def extract_solar_radiation(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    solar_radiation = image.select('DSR').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('DSR')
    return ee.Feature(None, {'date': date, 'solar_radiation': solar_radiation})

# Function to get solar radiation data for each district
def get_district_solar_radiation(district, region):
    dataset = ee.ImageCollection('MODIS/061/MCD18A1') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    solar_radiation_features = dataset.map(lambda image: extract_solar_radiation(image, region)).getInfo()
    
    solar_radiation_data = [{'date': feature['properties']['date'], 
                             'district': district,
                             'solar_radiation': feature['properties'].get('solar_radiation', None)} 
                            for feature in solar_radiation_features['features']]
    
    return solar_radiation_data

# Collect solar radiation data for all districts
all_solar_radiation_data = []
for district, region in districts.items():
    district_solar_radiation_data = get_district_solar_radiation(district, region)
    all_solar_radiation_data.extend(district_solar_radiation_data)

# Create a DataFrame
df_solar_radiation = pd.DataFrame(all_solar_radiation_data)

# Convert the 'date' column to datetime
df_solar_radiation['date'] = pd.to_datetime(df_solar_radiation['date'])

# Save the DataFrame to a CSV file (if needed)
# df_solar_radiation.to_csv('district_solar_radiation_2011.csv', index=False)

# Display the DataFrame
print(df_solar_radiation)

          date district  solar_radiation
0   2011-01-01   matara       768.352953
1   2011-01-02   matara       668.498025
2   2011-01-03   matara       562.990004
3   2011-01-04   matara       435.371848
4   2011-01-05   matara       312.514862
..         ...      ...              ...
445 2011-05-26  kegalle       469.277051
446 2011-05-27  kegalle       602.664577
447 2011-05-28  kegalle              NaN
448 2011-05-29  kegalle       475.298701
449 2011-05-30  kegalle       618.815899

[450 rows x 3 columns]


In [236]:
df_solar_radiation.set_index('date', inplace=True)
df_radiation = df_solar_radiation.groupby('district').resample('ME').mean()
df_radiation.reset_index(inplace=True)
df_radiation = df_radiation.sort_values(by=['date', 'district']).reset_index(drop=True)
df_radiation= df_radiation[['date', 'district', 'solar_radiation']]
df_radiation=df_radiation.rename(columns={
    'solar_radiation':'solar radiation',
    
})

In [237]:
df_combined = pd.merge(df_combined,df_radiation, on=['date', 'district'])

In [238]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881,425.386229
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816,491.25002
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807,581.855339
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379,563.558663
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534,584.7951
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212,593.800685
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581,792.853279
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782,806.055389
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335,711.723472
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813,745.466549


In [165]:
df_combined.isna().sum()

date                 0
district             0
rainfall             0
min temp             0
max temp             0
mean temp            0
wind speed           0
dew point            0
surface pressure     0
relative humidity    0
cloud coverage       0
solar radiation      0
dtype: int64

## elevation

In [239]:

# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Function to extract elevation data for each region
def extract_elevation(region):
    elevation = ee.Image('USGS/SRTMGL1_003')

    min_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.min(), geometry=region, scale=30).get('elevation')
    mean_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=30).get('elevation')
    max_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.max(), geometry=region, scale=30).get('elevation')

    return {
        'min_elevation': min_elevation.getInfo(),
        'mean_elevation': mean_elevation.getInfo(),
        'max_elevation': max_elevation.getInfo()
    }

# Collect elevation data for all districts
all_elevation_data = []
for district, region in districts.items():
    elevation_data = extract_elevation(region)
    elevation_data['district'] = district
    all_elevation_data.append(elevation_data)

# Create a DataFrame
df_elevation = pd.DataFrame(all_elevation_data)

# Display the DataFrame
print(df_elevation)


   min_elevation  mean_elevation  max_elevation district
0             -7       21.653705            296   matara
1            132      619.259399           1693    kandy
2             21      222.039826           1264  kegalle


In [240]:
df_elevation= df_elevation[['district', 'min_elevation', 'max_elevation','mean_elevation']]
df_elevation=df_elevation.rename(columns={
    'min_elevation':'min elevation',
    'max_elevation':'max elevation',
    'mean_elevation':'mean elevation'
    
})

In [241]:
df_elevation

Unnamed: 0,district,min elevation,max elevation,mean elevation
0,matara,-7,296,21.653705
1,kandy,132,1693,619.259399
2,kegalle,21,1264,222.039826


In [242]:
df_combined = pd.merge(df_combined,df_elevation, on=['district'])

In [244]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881,425.386229,132,1693,619.259399
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816,491.25002,21,1264,222.039826
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807,581.855339,-7,296,21.653705
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379,563.558663,132,1693,619.259399
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534,584.7951,21,1264,222.039826
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212,593.800685,-7,296,21.653705
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581,792.853279,132,1693,619.259399
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782,806.055389,21,1264,222.039826
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335,711.723472,-7,296,21.653705
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813,745.466549,132,1693,619.259399


## vegetation

In [245]:
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the date range (MODIS VCF is usually available annually, so let's use 2011)
year = 2011
start_date = f'{year}-01-01'
end_date = f'{year}-12-31'

# Function to extract forest cover data for each image and region
def extract_forest_cover(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    tree_cover = image.select('Percent_Tree_Cover').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=250).get('Percent_Tree_Cover')
    return ee.Feature(None, {'date': date, 'tree_cover': tree_cover})

# Function to get forest cover data for each district
def get_district_forest_cover(district, region):
    modis_vcf = ee.ImageCollection('MODIS/006/MOD44B') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    forest_cover_features = modis_vcf.map(lambda image: extract_forest_cover(image, region)).getInfo()
    
    forest_cover_data = [{'date': feature['properties']['date'], 
                         'district': district,
                         'tree_cover': feature['properties']['tree_cover']} 
                        for feature in forest_cover_features['features']]
    
    return forest_cover_data

# Collect forest cover data for all districts
all_forest_cover_data = []
for district, region in districts.items():
    district_forest_cover_data = get_district_forest_cover(district, region)
    all_forest_cover_data.extend(district_forest_cover_data)

# Create a DataFrame
df_forest_cover = pd.DataFrame(all_forest_cover_data)

# Ensure the 'date' column is in datetime format
df_forest_cover['date'] = pd.to_datetime(df_forest_cover['date'])

# Merge the forest cover data with the existing DataFrame
print(df_forest_cover)

        date district  tree_cover
0 2011-03-06   matara   39.082176
1 2011-03-06    kandy   38.348642
2 2011-03-06  kegalle   44.336820


In [246]:
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the start and end year
start_year = 2011
end_year = 2014

# Function to extract forest cover data for each image and region
def extract_forest_cover(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    tree_cover = image.select('Percent_Tree_Cover').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=250).get('Percent_Tree_Cover')
    return ee.Feature(None, {'date': date, 'tree_cover': tree_cover})

# Function to get forest cover data for each district
def get_district_forest_cover(district, region, start_date, end_date):
    modis_vcf = ee.ImageCollection('MODIS/006/MOD44B') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    forest_cover_features = modis_vcf.map(lambda image: extract_forest_cover(image, region)).getInfo()
    
    forest_cover_data = [{'date': feature['properties']['date'], 
                         'district': district,
                         'tree_cover': feature['properties']['tree_cover']} 
                        for feature in forest_cover_features['features']]
    
    return forest_cover_data

# Collect forest cover data for all districts and years
all_forest_cover_data = []
for year in range(start_year, end_year + 1):
    start_date = f'{year}-01-01'
    end_date = f'{year}-12-31'
    for district, region in districts.items():
        district_forest_cover_data = get_district_forest_cover(district, region, start_date, end_date)
        all_forest_cover_data.extend(district_forest_cover_data)

# Create a DataFrame for forest cover data
df_forest_cover = pd.DataFrame(all_forest_cover_data)

# Ensure the 'date' column is in datetime format
df_forest_cover['date'] = pd.to_datetime(df_forest_cover['date'])

print(df_forest_cover)


         date district  tree_cover
0  2011-03-06   matara   39.082176
1  2011-03-06    kandy   38.348642
2  2011-03-06  kegalle   44.336820
3  2012-03-05   matara   44.399938
4  2012-03-05    kandy   41.934153
5  2012-03-05  kegalle   52.993439
6  2013-03-06   matara   31.977864
7  2013-03-06    kandy   38.311179
8  2013-03-06  kegalle   42.755281
9  2014-03-06   matara   34.543589
10 2014-03-06    kandy   41.804247
11 2014-03-06  kegalle   47.319173


In [247]:
df_forest_cover=df_forest_cover.rename(columns={
    'tree_cover':'tree cover'
})

In [248]:
df_combined


Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881,425.386229,132,1693,619.259399
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816,491.25002,21,1264,222.039826
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807,581.855339,-7,296,21.653705
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379,563.558663,132,1693,619.259399
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534,584.7951,21,1264,222.039826
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212,593.800685,-7,296,21.653705
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581,792.853279,132,1693,619.259399
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782,806.055389,21,1264,222.039826
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335,711.723472,-7,296,21.653705
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813,745.466549,132,1693,619.259399


In [249]:
df_combined['year']=df_combined['date'].dt.year
df_forest_cover['year']=df_forest_cover['date'].dt.year
df_combined = pd.merge(df_combined,df_forest_cover,on=['district','year'])
df_combined.drop(columns=['year'], inplace=True)
df_combined

Unnamed: 0,date_x,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation,date_y,tree cover
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881,425.386229,132,1693,619.259399,2011-03-06,38.348642
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816,491.25002,21,1264,222.039826,2011-03-06,44.33682
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807,581.855339,-7,296,21.653705,2011-03-06,39.082176
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379,563.558663,132,1693,619.259399,2011-03-06,38.348642
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534,584.7951,21,1264,222.039826,2011-03-06,44.33682
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212,593.800685,-7,296,21.653705,2011-03-06,39.082176
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581,792.853279,132,1693,619.259399,2011-03-06,38.348642
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782,806.055389,21,1264,222.039826,2011-03-06,44.33682
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335,711.723472,-7,296,21.653705,2011-03-06,39.082176
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813,745.466549,132,1693,619.259399,2011-03-06,38.348642


In [250]:
df_combined.drop(columns=['date_y'],inplace=True)


In [251]:
df_combined=df_combined.rename(columns={
    'date_x':'date'
})

In [252]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation,tree cover
0,2011-01-31,kandy,349.679082,20.335188,25.971066,22.770838,1.952006,20.532029,96399.319705,87.1956,0.755881,425.386229,132,1693,619.259399,38.348642
1,2011-01-31,kegalle,190.847129,21.882489,28.097871,24.610605,2.57787,21.612304,99588.391569,83.402168,0.647816,491.25002,21,1264,222.039826,44.33682
2,2011-01-31,matara,336.414355,22.83583,27.804574,25.125435,2.059799,22.264985,99977.29408,84.164655,0.60807,581.855339,-7,296,21.653705,39.082176
3,2011-02-28,kandy,334.853869,20.758498,27.625948,23.815408,1.535339,20.847558,96421.585454,83.467879,0.771379,563.558663,132,1693,619.259399,38.348642
4,2011-02-28,kegalle,255.528496,22.412496,29.707987,25.66743,2.011685,22.026474,99604.903839,80.316916,0.717534,584.7951,21,1264,222.039826,44.33682
5,2011-02-28,matara,304.910505,23.639027,28.285401,25.744503,1.321727,23.022696,100012.636227,84.942395,0.74212,593.800685,-7,296,21.653705,39.082176
6,2011-03-31,kandy,125.319876,21.054354,30.365637,25.17655,1.219052,20.751481,96402.126971,76.477862,0.529581,792.853279,132,1693,619.259399,38.348642
7,2011-03-31,kegalle,137.933144,23.297107,32.822048,27.330123,1.43497,22.280695,99572.935846,73.946637,0.477782,806.055389,21,1264,222.039826,44.33682
8,2011-03-31,matara,243.574135,24.162716,29.262588,26.434777,1.615059,23.584167,100019.503806,84.355999,0.576335,711.723472,-7,296,21.653705,39.082176
9,2011-04-30,kandy,465.714407,21.873159,30.106917,25.205729,1.044821,21.670824,96394.862585,80.778683,0.608813,745.466549,132,1693,619.259399,38.348642


In [204]:


# Define the regions of interest for each district
districts = {
    'matara': ee.Geometry.Point([80.5353, 5.9488]).buffer(20000),
    'kandy': ee.Geometry.Point([80.6371, 7.2906]).buffer(20000),
    'kegalle': ee.Geometry.Point([80.3485, 7.2530]).buffer(20000)
}

# Define the time range
years = [2011, 2012, 2013, 2014]

# Function to extract urbanization data for each image and region
def extract_urbanization(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    urban_area = image.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=500).get('built_surface')
    return ee.Feature(None, {'date': date, 'urban_area': urban_area})

# Function to get urbanization data for each district
def get_district_urbanization(district, region):
    ghsl =ee.ImageCollection("JRC/GHSL/P2023A/GHS_BUILT_S") \
            .filterBounds(region) \
            .map(lambda img: img.clip(region)) \
            .select('built_surface')
    
    urbanization_features = ghsl.map(lambda image: extract_urbanization(image, region)).getInfo()
    
    urbanization_data = [{'date': feature['properties']['date'], 
                          'district': district,
                          'urban_area': feature['properties']['urban_area']} 
                         for feature in urbanization_features['features']]
    
    return urbanization_data

# Collect urbanization data for all districts and years
all_urbanization_data = []
for district, region in districts.items():
    for year in years:
        start_date = f'{year}-01-01'
        end_date = f'{year}-12-31'
        urbanization_data = get_district_urbanization(district, region)
        all_urbanization_data.extend(urbanization_data)

# Create a DataFrame
df_urbanization = pd.DataFrame(all_urbanization_data)

# Convert the 'date' column to datetime
df_urbanization['date'] = pd.to_datetime(df_urbanization['date'])

# Save the DataFrame to a CSV file (if needed)
# df_urbanization.to_csv('district_urbanization.csv', index=False)

# Display the DataFrame
print(df_urbanization)


          date district  urban_area
0   1975-01-01   matara   11.402187
1   1980-01-01   matara   14.439952
2   1985-01-01   matara   20.138398
3   1990-01-01   matara   27.137343
4   1995-01-01   matara   36.944413
..         ...      ...         ...
139 2010-01-01  kegalle   23.316268
140 2015-01-01  kegalle   54.449276
141 2020-01-01  kegalle  112.675309
142 2025-01-01  kegalle  168.962451
143 2030-01-01  kegalle  209.608885

[144 rows x 3 columns]


In [205]:
df_urbanization.tail()

Unnamed: 0,date,district,urban_area
139,2010-01-01,kegalle,23.316268
140,2015-01-01,kegalle,54.449276
141,2020-01-01,kegalle,112.675309
142,2025-01-01,kegalle,168.962451
143,2030-01-01,kegalle,209.608885


In [253]:
df_combined.to_excel('test_dengue_dataset.xlsx',index=False)