In [446]:
import folium
import geopandas as gpd
import ee

# Initialize the Earth Engine
ee.Initialize(project='ee-s18370')

# Load the GeoJSON file using geopandas
geojson_file = 'lk.json'
gdf = gpd.read_file(geojson_file)

# Initialize a folium map centered around Sri Lanka
m = folium.Map(location=[7.8731, 80.7718], zoom_start=8)

# Function to add polygons to the folium map
def add_polygon_to_map(district_name, geometry, map_object):
    geo_json = geometry.getInfo()  # Get geometry info from Earth Engine
    folium.GeoJson(
        geo_json,
        name=district_name,
        style_function=lambda feature: {
            'color': 'blue',
            'weight': 2,
            'fillOpacity': 0.1,
        }
    ).add_to(map_object)

# Add each district polygon to the map
for index, row in gdf.iterrows():
    district_name = row['name']
    geometry = row['geometry']
    coords = geometry.__geo_interface__['coordinates']
    
    if geometry.geom_type == 'Polygon':
        ee_geometry = ee.Geometry.Polygon(coords)
    elif geometry.geom_type == 'MultiPolygon':
        ee_geometry = ee.Geometry.MultiPolygon(coords)
    else:
        continue
    
    add_polygon_to_map(district_name, ee_geometry, m)

# Display the map
m.save('sri_lanka_districts.html')


# RAINFALL

In [447]:
import geopandas as gpd
import ee
import pandas as pd

# Initialize the Earth Engine API
ee.Initialize(project='ee-s18370')

# Create a mapping of current district names to the original names
name_mapping = {
    'Trikuṇāmalaya': 'Trincomalee',
    'Mulativ': 'Mullaitivu',
    'Yāpanaya': 'Jaffna',
    'Kilinŏchchi': 'Kilinochchi',
    'Mannārama': 'Mannar',
    'Puttalama': 'Puttalam',
    'Gampaha': 'Gampaha',
    'Kŏḷamba': 'Colombo',
    'Kaḷutara': 'Kalutara',
    'Gālla': 'Galle',
    'Mātara': 'Matara',
    'Hambantŏṭa': 'Hambantota',
    'Ampāra': 'Ampara',
    'Maḍakalapuva': 'Batticaloa',
    'Ratnapura': 'Ratnapura',
    'Mŏṇarāgala': 'Moneragala',
    'Kægalla': 'Kegalle',
    'Badulla': 'Badulla',
    'Mātale': 'Matale',
    'Pŏḷŏnnaruva': 'Polonnaruwa',
    'Kuruṇægala': 'Kurunegala',
    'Anurādhapura': 'Anuradhapura',
    'Nuvara Ĕliya': 'Nuwara Eliya',
    'Vavuniyāva': 'Vavuniya',
    'Mahanuvara': 'Kandy'
}

# Read the GeoJSON file
json_file = 'lk.json'
gdf = gpd.read_file(json_file)

# Initialize an empty dictionary to store the district geometries
districts = {}

# Iterate over the rows in the GeoDataFrame to extract district geometries
for index, row in gdf.iterrows():
    district_name = row['name']  # Assuming the district name is in the 'name' column
    
    # Apply the name mapping
    original_name = name_mapping.get(district_name, district_name)
    
    geometry = row['geometry']   # Get the geometry

    # Convert the geometry to a format compatible with GEE
    coords = geometry.__geo_interface__['coordinates']
    if geometry.geom_type == 'Polygon':
        ee_geometry = ee.Geometry.Polygon(coords)
    elif geometry.geom_type == 'MultiPolygon':
        ee_geometry = ee.Geometry.MultiPolygon(coords)
    else:
        print(f"Skipping unsupported geometry type: {geometry.geom_type}")
        continue

    # Add the GEE geometry to the dictionary with the original district name as the key
    districts[original_name] = ee_geometry

# Define the date range
start_date = '2021-01-01'
end_date = '2021-12-31'

# Function to extract rainfall data for each image and region
def extract_rainfall(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')  # Extract the date
    rainfall = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('precipitation')
    
    # Return both date and rainfall in a feature
    return ee.Feature(None, {'date': date, 'rainfall': rainfall})

# Function to get data for each district
def get_district_data(district, region):
    chirps = ee.ImageCollection('UCSB-CHG/CHIRPS/DAILY') \
                .filterDate(start_date, end_date) \
                .filterBounds(region)
    
    rainfall_features = chirps.map(lambda image: extract_rainfall(image, region)).getInfo()
    
    # Ensure the features are present and properly extracted
    if 'features' not in rainfall_features:
        return []
    
    # Extract the rainfall data with date and district information
    rainfall_data = [{'date': feature['properties']['date'],  # Include date here
                      'district': district,
                      'rainfall': feature['properties']['rainfall']} 
                     for feature in rainfall_features['features']]
    
    return rainfall_data

# Now call the district data extraction as before
all_data = []
for district, region in districts.items():
    district_data = get_district_data(district, region)
    all_data.extend(district_data)

# Create the DataFrame again
df = pd.DataFrame(all_data)

# Check the first few rows to ensure the 'date' column is present
print(df.head())  


         date     district   rainfall
0  2021-01-01  Trincomalee   0.000000
1  2021-01-02  Trincomalee   3.527259
2  2021-01-03  Trincomalee  22.025002
3  2021-01-04  Trincomalee  21.337069
4  2021-01-05  Trincomalee   6.839577


In [448]:
# Ensure the 'date' column is in datetime format
df['date'] = pd.to_datetime(df['date'], errors='coerce')

# Extract year and month for grouping
df['year_month'] = df['date'].dt.to_period('M')  # Creates a year-month column in Period dtype

# Group by district and year-month, and then sum the rainfall
monthly_rainfall = df.groupby(['district', 'year_month'])['rainfall'].sum().reset_index()

# Convert the 'year_month' period to the last day of the month and format it as a string (object)
monthly_rainfall['date'] = monthly_rainfall['year_month'].dt.to_timestamp(how='end').dt.strftime('%Y-%m-%d')

# Drop 'year_month' column if it's no longer needed
monthly_rainfall.drop(columns=['year_month'], inplace=True)

# Reorder columns for clarity
monthly_rainfall['date'] = pd.to_datetime(monthly_rainfall['date'], errors='coerce')
monthly_rainfall = monthly_rainfall[['date', 'district', 'rainfall']]

# Display the result
print(monthly_rainfall)

          date  district    rainfall
0   2021-01-31    Ampara  363.088491
1   2021-02-28    Ampara  128.739526
2   2021-03-31    Ampara  134.947477
3   2021-04-30    Ampara   83.631609
4   2021-05-31    Ampara  131.615330
..         ...       ...         ...
295 2021-08-31  Vavuniya   42.234285
296 2021-09-30  Vavuniya  158.343775
297 2021-10-31  Vavuniya  299.533812
298 2021-11-30  Vavuniya  663.122439
299 2021-12-31  Vavuniya   87.326954

[300 rows x 3 columns]


In [449]:
print(monthly_rainfall.dtypes)

date        datetime64[ns]
district            object
rainfall           float64
dtype: object


# Temprature

In [451]:
# Define the date range (extended to include the last day)
start_date = '2021-01-01'
end_date = '2021-12-31'  # Extend by one day to include May 31

# Function to extract temperature data for each image and region
def extract_temperature(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    min_temp = image.select('minimum_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('minimum_2m_air_temperature')
    mean_temp = image.select('mean_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('mean_2m_air_temperature')
    max_temp = image.select('maximum_2m_air_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('maximum_2m_air_temperature')
    return ee.Feature(None, {'date': date, 'min_temp': min_temp, 'mean_temp': mean_temp, 'max_temp': max_temp})

# Function to get temperature data for each district
def get_district_temperature(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    temperature_features = era5.map(lambda image: extract_temperature(image, region)).getInfo()
    
    temperature_data = [{'date': feature['properties']['date'], 
                         'district': district,
                         'min_temp': feature['properties']['min_temp'],
                         'mean_temp': feature['properties']['mean_temp'],
                         'max_temp': feature['properties']['max_temp']} 
                        for feature in temperature_features['features']]
    
    return temperature_data

# Collect temperature data for all districts
all_temperature_data = []
for district, region in districts.items():
    district_temperature_data = get_district_temperature(district, region)
    all_temperature_data.extend(district_temperature_data)

# Create a DataFrame
df_temperature = pd.DataFrame(all_temperature_data)

# Convert temperatures from Kelvin to Celsius
df_temperature['min_temp'] = df_temperature['min_temp'] - 273.15
df_temperature['mean_temp'] = df_temperature['mean_temp'] - 273.15
df_temperature['max_temp'] = df_temperature['max_temp'] - 273.15

# Ensure the 'date' column is in datetime format
df_temperature['date'] = pd.to_datetime(df_temperature['date'])

KeyError: 'min_temp'

In [None]:
df_temperature.set_index('date', inplace=True)
df_temp = df_temperature.groupby('district').resample('ME').mean()
df_temp.reset_index(inplace=True)
df_temp = df_temp.sort_values(by=['date', 'district']).reset_index(drop=True)
df_temp = df_temp[['date', 'district', 'min_temp','max_temp','mean_temp']]
df_temp=df_temp.rename(columns={
    'min_temp':'min temp',
    'max_temp':'max temp',
    'mean_temp':'mean temp'
})

df_temp

Unnamed: 0,date,district,min temp,max temp,mean temp
0,2020-01-31,Ampara,22.033402,29.427500,25.304901
1,2020-01-31,Anuradhapura,22.493609,29.996842,25.948760
2,2020-01-31,Badulla,18.837955,26.910938,22.482890
3,2020-01-31,Batticaloa,23.086550,29.432555,25.918559
4,2020-01-31,Colombo,24.577640,32.293555,28.117113
...,...,...,...,...,...
170,2020-07-31,Polonnaruwa,26.055390,34.298361,29.261693
171,2020-07-31,Puttalam,27.135161,29.604614,28.229602
172,2020-07-31,Ratnapura,22.915704,28.501252,25.188201
173,2020-07-31,Trincomalee,26.463256,33.910205,29.353936


In [None]:
print(df_temp.dtypes)

date         datetime64[ns]
district             object
min temp            float64
max temp            float64
mean temp           float64
dtype: object


In [None]:
df_combined = pd.merge(monthly_rainfall, df_temp, on=['date', 'district'])

In [452]:
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation,tree_cover,urbanization
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600,82.086512,0.729976,610.151188,-20,858,65.859763,26.830810,1.989998
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559,80.339398,0.753856,604.735088,-20,858,65.859763,26.830810,1.989998
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831,74.769917,0.657447,723.050410,-20,858,65.859763,26.830810,1.989998
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347,77.240012,0.586747,770.067486,-20,858,65.859763,26.830810,1.989998
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896,76.729817,0.621787,681.313952,-20,858,65.859763,26.830810,1.989998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328,71.232829,0.537118,793.402548,20,190,78.095170,33.641493,1.998551
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945,73.290690,0.609212,805.273223,20,190,78.095170,33.641493,1.998551
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874,78.468824,0.687780,689.369725,20,190,78.095170,33.641493,1.998551
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044,72.652221,0.636293,707.561118,20,190,78.095170,33.641493,1.998551


# Wind

In [None]:
# Initialize the Earth Engine API
ee.Initialize()


# Define the date range (extended to include the last day)
start_date = '2021-01-01'
end_date = '2021-12-31'  # Extend by one day to include May 31

# Function to calculate wind speed from u and v components
def calculate_wind_speed(image):
    u10 = image.select('u_component_of_wind_10m')
    v10 = image.select('v_component_of_wind_10m')
    wind_speed = u10.pow(2).add(v10.pow(2)).sqrt().rename('wind_speed')
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    wind_speed = wind_speed.set('date', date)
    return wind_speed

# Function to extract wind speed data for each image and region
def extract_wind_speed(image, region):
    date = ee.Date(image.get('date')).format('YYYY-MM-dd')
    wind_speed = image.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('wind_speed')
    return ee.Feature(None, {'date': date, 'wind_speed': wind_speed})

# Function to get wind speed data for each district
def get_district_wind_speed(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region) \
            .map(calculate_wind_speed)
    
    wind_speed_features = era5.map(lambda image: extract_wind_speed(image, region)).getInfo()
    
    wind_speed_data = [{'date': feature['properties']['date'], 
                        'district': district,
                        'wind_speed': feature['properties']['wind_speed']} 
                       for feature in wind_speed_features['features']]
    
    return wind_speed_data

# Collect wind speed data for all districts
all_wind_speed_data = []
for district, region in districts.items():
    district_wind_speed_data = get_district_wind_speed(district, region)
    all_wind_speed_data.extend(district_wind_speed_data)

# Create a DataFrame
df_wind_speed = pd.DataFrame(all_wind_speed_data)

# Convert the 'date' column to datetime
df_wind_speed['date'] = pd.to_datetime(df_wind_speed['date'])

# Save the DataFrame to a CSV file (if needed)
# df_wind_speed.to_csv('district_wind_speed_2011.csv', index=False)

# Display the DataFrame
print(df_wind_speed)

           date     district  wind_speed
0    2020-01-01  Trincomalee    2.504977
1    2020-01-02  Trincomalee    2.690178
2    2020-01-03  Trincomalee    2.748634
3    2020-01-04  Trincomalee    2.647236
4    2020-01-05  Trincomalee    3.458602
...         ...          ...         ...
4770 2020-07-05        Kandy    3.597761
4771 2020-07-06        Kandy    3.562922
4772 2020-07-07        Kandy    2.567389
4773 2020-07-08        Kandy    1.604430
4774 2020-07-09        Kandy    1.522783

[4775 rows x 3 columns]


In [None]:
df_wind_speed.set_index('date', inplace=True)
df_wind = df_wind_speed.groupby('district').resample('ME').mean()
df_wind.reset_index(inplace=True)
df_wind = df_wind.sort_values(by=['date', 'district']).reset_index(drop=True)
df_wind = df_wind[['date', 'district', 'wind_speed']]
df_wind=df_wind.rename(columns={
    'wind_speed':'wind speed',
    
})

In [None]:
df_combined = pd.merge(df_combined,df_wind, on=['date', 'district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194
...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719


# Dew Point

In [None]:

# Define the date range (extended to include the last day)
start_date = '2021-01-01'
end_date = '2021-12-31'  

# Function to extract dew point temperature data for each image and region
def extract_dew_point(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    dew_point_temp = image.select('dewpoint_2m_temperature').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('dewpoint_2m_temperature')
    return ee.Feature(None, {'date': date, 'dew_point_temp': dew_point_temp})

# Function to get dew point temperature data for each district
def get_district_dew_point(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    dew_point_features = era5.map(lambda image: extract_dew_point(image, region)).getInfo()
    
    dew_point_data = [{'date': feature['properties']['date'], 
                       'district': district,
                       'dew_point_temp': feature['properties']['dew_point_temp']} 
                      for feature in dew_point_features['features']]
    
    return dew_point_data

# Collect dew point temperature data for all districts
all_dew_point_data = []
for district, region in districts.items():
    district_dew_point_data = get_district_dew_point(district, region)
    all_dew_point_data.extend(district_dew_point_data)

# Create a DataFrame
df_dew_point = pd.DataFrame(all_dew_point_data)

# Convert temperatures from Kelvin to Celsius
df_dew_point['dew_point_temp'] = df_dew_point['dew_point_temp'] - 273.15

# Convert the 'date' column to datetime
df_dew_point['date'] = pd.to_datetime(df_dew_point['date'])

# Save the DataFrame to a CSV file (if needed)
# df_dew_point.to_csv('district_dew_point_2011.csv', index=False)

# Display the DataFrame
print(df_dew_point)


           date     district  dew_point_temp
0    2020-01-01  Trincomalee       24.225111
1    2020-01-02  Trincomalee       24.478167
2    2020-01-03  Trincomalee       24.596626
3    2020-01-04  Trincomalee       24.172927
4    2020-01-05  Trincomalee       23.863809
...         ...          ...             ...
4770 2020-07-05        Kandy       20.644976
4771 2020-07-06        Kandy       21.130081
4772 2020-07-07        Kandy       21.570312
4773 2020-07-08        Kandy       22.197996
4774 2020-07-09        Kandy       22.133919

[4775 rows x 3 columns]


In [None]:
df_dew_point.set_index('date', inplace=True)
df_dew = df_dew_point.groupby('district').resample('ME').mean()
df_dew.reset_index(inplace=True)
df_dew = df_dew.sort_values(by=['date', 'district']).reset_index(drop=True)
df_dew = df_dew[['date', 'district', 'dew_point_temp']]
df_dew=df_dew.rename(columns={
    'dew_point_temp':'dew point',
    
})

In [None]:
df_combined = pd.merge(df_combined,df_dew, on=['date', 'district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849
...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711


# SURFACE PRESSURE

In [None]:
# Initialize the Earth Engine API
ee.Initialize()


# Define the date range (extended to include the last day)
start_date = '2021-01-01'
end_date = '2021-12-31'  # Extend by one day to include May 31

# Function to extract surface pressure data for each image and region
def extract_surface_pressure(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    surface_pressure = image.select('surface_pressure').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('surface_pressure')
    return ee.Feature(None, {'date': date, 'surface_pressure': surface_pressure})

# Function to get surface pressure data for each district
def get_district_surface_pressure(district, region):
    era5 = ee.ImageCollection('ECMWF/ERA5/DAILY') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    surface_pressure_features = era5.map(lambda image: extract_surface_pressure(image, region)).getInfo()
    
    surface_pressure_data = [{'date': feature['properties']['date'], 
                              'district': district,
                              'surface_pressure': feature['properties']['surface_pressure']} 
                             for feature in surface_pressure_features['features']]
    
    return surface_pressure_data

# Collect surface pressure data for all districts
all_surface_pressure_data = []
for district, region in districts.items():
    district_surface_pressure_data = get_district_surface_pressure(district, region)
    all_surface_pressure_data.extend(district_surface_pressure_data)

# Create a DataFrame
df_surface_pressure = pd.DataFrame(all_surface_pressure_data)

# Convert the 'date' column to datetime
df_surface_pressure['date'] = pd.to_datetime(df_surface_pressure['date'])

# Save the DataFrame to a CSV file (if needed)
# df_surface_pressure.to_csv('district_surface_pressure_2011.csv', index=False)

# Display the DataFrame
print(df_surface_pressure)

           date     district  surface_pressure
0    2020-01-01  Trincomalee     100814.006815
1    2020-01-02  Trincomalee     100884.355294
2    2020-01-03  Trincomalee     100747.260196
3    2020-01-04  Trincomalee     100611.463802
4    2020-01-05  Trincomalee     100634.560348
...         ...          ...               ...
4770 2020-07-05        Kandy      95569.161816
4771 2020-07-06        Kandy      95583.406658
4772 2020-07-07        Kandy      95685.392550
4773 2020-07-08        Kandy      95665.222072
4774 2020-07-09        Kandy      95613.508600

[4775 rows x 3 columns]


In [None]:
df_surface_pressure.set_index('date', inplace=True)
df_pressure = df_surface_pressure.groupby('district').resample('ME').mean()
df_pressure.reset_index(inplace=True)
df_pressure = df_pressure.sort_values(by=['date', 'district']).reset_index(drop=True)
df_pressure = df_pressure[['date', 'district', 'surface_pressure']]
df_pressure=df_pressure.rename(columns={
    'surface_pressure':'surface pressure',
    
})

In [None]:
df_combined = pd.merge(df_combined,df_pressure, on=['date', 'district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896
...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044


# RELATIVE HUMIDITY

In [None]:
import numpy as np
def saturation_vapor_pressure(temp):
    return 6.112 * np.exp((17.67 * temp) / (temp + 243.5))

# Calculate e_s and e_d for each row
df_combined['e_s'] = df_combined['mean temp'].apply(saturation_vapor_pressure)
df_combined['e_d'] = df_combined['dew point'].apply(saturation_vapor_pressure)

# Calculate relative humidity
df_combined['relative humidity'] = 100 * (df_combined['e_d'] / df_combined['e_s'])

# Drop intermediate columns
df_combined.drop(columns=['e_s', 'e_d'], inplace=True)

# Display the DataFrame with relative humidity
print(df_combined)

          date  district    rainfall   min temp   max temp  mean temp  \
0   2020-01-31    Ampara   54.097461  22.033402  29.427500  25.304901   
1   2020-02-29    Ampara   27.578651  22.217799  29.555104  25.573322   
2   2020-03-31    Ampara   16.891400  22.789691  32.078556  26.959707   
3   2020-04-30    Ampara   56.572590  24.085308  33.266407  28.040798   
4   2020-05-31    Ampara  167.530384  25.633958  33.364023  28.917083   
..         ...       ...         ...        ...        ...        ...   
170 2020-03-31  Vavuniya    7.497944  23.516430  33.461552  28.100214   
171 2020-04-30  Vavuniya   65.149132  25.558257  34.637041  29.487052   
172 2020-05-31  Vavuniya  147.882344  26.783664  33.291558  29.517250   
173 2020-06-30  Vavuniya    5.482827  27.109948  33.777595  29.878702   
174 2020-07-31  Vavuniya  221.851802  26.972228  33.981664  29.627208   

     wind speed  dew point  surface pressure  relative humidity  
0      2.207215  22.030324     100136.977600          82.

# CLOUD COVERAGE

In [None]:
# Define the date range
start_date = '2021-01-01'
end_date = '2021-12-31'

# Function to calculate cloud coverage for each image and region
def extract_cloud_coverage(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    # Extract the QA band and calculate cloud coverage
    QA = image.select('state_1km')
    cloud_mask = QA.bitwiseAnd(1 << 10).neq(0) # Use bit 10 for clouds in state_1km band
    cloud_coverage = cloud_mask.reduceRegion(
        reducer=ee.Reducer.mean(),
        geometry=region,
        scale=5000
    ).get('state_1km')
    
    return ee.Feature(None, {
        'date': date, 
        'cloud_coverage': cloud_coverage if cloud_coverage is not None else 0
    })

# Function to get cloud coverage data for each district
def get_district_cloud_coverage(district, region):
    modis = ee.ImageCollection('MODIS/061/MOD09GA') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    cloud_features = modis.map(lambda image: extract_cloud_coverage(image, region)).getInfo()
    
    cloud_data = [{'date': feature['properties']['date'], 
                   'district': district,
                   'cloud_coverage': feature['properties'].get('cloud_coverage', 0)} 
                  for feature in cloud_features['features']]
    
    return cloud_data

# Collect cloud coverage data for all districts
all_cloud_coverage_data = []
for district, region in districts.items():
    district_cloud_coverage_data = get_district_cloud_coverage(district, region)
    all_cloud_coverage_data.extend(district_cloud_coverage_data)

# Create a DataFrame
df_cloud_coverage = pd.DataFrame(all_cloud_coverage_data)

# Convert the 'date' column to datetime
df_cloud_coverage['date'] = pd.to_datetime(df_cloud_coverage['date'])

# Display the DataFrame
print(df_cloud_coverage)

           date     district  cloud_coverage
0    2020-01-01  Trincomalee        0.872909
1    2020-01-02  Trincomalee        1.000000
2    2020-01-03  Trincomalee        0.754972
3    2020-01-04  Trincomalee        0.908164
4    2020-01-05  Trincomalee        0.586989
...         ...          ...             ...
9120 2020-12-26        Kandy        0.988430
9121 2020-12-27        Kandy        0.950714
9122 2020-12-28        Kandy        0.980566
9123 2020-12-29        Kandy        0.527874
9124 2020-12-30        Kandy        0.645972

[9125 rows x 3 columns]


In [None]:
df_cloud_coverage.set_index('date', inplace=True)
df_cloud = df_cloud_coverage.groupby('district').resample('ME').mean()
df_cloud.reset_index(inplace=True)
df_cloud = df_cloud.sort_values(by=['date', 'district']).reset_index(drop=True)
df_cloud = df_cloud[['date', 'district', 'cloud_coverage']]
df_cloud=df_cloud.rename(columns={
    'cloud_coverage':'cloud coverage',
    
})

In [None]:
df_combined = pd.merge(df_combined,df_cloud, on=['date', 'district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600,82.086512,0.729976
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559,80.339398,0.753856
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831,74.769917,0.657447
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347,77.240012,0.586747
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896,76.729817,0.621787
...,...,...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328,71.232829,0.537118
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945,73.290690,0.609212
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874,78.468824,0.687780
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044,72.652221,0.636293


# SOLAR RADIATION

In [None]:
# Define the date range (extended to include the last day)
start_date = '2021-01-01'
end_date = '2021-12-31'  # Extend by one day to include May 31

# Function to extract solar radiation data for each image and region
def extract_solar_radiation(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    solar_radiation = image.select('DSR').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=5000).get('DSR')
    return ee.Feature(None, {'date': date, 'solar_radiation': solar_radiation})

# Function to get solar radiation data for each district
def get_district_solar_radiation(district, region):
    dataset = ee.ImageCollection('MODIS/061/MCD18A1') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    solar_radiation_features = dataset.map(lambda image: extract_solar_radiation(image, region)).getInfo()
    
    solar_radiation_data = [{'date': feature['properties']['date'], 
                             'district': district,
                             'solar_radiation': feature['properties'].get('solar_radiation', None)} 
                            for feature in solar_radiation_features['features']]
    
    return solar_radiation_data

# Collect solar radiation data for all districts
all_solar_radiation_data = []
for district, region in districts.items():
    district_solar_radiation_data = get_district_solar_radiation(district, region)
    all_solar_radiation_data.extend(district_solar_radiation_data)

# Create a DataFrame
df_solar_radiation = pd.DataFrame(all_solar_radiation_data)

# Convert the 'date' column to datetime
df_solar_radiation['date'] = pd.to_datetime(df_solar_radiation['date'])

# Save the DataFrame to a CSV file
# df_solar_radiation.to_csv('district_solar_radiation_2011.csv', index=False)

# Display the DataFrame
print(df_solar_radiation)

           date     district  solar_radiation
0    2020-01-01  Trincomalee       727.484895
1    2020-01-02  Trincomalee       334.004070
2    2020-01-03  Trincomalee       772.350872
3    2020-01-04  Trincomalee       629.194229
4    2020-01-05  Trincomalee       704.646582
...         ...          ...              ...
9120 2020-12-26        Kandy       379.040831
9121 2020-12-27        Kandy       423.042559
9122 2020-12-28        Kandy       393.089653
9123 2020-12-29        Kandy       743.724420
9124 2020-12-30        Kandy       681.728036

[9125 rows x 3 columns]


In [None]:
df_solar_radiation.set_index('date', inplace=True)
df_radiation = df_solar_radiation.groupby('district').resample('ME').mean()
df_radiation.reset_index(inplace=True)
df_radiation = df_radiation.sort_values(by=['date', 'district']).reset_index(drop=True)
df_radiation= df_radiation[['date', 'district', 'solar_radiation']]
df_radiation=df_radiation.rename(columns={
    'solar_radiation':'solar radiation',
    
})

In [None]:
df_combined = pd.merge(df_combined,df_radiation, on=['date', 'district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600,82.086512,0.729976,610.151188
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559,80.339398,0.753856,604.735088
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831,74.769917,0.657447,723.050410
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347,77.240012,0.586747,770.067486
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896,76.729817,0.621787,681.313952
...,...,...,...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328,71.232829,0.537118,793.402548
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945,73.290690,0.609212,805.273223
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874,78.468824,0.687780,689.369725
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044,72.652221,0.636293,707.561118


# ELEVATION

In [None]:
# Function to extract elevation data for each region
def extract_elevation(region):
    elevation = ee.Image('USGS/SRTMGL1_003')

    min_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.min(), geometry=region, scale=30).get('elevation')
    mean_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=30).get('elevation')
    max_elevation = elevation.reduceRegion(
        reducer=ee.Reducer.max(), geometry=region, scale=30).get('elevation')

    return {
        'min_elevation': min_elevation.getInfo(),
        'mean_elevation': mean_elevation.getInfo(),
        'max_elevation': max_elevation.getInfo()
    }

# Collect elevation data for all districts
all_elevation_data = []
for district, region in districts.items():
    elevation_data = extract_elevation(region)
    elevation_data['district'] = district
    all_elevation_data.append(elevation_data)

# Create a DataFrame
df_elevation = pd.DataFrame(all_elevation_data)

# Display the DataFrame
print(df_elevation)


    min_elevation  mean_elevation  max_elevation      district
0             -35       31.207815            255   Trincomalee
1              -9       41.085049             97    Mullaitivu
2             -19        6.616899             34        Jaffna
3             -17       12.929901             59   Kilinochchi
4             -14       31.839197            105        Mannar
5             -21       31.536883            252      Puttalam
6             -11       34.520424            306       Gampaha
7             -13       36.288229            452       Colombo
8             -16       64.713164            645      Kalutara
9             -11       71.123374            831         Galle
10             -7      152.594695           1183        Matara
11            -12       47.001056            705    Hambantota
12            -20       65.859763            858        Ampara
13            -11       26.011840            385    Batticaloa
14             -1      378.429042           2202     Ra

In [None]:
df_elevation= df_elevation[['district', 'min_elevation', 'max_elevation','mean_elevation']]
df_elevation=df_elevation.rename(columns={
    'min_elevation':'min elevation',
    'max_elevation':'max elevation',
    'mean_elevation':'mean elevation'
    
})

df_elevation

Unnamed: 0,district,min elevation,max elevation,mean elevation
0,Trincomalee,-35,255,31.207815
1,Mullaitivu,-9,97,41.085049
2,Jaffna,-19,34,6.616899
3,Kilinochchi,-17,59,12.929901
4,Mannar,-14,105,31.839197
5,Puttalam,-21,252,31.536883
6,Gampaha,-11,306,34.520424
7,Colombo,-13,452,36.288229
8,Kalutara,-16,645,64.713164
9,Galle,-11,831,71.123374


In [None]:
df_combined = pd.merge(df_combined,df_elevation, on=['district'])
df_combined

Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600,82.086512,0.729976,610.151188,-20,858,65.859763
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559,80.339398,0.753856,604.735088,-20,858,65.859763
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831,74.769917,0.657447,723.050410,-20,858,65.859763
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347,77.240012,0.586747,770.067486,-20,858,65.859763
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896,76.729817,0.621787,681.313952,-20,858,65.859763
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328,71.232829,0.537118,793.402548,20,190,78.095170
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945,73.290690,0.609212,805.273223,20,190,78.095170
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874,78.468824,0.687780,689.369725,20,190,78.095170
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044,72.652221,0.636293,707.561118,20,190,78.095170


# VEGETATION

In [None]:
# # Define the start and end year
# start_year = 2011
# end_year = 2014

# # Function to extract forest cover data for each image and region
# def extract_forest_cover(image, region):
#     date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
#     tree_cover = image.select('Percent_Tree_Cover').reduceRegion(
#         reducer=ee.Reducer.mean(), geometry=region, scale=250).get('Percent_Tree_Cover')
#     return ee.Feature(None, {'date': date, 'tree_cover': tree_cover})

# # Function to get forest cover data for each district
# def get_district_forest_cover(district, region, start_date, end_date):
#     modis_vcf = ee.ImageCollection('MODIS/006/MOD44B') \
#             .filterDate(start_date, end_date) \
#             .filterBounds(region)
    
#     forest_cover_features = modis_vcf.map(lambda image: extract_forest_cover(image, region)).getInfo()
    
#     forest_cover_data = [{'date': feature['properties']['date'], 
#                          'district': district,
#                          'tree_cover': feature['properties']['tree_cover']} 
#                         for feature in forest_cover_features['features']]
    
#     return forest_cover_data

# # Collect forest cover data for all districts and years
# all_forest_cover_data = []
# for year in range(start_year, end_year + 1):
#     start_date = f'{year}-01-01'
#     end_date = f'{year}-12-31'
#     for district, region in districts.items():
#         district_forest_cover_data = get_district_forest_cover(district, region, start_date, end_date)
#         all_forest_cover_data.extend(district_forest_cover_data)

# # Create a DataFrame for forest cover data
# df_forest_cover = pd.DataFrame(all_forest_cover_data)

# # Ensure the 'date' column is in datetime format
# df_forest_cover['date'] = pd.to_datetime(df_forest_cover['date'])

# print(df_forest_cover)

In [None]:
# Define the year of interest
year = 2021

# Function to extract forest cover data for a given region
def extract_forest_cover(image, region):
    date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
    tree_cover = image.select('Percent_Tree_Cover').reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=250).get('Percent_Tree_Cover')
    return ee.Feature(None, {'date': date, 'tree_cover': tree_cover})

# Function to get forest cover data for each district for the year 2012
def get_district_forest_cover(district, region, start_date, end_date):
    modis_vcf = ee.ImageCollection('MODIS/006/MOD44B') \
            .filterDate(start_date, end_date) \
            .filterBounds(region)
    
    forest_cover_features = modis_vcf.map(lambda image: extract_forest_cover(image, region)).getInfo()
    
    forest_cover_data = [{'date': feature['properties']['date'], 
                         'district': district,
                         'tree_cover': feature['properties']['tree_cover']} 
                        for feature in forest_cover_features['features']]
    
    return forest_cover_data

# Collect forest cover data for all districts for the year 2012
all_forest_cover_data = []
start_date = f'{year}-01-01'
end_date = f'{year}-12-31'
for district, region in districts.items():
    district_forest_cover_data = get_district_forest_cover(district, region, start_date, end_date)
    all_forest_cover_data.extend(district_forest_cover_data)

# Create a DataFrame for forest cover data
df_forest_cover = pd.DataFrame(all_forest_cover_data)

# Ensure the 'date' column is in datetime format
df_forest_cover['date'] = pd.to_datetime(df_forest_cover['date'])

print(df_forest_cover)


         date      district  tree_cover
0  2020-03-05   Trincomalee   27.762375
1  2020-03-05    Mullaitivu   34.322920
2  2020-03-05        Jaffna    7.786393
3  2020-03-05   Kilinochchi   18.477445
4  2020-03-05        Mannar   38.206402
5  2020-03-05      Puttalam   31.998116
6  2020-03-05       Gampaha   29.812671
7  2020-03-05       Colombo   24.557155
8  2020-03-05      Kalutara   45.514866
9  2020-03-05         Galle   46.588190
10 2020-03-05        Matara   49.968482
11 2020-03-05    Hambantota   27.565643
12 2020-03-05        Ampara   26.830810
13 2020-03-05    Batticaloa   15.815907
14 2020-03-05     Ratnapura   52.782251
15 2020-03-05    Moneragala   36.565347
16 2020-03-05       Kegalle   53.636309
17 2020-03-05       Badulla   36.523022
18 2020-03-05        Matale   42.425050
19 2020-03-05   Polonnaruwa   30.081764
20 2020-03-05    Kurunegala   30.259982
21 2020-03-05  Anuradhapura   29.167190
22 2020-03-05  Nuwara Eliya   49.007591
23 2020-03-05      Vavuniya   33.641493


In [None]:

import pandas as pd

# Assume df_forest_cover contains the forest coverage data you extracted earlier
# df_forest_cover has columns: 'date', 'district', 'tree_cover'

# Ensure 'date' column in df_combined is in datetime format
df_combined['date'] = pd.to_datetime(df_combined['date'])

# Drop the 'date' column from df_forest_cover since tree_cover is constant for each district
#********for first run only*********
df_forest_cover = df_forest_cover.drop(columns=['date']).drop_duplicates()

# Merge df_combined with df_forest_cover on 'district'
df_combined= df_combined.merge(df_forest_cover, on='district', how='left')

# Verify the result
print(df_combined.head())


        date district    rainfall   min temp   max temp  mean temp  \
0 2020-01-31   Ampara   54.097461  22.033402  29.427500  25.304901   
1 2020-02-29   Ampara   27.578651  22.217799  29.555104  25.573322   
2 2020-03-31   Ampara   16.891400  22.789691  32.078556  26.959707   
3 2020-04-30   Ampara   56.572590  24.085308  33.266407  28.040798   
4 2020-05-31   Ampara  167.530384  25.633958  33.364023  28.917083   

   wind speed  dew point  surface pressure  relative humidity  cloud coverage  \
0    2.207215  22.030324     100136.977600          82.086512        0.729976   
1    2.434456  21.939477     100172.873559          80.339398        0.753856   
2    1.710555  22.105364     100023.332831          74.769917        0.657447   
3    1.284681  23.686100      99959.218347          77.240012        0.586747   
4    1.531194  24.423849      99654.787896          76.729817        0.621787   

   solar radiation  min elevation  max elevation  mean elevation  tree_cover  
0       610.1

In [None]:
# df_forest_cover=df_forest_cover.rename(columns={
#     'tree_cover':'tree cover'
# })

In [None]:
# df_combined['year']=df_combined['date'].dt.year
# df_forest_cover['year']=df_forest_cover['date'].dt.year
# df_combined = pd.merge(df_combined,df_forest_cover,on=['district','year'])
# df_combined.drop(columns=['year'], inplace=True)
# df_combined

In [None]:
# df_combined.drop(columns=['date_y'],inplace=True)

# df_combined=df_combined.rename(columns={
#     'date_x':'date'
# })

In [None]:
# df_combined

# urbanization ***

In [None]:
# Initialize the Earth Engine module
ee.Initialize()

# Define the year of interest
year = 2021

# Function to extract urbanization data for a given region
def extract_urbanization(image, region):
    built_up_area = image.reduceRegion(
        reducer=ee.Reducer.mean(), geometry=region, scale=250).get('built')
    return ee.Feature(None, {'urbanization': built_up_area})

# Function to get urbanization data for each district
def get_district_urbanization(district, region):
    ghsl = ee.Image('JRC/GHSL/P2016/BUILT_LDSMT_GLOBE_V1').select('built').clip(region)

    # Extract urbanization data (without date since the dataset is not time-specific)
    urbanization_feature = extract_urbanization(ghsl, region).getInfo()
    
    urbanization_data = [{'district': district,
                          'urbanization': urbanization_feature['properties']['urbanization']}]
    
    return urbanization_data
# Collect urbanization data for all districts
all_urbanization_data = []
for district, region in districts.items():
    district_urbanization_data = get_district_urbanization(district, region)
    all_urbanization_data.extend(district_urbanization_data)

# Create a DataFrame for urbanization data
df_urbanization = pd.DataFrame(all_urbanization_data)

print(df_urbanization)



        district  urbanization
0    Trincomalee      1.961834
1     Mullaitivu      1.982771
2         Jaffna      2.301368
3    Kilinochchi      1.982390
4         Mannar      1.988092
5       Puttalam      1.991564
6        Gampaha      2.477466
7        Colombo      3.033678
8       Kalutara      2.092828
9          Galle      2.092502
10        Matara      2.085183
11    Hambantota      2.014646
12        Ampara      1.989998
13    Batticaloa      1.982883
14     Ratnapura      2.006055
15    Moneragala      1.993537
16       Kegalle      2.025800
17       Badulla      2.022577
18        Matale      2.008280
19   Polonnaruwa      1.991564
20    Kurunegala      2.021875
21  Anuradhapura      1.992455
22  Nuwara Eliya      2.060603
23      Vavuniya      1.998551
24         Kandy      2.047060


In [None]:
df_combined= df_combined.merge(df_urbanization, on='district', how='left')
df_combined


Unnamed: 0,date,district,rainfall,min temp,max temp,mean temp,wind speed,dew point,surface pressure,relative humidity,cloud coverage,solar radiation,min elevation,max elevation,mean elevation,tree_cover,urbanization
0,2020-01-31,Ampara,54.097461,22.033402,29.427500,25.304901,2.207215,22.030324,100136.977600,82.086512,0.729976,610.151188,-20,858,65.859763,26.830810,1.989998
1,2020-02-29,Ampara,27.578651,22.217799,29.555104,25.573322,2.434456,21.939477,100172.873559,80.339398,0.753856,604.735088,-20,858,65.859763,26.830810,1.989998
2,2020-03-31,Ampara,16.891400,22.789691,32.078556,26.959707,1.710555,22.105364,100023.332831,74.769917,0.657447,723.050410,-20,858,65.859763,26.830810,1.989998
3,2020-04-30,Ampara,56.572590,24.085308,33.266407,28.040798,1.284681,23.686100,99959.218347,77.240012,0.586747,770.067486,-20,858,65.859763,26.830810,1.989998
4,2020-05-31,Ampara,167.530384,25.633958,33.364023,28.917083,1.531194,24.423849,99654.787896,76.729817,0.621787,681.313952,-20,858,65.859763,26.830810,1.989998
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
170,2020-03-31,Vavuniya,7.497944,23.516430,33.461552,28.100214,2.335143,22.406440,100499.844328,71.232829,0.537118,793.402548,20,190,78.095170,33.641493,1.998551
171,2020-04-30,Vavuniya,65.149132,25.558257,34.637041,29.487052,1.710779,24.209140,100428.697945,73.290690,0.609212,805.273223,20,190,78.095170,33.641493,1.998551
172,2020-05-31,Vavuniya,147.882344,26.783664,33.291558,29.517250,3.273750,25.380402,100139.145874,78.468824,0.687780,689.369725,20,190,78.095170,33.641493,1.998551
173,2020-06-30,Vavuniya,5.482827,27.109948,33.777595,29.878702,4.928719,24.439711,100069.469044,72.652221,0.636293,707.561118,20,190,78.095170,33.641493,1.998551


In [None]:
# # Define the time range
# years = [2011, 2012, 2013, 2014]

# # Function to extract urbanization data for each image and region
# def extract_urbanization(image, region):
#     date = ee.Date(image.get('system:time_start')).format('YYYY-MM-dd')
#     urban_area = image.reduceRegion(
#         reducer=ee.Reducer.mean(), geometry=region, scale=500).get('built_surface')
#     return ee.Feature(None, {'date': date, 'urban_area': urban_area})

# # Function to get urbanization data for each district
# def get_district_urbanization(district, region):
#     ghsl =ee.ImageCollection("JRC/GHSL/P2023A/GHS_BUILT_S") \
#             .filterBounds(region) \
#             .map(lambda img: img.clip(region)) \
#             .select('built_surface')
    
#     urbanization_features = ghsl.map(lambda image: extract_urbanization(image, region)).getInfo()
    
#     urbanization_data = [{'date': feature['properties']['date'], 
#                           'district': district,
#                           'urban_area': feature['properties']['urban_area']} 
#                          for feature in urbanization_features['features']]
    
#     return urbanization_data

# # Collect urbanization data for all districts and years
# all_urbanization_data = []
# for district, region in districts.items():
#     for year in years:
#         start_date = f'{year}-01-01'
#         end_date = f'{year}-12-31'
#         urbanization_data = get_district_urbanization(district, region)
#         all_urbanization_data.extend(urbanization_data)

# # Create a DataFrame
# df_urbanization = pd.DataFrame(all_urbanization_data)

# # Convert the 'date' column to datetime
# df_urbanization['date'] = pd.to_datetime(df_urbanization['date'])

# # df_urbanization.to_csv('district_urbanization.csv', index=False)

# # Display the DataFrame
# print(df_urbanization)

In [None]:
# df_combined.to_excel('_dengue_dataset_2020.xlsx',index=False)