In [19]:
import os
import rasterio
from rasterio.features import geometry_mask
import geopandas as gpd

# Define input paths
input_raster_path = '/Users/kasirajan/Documents/ACF/Modeled Surfaces/Data/Raster/IA2020DHS_CHVACCCDP1_MS_v01/IA2020DHS_CHVACCCDP1_MS_CI_v01.tif'
input_geojson_path = '/Users/kasirajan/Documents/ACF/District Boundaries/uttar_pradesh_data.geojson'
output_clipped_path = '/Users/kasirajan/Documents/ACF/Modeled Surfaces/Data/Raster/IA2020DHS_CHVACCCDP1_MS_v01/IA2020DHS_CHVACCCDP1_clipped.tif'

# Open the GeoJSON file with geopandas
gdf = gpd.read_file(input_geojson_path)

# Open the raster file with Rasterio
with rasterio.open(input_raster_path) as src:
    # Generate a mask for the GeoJSON geometry
    mask = geometry_mask(gdf.geometry, out_shape=src.shape, transform=src.transform, invert=True)
    
    # Read the data from the raster that falls within the mask
    clipped_data = src.read(masked=True)
    
    # Update metadata for the clipped raster
    clipped_meta = src.meta.copy()
    clipped_meta.update({'driver': 'GTiff',
                         'height': mask.shape[0],
                         'width': mask.shape[1],
                         'transform': src.transform})

    # Write the clipped raster to the output file
    with rasterio.open(output_clipped_path, 'w', **clipped_meta) as dst:
        dst.write(clipped_data)

print(f"Raster clipped and saved to: {output_clipped_path}")


Raster clipped and saved to: /Users/kasirajan/Documents/ACF/Modeled Surfaces/Data/Raster/IA2020DHS_CHVACCCDP1_MS_v01/IA2020DHS_CHVACCCDP1_clipped.tif


In [20]:
import rasterio
import numpy as np

# Load the original raster file
with rasterio.open(output_clipped_path) as src:
    original_raster_data = src.read(1)  # read the first and only band

# Mask the nodata values
nodata_value = -3.39999995e+38  # this might need to be adjusted based on your raster's metadata
masked_raster_data = np.ma.masked_where(original_raster_data == nodata_value, original_raster_data)

# Display some statistics
min_value = masked_raster_data.min()
max_value = masked_raster_data.max()
mean_value = masked_raster_data.mean()
median_value = np.ma.median(masked_raster_data)
std_dev = masked_raster_data.std()

# Display a subset of the raster values
subset = masked_raster_data[0:5, 0:5]

min_value, max_value, mean_value, median_value, std_dev, subset


(0.012640417,
 0.13327354,
 0.02663108516979769,
 0.02518552541732788,
 0.007434621267155653,
 masked_array(
   data=[[--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --]],
   mask=[[ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True]],
   fill_value=1e+20,
   dtype=float32))

In [21]:
import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon

# Define the extents
xmin, xmax, ymin, ymax = 77.0832305, 84.6665608, 23.833299800000006, 30.416630500000004

# Define grid size
grid_size = 0.00898315313

# Calculate number of cells along width and height
n_cells_x = int(np.ceil((xmax - xmin) / grid_size))
n_cells_y = int(np.ceil((ymax - ymin) / grid_size))

# Create the grid
polygons = []
for x in range(n_cells_x):
    for y in range(n_cells_y):
        polygons.append(Polygon([
            (xmin + grid_size * x, ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * (y + 1)),
            (xmin + grid_size * x, ymin + grid_size * (y + 1))
        ]))

grid_gdf = gpd.GeoDataFrame({'geometry': polygons})
grid_gdf['centroid'] = grid_gdf.geometry.centroid


In [22]:
import rasterio
from rasterio.transform import from_origin  # Import from_origin function
import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon

# Define the extents
xmin, xmax, ymin, ymax = 77.0832305, 84.6665608, 23.833299800000006, 30.416630500000004

# Define grid size
grid_size = 0.00898315313

# Calculate number of cells along width and height
n_cells_x = int(np.ceil((xmax - xmin) / grid_size))
n_cells_y = int(np.ceil((ymax - ymin) / grid_size))

# Create the grid
polygons = []
for x in range(n_cells_x):
    for y in range(n_cells_y):
        polygons.append(Polygon([
            (xmin + grid_size * x, ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * (y + 1)),
            (xmin + grid_size * x, ymin + grid_size * (y + 1))
        ]))

grid_gdf = gpd.GeoDataFrame({'geometry': polygons})
grid_gdf['centroid'] = grid_gdf.geometry.centroid

# Replace 'raster_file' with the path to your clipped raster file
raster_file = output_clipped_path

with rasterio.open(raster_file) as src:
    transform = from_origin(src.bounds.left, src.bounds.top, src.res[0], src.res[1])
    
    # Extract x, y coordinates from centroids
    xy_coords = [(pt.x, pt.y) for pt in grid_gdf['centroid']]
    
    # Sample the raster using the extracted coordinates
    values = [val[0] for val in src.sample(xy_coords, indexes=1)]

grid_gdf['raster_value'] = values


In [23]:
from scipy.spatial import KDTree

# Define the no-data value
nodata_value = -3.39999995e+38

# Extract coordinates of grid cells with no data and their values
no_data_coords = grid_gdf.loc[grid_gdf['raster_value'] == nodata_value, 'centroid'].apply(lambda geom: (geom.x, geom.y)).tolist()

# Extract coordinates of valid raster points and their values
valid_data_coords = grid_gdf.loc[grid_gdf['raster_value'] != nodata_value, 'centroid'].apply(lambda geom: (geom.x, geom.y)).tolist()
valid_data_values = grid_gdf.loc[grid_gdf['raster_value'] != nodata_value, 'raster_value'].tolist()

# Create a KDTree from valid data points
tree = KDTree(valid_data_coords)

# Find the nearest valid data point for each no-data grid cell
distances, indices = tree.query(no_data_coords)

# Assign the raster value from the nearest valid data point to the no-data grid cell
grid_gdf.loc[grid_gdf['raster_value'] == nodata_value, 'raster_value'] = [valid_data_values[i] for i in indices]


In [24]:
# Define the no-data value
nodata_value = -3.39999995e+38

# Count the number of grid cells with valid raster values
valid_count = len(grid_gdf[grid_gdf['raster_value'] != nodata_value])

# Count the number of grid cells with no-data values
nodata_count = len(grid_gdf[grid_gdf['raster_value'] == nodata_value])

valid_count, nodata_count


(619385, 0)

In [25]:
# Display 10 random rows from the grid_gdf
grid_gdf.sample(n=10)


Unnamed: 0,geometry,centroid,raster_value
70766,"POLYGON ((77.94561 27.40859, 77.95460 27.40859...",POINT (77.95010 27.41309),0.020228
506541,"POLYGON ((83.29059 24.17466, 83.29957 24.17466...",POINT (83.29508 24.17915),0.020434
129602,"POLYGON ((78.66427 29.16929, 78.67325 29.16929...",POINT (78.66876 29.17378),0.016977
488642,"POLYGON ((83.06601 28.00148, 83.07499 28.00148...",POINT (83.07050 28.00597),0.027046
79469,"POLYGON ((78.05341 26.57316, 78.06239 26.57316...",POINT (78.05790 26.57765),0.019924
500934,"POLYGON ((83.21872 26.48333, 83.22771 26.48333...",POINT (83.22322 26.48782),0.019933
494986,"POLYGON ((83.14686 25.72875, 83.15584 25.72875...",POINT (83.15135 25.73324),0.018949
258151,"POLYGON ((80.24530 25.04603, 80.25428 25.04603...",POINT (80.24979 25.05052),0.029297
297567,"POLYGON ((80.72141 30.13947, 80.73039 30.13947...",POINT (80.72590 30.14396),0.050116
552584,"POLYGON ((83.84754 29.53760, 83.85653 29.53760...",POINT (83.85204 29.54209),0.035579


In [26]:
district_gdf = gpd.read_file('/Users/kasirajan/Documents/ACF/District Boundaries/uttarpradesh.geojson')


In [27]:
joined_gdf = gpd.sjoin(grid_gdf, district_gdf, how="inner", op="intersects")


  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined_gdf = gpd.sjoin(grid_gdf, district_gdf, how="inner", op="intersects")


In [28]:
print(district_gdf.columns)


Index(['id', 'dt_code', 'district', 'st_code', 'year', 'st_nm', 'geometry'], dtype='object')


In [29]:
sample_district_data = joined_gdf[joined_gdf['district'] == "Agra"]
print(sample_district_data.head())  # Display the first few rows of the sample district data


                                                geometry  \
25987  POLYGON ((77.39764 26.81571, 77.40662 26.81571...   
25988  POLYGON ((77.39764 26.82469, 77.40662 26.82469...   
25989  POLYGON ((77.39764 26.83367, 77.40662 26.83367...   
26719  POLYGON ((77.40662 26.80672, 77.41561 26.80672...   
26720  POLYGON ((77.40662 26.81571, 77.41561 26.81571...   

                        centroid  raster_value  index_right  id dt_code  \
25987  POINT (77.40213 26.82020)      0.019046           23 NaN     146   
25988  POINT (77.40213 26.82918)      0.019046           23 NaN     146   
25989  POINT (77.40213 26.83816)      0.018480           23 NaN     146   
26719  POINT (77.41112 26.81122)      0.019046           23 NaN     146   
26720  POINT (77.41112 26.82020)      0.019046           23 NaN     146   

      district st_code    year          st_nm  
25987     Agra      09  2011_c  Uttar Pradesh  
25988     Agra      09  2011_c  Uttar Pradesh  
25989     Agra      09  2011_c  Uttar Prades

In [30]:
import folium
from folium.plugins import FastMarkerCluster

# Filter the GeoDataFrame to get only the rows corresponding to the district of Agra
agra_gdf = joined_gdf[joined_gdf['district'] == 'Agra']

In [31]:
# Calculate statistics for the raster values in Agra
min_value = agra_gdf['raster_value'].min()
max_value = agra_gdf['raster_value'].max()
mean_value = agra_gdf['raster_value'].mean()
median_value = agra_gdf['raster_value'].median()
q25_value = agra_gdf['raster_value'].quantile(0.25)
q75_value = agra_gdf['raster_value'].quantile(0.75)

min_value, max_value, mean_value, median_value, q25_value, q75_value


(0.018311381,
 0.025129497,
 0.021579979,
 0.021598935,
 0.02070486545562744,
 0.022535979747772217)

In [32]:
# Create a base map centered around Agra
m = folium.Map(location=[agra_gdf['centroid'].iloc[0].y, agra_gdf['centroid'].iloc[0].x], zoom_start=10, tiles='cartodb positron')

# Define a function to assign colors based on raster values
def assign_color(value):
    """Assign a color based on the raster value."""
    if value < 0.020:
        return '#add8e6'  # light blue
    elif 0.020 <= value < 0.021:
        return '#1e90ff'  # medium blue
    elif 0.021 <= value < 0.022:
        return '#00008b'  # dark blue
    else:
        return '#000080'  # very dark blue

# Add each grid cell to the map with a color that corresponds to its raster value
for idx, row in agra_gdf.iterrows():
    color = assign_color(row['raster_value'])
    folium.GeoJson(row['geometry'], style_function=lambda x, color=color: {'fillColor': color, 'color': color}).add_to(m)

# Create a custom HTML legend
legend_html = """
<div style="position: fixed; bottom: 50px; left: 50px; z-index: 9999; background-color: white; padding: 10px; border: 2px solid black;">
    <p><span style="background-color: #add8e6; padding: 10px;">&nbsp;</span> < 0.020</p>
    <p><span style="background-color: #1e90ff; padding: 10px;">&nbsp;</span> 0.020 - 0.021</p>
    <p><span style="background-color: #00008b; padding: 10px;">&nbsp;</span> 0.021 - 0.022</p>
    <p><span style="background-color: #000080; padding: 10px;">&nbsp;</span> >= 0.022</p>
</div>
"""

# Add the custom legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m

In [33]:
# Display the data types of each column in the district_gdf
joined_gdf.dtypes


geometry        geometry
centroid        geometry
raster_value     float32
index_right        int64
id               float64
dt_code           object
district          object
st_code           object
year              object
st_nm             object
dtype: object

In [34]:
# Print the first few rows of the district_gdf
joined_gdf.head()


Unnamed: 0,geometry,centroid,raster_value,index_right,id,dt_code,district,st_code,year,st_nm
640,"POLYGON ((77.08323 29.58252, 77.09221 29.58252...",POINT (77.08772 29.58701),0.019383,71,,704,Shamli,9,update2014,Uttar Pradesh
1365,"POLYGON ((77.09221 29.51065, 77.10120 29.51065...",POINT (77.09671 29.51514),0.019257,71,,704,Shamli,9,update2014,Uttar Pradesh
1366,"POLYGON ((77.09221 29.51964, 77.10120 29.51964...",POINT (77.09671 29.52413),0.019257,71,,704,Shamli,9,update2014,Uttar Pradesh
1367,"POLYGON ((77.09221 29.52862, 77.10120 29.52862...",POINT (77.09671 29.53311),0.019257,71,,704,Shamli,9,update2014,Uttar Pradesh
1372,"POLYGON ((77.09221 29.57353, 77.10120 29.57353...",POINT (77.09671 29.57803),0.018993,71,,704,Shamli,9,update2014,Uttar Pradesh


In [35]:
print(joined_gdf['district'].unique())



['Shamli' 'Saharanpur' 'Baghpat' 'Ghaziabad' 'Muzaffarnagar' 'Mathura'
 'Gautam Buddha Nagar' 'Agra' 'Meerut' 'Aligarh' 'Hapur' 'Bulandshahr'
 'Hathras' 'Bijnor' 'Amroha' 'Lalitpur' 'Etah' 'Firozabad' 'Sambhal'
 'Jhansi' 'Kasganj' 'Moradabad' 'Budaun' 'Mainpuri' 'Etawah' 'Rampur'
 'Jalaun' 'Bareilly' 'Farrukhabad' 'Auraiya' 'Mahoba' 'Kannauj'
 'Shahjahanpur' 'Hamirpur' 'Kanpur Dehat' 'Pilibhit' 'Hardoi'
 'Kanpur Nagar' 'Kheri' 'Unnao' 'Banda' 'Fatehpur' 'Sitapur' 'Lucknow'
 'Rae Bareli' 'Chitrakoot' 'Bara Banki' 'Bahraich' 'Kaushambi' 'Amethi'
 'Pratapgarh' 'Prayagraj' 'Gonda' 'Faizabad' 'Shrawasti' 'Sultanpur'
 'Balrampur' 'Mirzapur' 'Jaunpur' 'Bhadohi' 'Ambedkar Nagar' 'Basti'
 'Siddharthnagar' 'Sonbhadra' 'Varanasi' 'Azamgarh' 'Sant Kabir Nagar'
 'Chandauli' 'Ghazipur' 'Gorakhpur' 'Mahrajganj' 'Mau' 'Deoria'
 'Kushinagar' 'Ballia']


In [39]:
import os

# Define the output directory where you want to save the GeoJSON files
output_directory = "/Users/kasirajan/Documents/ACF/Modeled Surfaces/Output/IA2020DHS_CHVACCCDP1_Uttar_Pradesh"

# Convert the centroid column to WKT representation
joined_gdf['centroid'] = joined_gdf['centroid'].astype(str)

# Iterate over each unique district in the joined_gdf
for district in joined_gdf['district'].unique():
    # Filter the rows for the current district
    district_gdf = joined_gdf[joined_gdf['district'] == district]
    
    # Define the output file path
    filename = district.replace(' ', '_') + ".geojson"
    output_path = os.path.join(output_directory, filename)
    
    # Export to GeoJSON
    district_gdf.to_file(output_path, driver='GeoJSON')

print("Export completed!")


Export completed!
