In [133]:
import rasterio
import numpy as np

# Load the original raster file
with rasterio.open("/Users/kasirajan/Documents/ACF/Modeled Surfaces/Data/Raster/IA2020DHS_AHTOBCMANY_MS_v01/IA2020DHS_AHTOBCMANY_MS_CI_UP.tif") as src:
    original_raster_data = src.read(1)  # read the first and only band

# Mask the nodata values
nodata_value = -3.39999995e+38  # this might need to be adjusted based on your raster's metadata
masked_raster_data = np.ma.masked_where(original_raster_data == nodata_value, original_raster_data)

# Display some statistics
min_value = masked_raster_data.min()
max_value = masked_raster_data.max()
mean_value = masked_raster_data.mean()
median_value = np.ma.median(masked_raster_data)
std_dev = masked_raster_data.std()

# Display a subset of the raster values
subset = masked_raster_data[0:5, 0:5]

min_value, max_value, mean_value, median_value, std_dev, subset


(0.07211822,
 0.2503228,
 0.14893395922627298,
 0.14804519712924957,
 0.021593022841596015,
 masked_array(
   data=[[--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --],
         [--, --, --, --, --]],
   mask=[[ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True],
         [ True,  True,  True,  True,  True]],
   fill_value=1e+20,
   dtype=float32))

In [134]:
import geopandas as gpd
import numpy as np
from shapely.geometry import Polygon

# Define the extents
xmin, xmax, ymin, ymax = 77.0832305, 84.6665608, 23.833299800000006, 30.416630500000004

# Define grid size
grid_size = 0.00898315313

# Calculate number of cells along width and height
n_cells_x = int(np.ceil((xmax - xmin) / grid_size))
n_cells_y = int(np.ceil((ymax - ymin) / grid_size))

# Create the grid
polygons = []
for x in range(n_cells_x):
    for y in range(n_cells_y):
        polygons.append(Polygon([
            (xmin + grid_size * x, ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * y),
            (xmin + grid_size * (x + 1), ymin + grid_size * (y + 1)),
            (xmin + grid_size * x, ymin + grid_size * (y + 1))
        ]))

grid_gdf = gpd.GeoDataFrame({'geometry': polygons})
grid_gdf['centroid'] = grid_gdf.geometry.centroid


In [135]:
with rasterio.open(raster_file) as src:
    transform = from_origin(src.bounds.left, src.bounds.top, src.res[0], src.res[1])
    
    # Extract x, y coordinates from centroids
    xy_coords = [(pt.x, pt.y) for pt in grid_gdf['centroid']]
    
    # Sample the raster using the extracted coordinates
    values = [val[0] for val in src.sample(xy_coords, indexes=1)]

grid_gdf['raster_value'] = values


In [136]:
from scipy.spatial import KDTree

# Define the no-data value
nodata_value = -3.39999995e+38

# Extract coordinates of grid cells with no data and their values
no_data_coords = grid_gdf.loc[grid_gdf['raster_value'] == nodata_value, 'centroid'].apply(lambda geom: (geom.x, geom.y)).tolist()

# Extract coordinates of valid raster points and their values
valid_data_coords = grid_gdf.loc[grid_gdf['raster_value'] != nodata_value, 'centroid'].apply(lambda geom: (geom.x, geom.y)).tolist()
valid_data_values = grid_gdf.loc[grid_gdf['raster_value'] != nodata_value, 'raster_value'].tolist()

# Create a KDTree from valid data points
tree = KDTree(valid_data_coords)

# Find the nearest valid data point for each no-data grid cell
distances, indices = tree.query(no_data_coords)

# Assign the raster value from the nearest valid data point to the no-data grid cell
grid_gdf.loc[grid_gdf['raster_value'] == nodata_value, 'raster_value'] = [valid_data_values[i] for i in indices]


In [137]:
# Define the no-data value
nodata_value = -3.39999995e+38

# Count the number of grid cells with valid raster values
valid_count = len(grid_gdf[grid_gdf['raster_value'] != nodata_value])

# Count the number of grid cells with no-data values
nodata_count = len(grid_gdf[grid_gdf['raster_value'] == nodata_value])

valid_count, nodata_count


(619385, 0)

In [138]:
# Display 10 random rows from the grid_gdf
grid_gdf.sample(n=10)


Unnamed: 0,geometry,centroid,raster_value
182613,"POLYGON ((79.32004 24.69568, 79.32902 24.69568...",POINT (79.32453 24.70017),0.13329
44827,"POLYGON ((77.63120 24.85738, 77.64019 24.85738...",POINT (77.63569 24.86187),0.139834
135668,"POLYGON ((78.74511 24.39924, 78.75410 24.39924...",POINT (78.74961 24.40373),0.144545
248797,"POLYGON ((80.12852 26.61808, 80.13750 26.61808...",POINT (80.13301 26.62257),0.147819
107848,"POLYGON ((78.40375 24.70467, 78.41274 24.70467...",POINT (78.40825 24.70916),0.177958
397910,"POLYGON ((81.95210 29.43879, 81.96108 29.43879...",POINT (81.95659 29.44328),0.307421
203170,"POLYGON ((79.57156 24.99213, 79.58055 24.99213...",POINT (79.57606 24.99662),0.120195
267557,"POLYGON ((80.36208 23.94110, 80.37106 23.94110...",POINT (80.36657 23.94559),0.00173
108639,"POLYGON ((78.41274 25.22569, 78.42172 25.22569...",POINT (78.41723 25.23018),0.141721
220040,"POLYGON ((79.77818 25.09094, 79.78716 25.09094...",POINT (79.78267 25.09543),0.130711


In [139]:
district_gdf = gpd.read_file('/Users/kasirajan/Documents/ACF/District Boundaries/uttarpradesh.geojson')


In [140]:
joined_gdf = gpd.sjoin(grid_gdf, district_gdf, how="inner", op="intersects")


  if await self.run_code(code, result, async_=asy):
Use `to_crs()` to reproject one of the input geometries to match the CRS of the other.

Left CRS: None
Right CRS: EPSG:4326

  joined_gdf = gpd.sjoin(grid_gdf, district_gdf, how="inner", op="intersects")


In [141]:
print(district_gdf.columns)


Index(['id', 'dt_code', 'district', 'st_code', 'year', 'st_nm', 'geometry'], dtype='object')


In [142]:
sample_district_data = joined_gdf[joined_gdf['district'] == "Agra"]
print(sample_district_data.head())  # Display the first few rows of the sample district data


                                                geometry  \
25987  POLYGON ((77.39764 26.81571, 77.40662 26.81571...   
25988  POLYGON ((77.39764 26.82469, 77.40662 26.82469...   
25989  POLYGON ((77.39764 26.83367, 77.40662 26.83367...   
26719  POLYGON ((77.40662 26.80672, 77.41561 26.80672...   
26720  POLYGON ((77.40662 26.81571, 77.41561 26.81571...   

                        centroid  raster_value  index_right  id dt_code  \
25987  POINT (77.40213 26.82020)      0.154320           23 NaN     146   
25988  POINT (77.40213 26.82918)      0.154320           23 NaN     146   
25989  POINT (77.40213 26.83816)      0.149235           23 NaN     146   
26719  POINT (77.41112 26.81122)      0.154320           23 NaN     146   
26720  POINT (77.41112 26.82020)      0.154320           23 NaN     146   

      district st_code    year          st_nm  
25987     Agra      09  2011_c  Uttar Pradesh  
25988     Agra      09  2011_c  Uttar Pradesh  
25989     Agra      09  2011_c  Uttar Prades

In [143]:
import folium
from folium.plugins import FastMarkerCluster

# Filter the GeoDataFrame to get only the rows corresponding to the district of Agra
agra_gdf = joined_gdf[joined_gdf['district'] == 'Agra']

In [144]:
# Calculate statistics for the raster values in Agra
min_value = agra_gdf['raster_value'].min()
max_value = agra_gdf['raster_value'].max()
mean_value = agra_gdf['raster_value'].mean()
median_value = agra_gdf['raster_value'].median()
q25_value = agra_gdf['raster_value'].quantile(0.25)
q75_value = agra_gdf['raster_value'].quantile(0.75)

min_value, max_value, mean_value, median_value, q25_value, q75_value


(0.12188551,
 0.18738818,
 0.15120117,
 0.15131944,
 0.1428917944431305,
 0.1591118574142456)

In [157]:
# Create a base map centered around Agra
m = folium.Map(location=[agra_gdf['centroid'].iloc[0].y, agra_gdf['centroid'].iloc[0].x], zoom_start=10, tiles='cartodb positron')

# Define a function to assign colors based on raster values
def assign_color(value):
    """Assign a color based on the raster value."""
    if value < 0.143:
        return '#add8e6'  # light blue
    elif 0.143 <= value < 0.151:
        return '#1e90ff'  # medium blue
    elif 0.151 <= value < 0.159:
        return '#00008b'  # dark blue
    else:
        return '#000080'  # very dark blue

# Add each grid cell to the map with a color that corresponds to its raster value
for idx, row in agra_gdf.iterrows():
    color = assign_color(row['raster_value'])
    folium.GeoJson(row['geometry'], style_function=lambda x, color=color: {'fillColor': color, 'color': color}).add_to(m)

# Create a custom HTML legend
legend_html = """
<div style="position: fixed; bottom: 50px; left: 50px; z-index: 9999; background-color: white; padding: 10px; border: 2px solid black;">
    <p><span style="background-color: #add8e6; padding: 10px;">&nbsp;</span> < 0.143</p>
    <p><span style="background-color: #1e90ff; padding: 10px;">&nbsp;</span> 0.143 - 0.151</p>
    <p><span style="background-color: #00008b; padding: 10px;">&nbsp;</span> 0.151 - 0.159</p>
    <p><span style="background-color: #000080; padding: 10px;">&nbsp;</span> >= 0.159</p>
</div>
"""

# Add the custom legend to the map
m.get_root().html.add_child(folium.Element(legend_html))

# Display the map
m

In [148]:
# Display the data types of each column in the district_gdf
joined_gdf.dtypes


geometry        geometry
centroid        geometry
raster_value     float32
index_right        int64
id               float64
dt_code           object
district          object
st_code           object
year              object
st_nm             object
dtype: object

In [149]:
# Print the first few rows of the district_gdf
joined_gdf.head()


Unnamed: 0,geometry,centroid,raster_value,index_right,id,dt_code,district,st_code,year,st_nm
640,"POLYGON ((77.08323 29.58252, 77.09221 29.58252...",POINT (77.08772 29.58701),0.117196,71,,704,Shamli,9,update2014,Uttar Pradesh
1365,"POLYGON ((77.09221 29.51065, 77.10120 29.51065...",POINT (77.09671 29.51514),0.111477,71,,704,Shamli,9,update2014,Uttar Pradesh
1366,"POLYGON ((77.09221 29.51964, 77.10120 29.51964...",POINT (77.09671 29.52413),0.111477,71,,704,Shamli,9,update2014,Uttar Pradesh
1367,"POLYGON ((77.09221 29.52862, 77.10120 29.52862...",POINT (77.09671 29.53311),0.111477,71,,704,Shamli,9,update2014,Uttar Pradesh
1372,"POLYGON ((77.09221 29.57353, 77.10120 29.57353...",POINT (77.09671 29.57803),0.109211,71,,704,Shamli,9,update2014,Uttar Pradesh


In [150]:
print(joined_gdf['district'].unique())



['Shamli' 'Saharanpur' 'Baghpat' 'Ghaziabad' 'Muzaffarnagar' 'Mathura'
 'Gautam Buddha Nagar' 'Agra' 'Meerut' 'Aligarh' 'Hapur' 'Bulandshahr'
 'Hathras' 'Bijnor' 'Amroha' 'Lalitpur' 'Etah' 'Firozabad' 'Sambhal'
 'Jhansi' 'Kasganj' 'Moradabad' 'Budaun' 'Mainpuri' 'Etawah' 'Rampur'
 'Jalaun' 'Bareilly' 'Farrukhabad' 'Auraiya' 'Mahoba' 'Kannauj'
 'Shahjahanpur' 'Hamirpur' 'Kanpur Dehat' 'Pilibhit' 'Hardoi'
 'Kanpur Nagar' 'Kheri' 'Unnao' 'Banda' 'Fatehpur' 'Sitapur' 'Lucknow'
 'Rae Bareli' 'Chitrakoot' 'Bara Banki' 'Bahraich' 'Kaushambi' 'Amethi'
 'Pratapgarh' 'Prayagraj' 'Gonda' 'Faizabad' 'Shrawasti' 'Sultanpur'
 'Balrampur' 'Mirzapur' 'Jaunpur' 'Bhadohi' 'Ambedkar Nagar' 'Basti'
 'Siddharthnagar' 'Sonbhadra' 'Varanasi' 'Azamgarh' 'Sant Kabir Nagar'
 'Chandauli' 'Ghazipur' 'Gorakhpur' 'Mahrajganj' 'Mau' 'Deoria'
 'Kushinagar' 'Ballia']


In [152]:
import os

# Define the output directory where you want to save the GeoJSON files
output_directory = "/Users/kasirajan/Documents/ACF/Modeled Surfaces/Output/NHS20_AH_TOBC_Uttar_Pradesh"

# Convert the centroid column to WKT representation
joined_gdf['centroid'] = joined_gdf['centroid'].astype(str)

# Iterate over each unique district in the joined_gdf
for district in joined_gdf['district'].unique():
    # Filter the rows for the current district
    district_gdf = joined_gdf[joined_gdf['district'] == district]
    
    # Define the output file path
    filename = district.replace(' ', '_') + ".geojson"
    output_path = os.path.join(output_directory, filename)
    
    # Export to GeoJSON
    district_gdf.to_file(output_path, driver='GeoJSON')

print("Export completed!")


Export completed!
