In [1]:
import os
import xarray as xr
import geopandas as gpd
import numpy as np
import pandas as pd
from scipy.spatial import cKDTree

# Input and output locations
grid_path = r"C:\Users\purav\OneDrive\Desktop\Fi Year Project\Final-Year-Project\Data\Shapefile\India Shape\grid.shp"
rainfall_folder = r"C:\Users\purav\OneDrive\Desktop\Fi Year Project\Final-Year-Project\Data\Rainfall"
temperature_folder = r"C:\Users\purav\OneDrive\Desktop\Fi Year Project\Final-Year-Project\Data\Temperature"
output_csv_path = r"C:\Users\purav\OneDrive\Desktop\Fi Year Project\Final-Year-Project\Data\Processed\climate_data.csv"

# Read the grid shapefile into a GeoDataFrame
grid = gpd.read_file(grid_path)

# Keep each cell's centroid on the frame, and collect the centroids as
# (latitude, longitude) pairs, i.e. (y, x), for the nearest-neighbour lookups
centroids = grid.geometry.centroid
grid["centroid"] = centroids
grid_points = np.array(list(zip(centroids.y, centroids.x)))

# Accumulates one [year, latitude, longitude, rainfall, temperature] row
# per grid cell per year
data_records = []

def _nearest_annual_mean(nc_path, var_name, time_dim, lat_name, lon_name, query_points):
    """Return the time-mean of a gridded variable sampled at the nearest cell.

    Opens the NetCDF file at ``nc_path``, averages ``var_name`` over
    ``time_dim`` and, for every (lat, lon) row in ``query_points``, returns
    the value of the source cell whose (lat, lon) coordinate is closest in
    plain Euclidean distance on the coordinate values.

    Args:
        nc_path: Path of the NetCDF file to read.
        var_name: Name of the data variable to average.
        time_dim: Name of the dimension to average over.
        lat_name: Name of the latitude coordinate variable.
        lon_name: Name of the longitude coordinate variable.
        query_points: Array of (lat, lon) rows to sample at.

    Returns:
        1-D NumPy array with one value per row of ``query_points``.
    """
    # The context manager closes the file handle once the values are in
    # memory; otherwise every year would leave two datasets open.
    with xr.open_dataset(nc_path) as ds:
        # NOTE(review): cells that are NaN after averaging are turned into 0,
        # so a query point whose nearest cell has no data gets 0 rather than
        # NaN. Confirm this is intended, especially for temperature.
        annual_mean = ds[var_name].mean(dim=time_dim).compute().fillna(0)
        lat = ds[lat_name].values
        lon = ds[lon_name].values

        # The flattened values must run lat-major / lon-minor to line up with
        # the point list built below; enforce that instead of assuming the
        # file stores its dimensions in that order.
        if set(annual_mean.dims) == {lat_name, lon_name}:
            annual_mean = annual_mean.transpose(lat_name, lon_name)
        cell_values = annual_mean.values.ravel()

    # indexing="ij" yields (lat[i], lon[j]) pairs in row-major order, the same
    # order as a nested "for lat, for lon" loop.
    lat_mesh, lon_mesh = np.meshgrid(lat, lon, indexing="ij")
    source_points = np.column_stack((lat_mesh.ravel(), lon_mesh.ravel()))

    _, nearest = cKDTree(source_points).query(query_points)
    return cell_values[nearest]


# Process each year
for year in range(2000, 2024):
    print(f"Processing data for {year}...")

    # Default to NaN so a missing input file leaves gaps instead of fake values
    rainfall_values = np.full(len(grid), np.nan)
    temperature_values = np.full(len(grid), np.nan)

    # Rainfall Processing
    rainfall_file = os.path.join(rainfall_folder, f"RF25_ind{year}_rfp25.nc")
    if os.path.exists(rainfall_file):
        rainfall_values = _nearest_annual_mean(
            rainfall_file, "RAINFALL", "TIME", "LATITUDE", "LONGITUDE", grid_points
        )
    else:
        print(f"Rainfall file not found for {year}, skipping...")

    # Temperature Processing
    temperature_file = os.path.join(temperature_folder, f"Maxtemp_MaxT_{year}.nc")
    if os.path.exists(temperature_file):
        temperature_values = _nearest_annual_mean(
            temperature_file, "temperature", "time", "lat", "lon", grid_points
        )
    else:
        print(f"Temperature file not found for {year}, skipping...")

    # Store data for the year: one [year, lat, lon, rainfall, temperature]
    # row per grid cell
    data_records.extend(
        [year, point[0], point[1], rain, temp]
        for point, rain, temp in zip(grid_points, rainfall_values, temperature_values)
    )

# Convert data to DataFrame and save as CSV
df = pd.DataFrame(data_records, columns=["year", "latitude", "longitude", "rainfall", "temperature"])

# Create the destination folder if it is missing, so a long processing run is
# not lost to an OSError at the very last step.
output_dir = os.path.dirname(output_csv_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df.to_csv(output_csv_path, index=False)

print(f"Data processing completed! CSV saved at: {output_csv_path}")


  from pandas.core import (


Processing data for 2000...
Processing data for 2001...
Processing data for 2002...
Processing data for 2003...
Processing data for 2004...
Processing data for 2005...
Processing data for 2006...
Processing data for 2007...
Processing data for 2008...
Processing data for 2009...
Processing data for 2010...
Processing data for 2011...
Processing data for 2012...
Processing data for 2013...
Processing data for 2014...
Processing data for 2015...
Processing data for 2016...
Processing data for 2017...
Processing data for 2018...
Processing data for 2019...
Processing data for 2020...
Processing data for 2021...
Processing data for 2022...
Processing data for 2023...
Data processing completed! CSV saved at: C:\Users\purav\OneDrive\Desktop\Fi Year Project\Final-Year-Project\Data\Processed\climate_data.csv
