### Import Libraries

In [None]:
import dask
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
import xarray as xr
from tqdm import tqdm
from os import listdir
from os.path import isfile, join

### Initialize dask client
Dask is imported to handle large datasets efficiently by leveraging parallel computing.

In [None]:
from dask.distributed import Client

# Start a Dask distributed client with 64 workers, 2 threads per worker.
# NOTE(review): memory_limit is presumably applied per worker rather than to
# the whole cluster — confirm that 128GB per worker matches the node's RAM.
client = Client(
    n_workers=64,
    threads_per_worker=2,
    memory_limit='128GB',
)

# Leave the client as the last expression so the notebook renders its summary.
client

A Dask client is initialized to manage distributed computation. The client object provides access to the dashboard for monitoring tasks.

### Load the datasets 

In [None]:
# Open every file matching Wind_*.nc as a single dataset: files are combined in
# the order given ('nested') by concatenating along 'time', opened in parallel,
# and split into 60 x 60 longitude/latitude Dask chunks.
mds_wind = xr.open_mfdataset(
    '/scratch/vishald/jrf2_monsoonlab/shiv/download_data/Wind_*.nc',
    combine='nested',
    concat_dim='time',
    parallel=True,
    chunks={'longitude': 60, 'latitude': 60},
)

In [None]:
# Wind components read from the 'u100' and 'v100' variables
# (presumably the 100 m zonal/meridional components in m/s — TODO confirm).
u, v = mds_wind['u100'], mds_wind['v100']

# Magnitude of the horizontal wind vector.
speed = (u * u + v * v) ** 0.5

# Wind power density (W/m2): 0.5 * air density * speed cubed,
# with the air density fixed at 1.2.
air_density = 1.2
wpd_all = 0.5 * air_density * (speed ** 3)

# Show the (still lazy) result as the cell output.
wpd_all

### Computing Wind statistics

In [None]:
# Mean wind power density over the 'time' dimension, evaluated immediately.
wind_mean_all = wpd_all.mean(dim='time').compute()

In [None]:
# Persist the time-mean wind power density as a NetCDF file.
wind_mean_all.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind Mean.nc')

In [None]:
# Median wind power density over the 'time' dimension, evaluated immediately.
wind_med = wpd_all.median(dim='time').compute()

In [None]:
# Persist the time-median wind power density as a NetCDF file.
wind_med.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind Median.nc')

In [None]:
%%time
#MemoryManagement
import gc
client.run(gc.collect)
 
import ctypes
def trim_memory() -> int:
    """Call ``malloc_trim(0)`` from ``libc.so.6`` to free unused memory back to the OS.

    Returns:
        The integer result of ``malloc_trim``.
    """
    return ctypes.CDLL("libc.so.6").malloc_trim(0)
# Release unused heap memory on every Dask worker before the heavy computation.
client.run(trim_memory)

# Read the previously saved median fully into memory. load_dataarray closes the
# file once the data is read, so no handle on the NetCDF file stays open for the
# rest of the session (open_dataarray would keep it open and lazy).
wind_median = xr.load_dataarray('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind Median.nc')
print('Median Loaded')

# Computation for RCoV: median over time of the absolute deviation from the
# median, divided by the median (presumably the "robust coefficient of
# variation" — TODO confirm the intended definition).
wind_rcov = ((abs(wpd_all - wind_median)).median('time', skipna=True).compute()) / wind_median
wind_rcov.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind RCoV.nc')

In [None]:
%%time
import gc
client.run(gc.collect)
 
import ctypes
def trim_memory() -> int:
    """Call ``malloc_trim(0)`` from ``libc.so.6`` to free unused memory back to the OS.

    Returns:
        The integer result of ``malloc_trim``.
    """
    return ctypes.CDLL("libc.so.6").malloc_trim(0)
# Release unused heap memory on every Dask worker before the computation.
client.run(trim_memory)
# -------------------------------
# Wind Availability Calculation
# -------------------------------
wind_th = 240  # Wind power density threshold in W/m² (adjust to the application's requirements)

# Per grid cell: the number of valid (non-NaN) time steps, and the number of
# time steps whose wind power density exceeds the threshold. Both reductions
# are submitted in a single dask.compute call so the shared wpd_all graph is
# evaluated once instead of once per count.
time_hours, wind_avail = dask.compute(
    wpd_all.count(dim='time'),
    wpd_all.where(wpd_all > wind_th).count(dim='time'),
)

# Percentage of valid time steps on which the threshold is exceeded
pct_wind_avail = (wind_avail / time_hours) * 100
pct_wind_avail.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind Availability.nc')
print('Done')

In [None]:
# --- Interquartile Range (IQR) of the wind data ---

# Modules needed for the worker memory clean-up in this cell.
import ctypes
import gc

# Garbage-collect on all Dask workers to free memory; the malloc_trim helper
# defined next returns unused memory to the OS.
client.run(gc.collect)
def trim_memory() -> int:
    """Call ``malloc_trim(0)`` from ``libc.so.6`` to free unused memory back to the OS.

    Returns:
        The integer result of ``malloc_trim``.
    """
    return ctypes.CDLL("libc.so.6").malloc_trim(0)
# Release unused heap memory on every Dask worker before the computation.
client.run(trim_memory)

# Rechunk so that every block holds the complete time series (a quantile along
# 'time' needs that dimension in a single chunk); latitude/longitude chunk
# sizes are left for Dask to choose. This is lazy and shared by both quantiles.
wpd_time_contig = wpd_all.chunk({"time": -1, "latitude": "auto", "longitude": "auto"})

# 75th percentile (Q3) of the wind data over time
print("Calculating 75th percentile")
q75 = wpd_time_contig.quantile(0.75, 'time').compute()
print("75th percentile complete")

# Save the computed 75th percentile to disk as a NetCDF file
q75.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind QSevenFive.nc')

# Clean up worker memory again before computing the next percentile
client.run(gc.collect)
client.run(trim_memory)

# 25th percentile (Q1) of the wind data over time
print("Calculating 25th percentile")
q25 = wpd_time_contig.quantile(0.25, 'time').compute()
print("25th percentile complete")

# Save the computed 25th percentile to disk as a NetCDF file
q25.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind QTwoFive.nc')

# Interquartile Range (IQR = Q3 - Q1), computed from the in-memory results.
# They are not re-opened from the files just written, which would leave open
# handles on those files.
iqr = q75 - q25

# Save the IQR as a NetCDF file
iqr.to_netcdf('/scratch/vishald/jrf2_monsoonlab/shiv/datastats/Entire Wind IQR.nc')

# Last expression of the cell so the notebook displays the result.
iqr
