# Load Packages

In [1]:
import os
import xarray as xr
import pandas as pd

In [2]:
# Sanity check: show which xarray I/O backends are installed in this environment
# before trying to open the NetCDF files below.
print(xr.backends.list_engines())

{'netcdf4': <NetCDF4BackendEntrypoint>
  Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using netCDF4 in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.NetCDF4BackendEntrypoint.html, 'h5netcdf': <H5netcdfBackendEntrypoint>
  Open netCDF (.nc, .nc4 and .cdf) and most HDF5 files using h5netcdf in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.H5netcdfBackendEntrypoint.html, 'scipy': <ScipyBackendEntrypoint>
  Open netCDF files (.nc, .nc4, .cdf and .gz) using scipy in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.ScipyBackendEntrypoint.html, 'store': <StoreBackendEntrypoint>
  Open AbstractDataStore instances in Xarray
  Learn more at https://docs.xarray.dev/en/stable/generated/xarray.backends.StoreBackendEntrypoint.html}


# Transform VHP files

## Test: load and merge a single week

In [20]:
# Flatten the week-001 ND file into a DataFrame. The context manager closes the
# NetCDF handle as soon as the values have been copied into pandas, instead of
# leaving it open until `ds` is rebound by a later cell.
with xr.open_dataset('../vhp/VHP.G04.C07.j01.P2020001.ND.nc') as ds:
    df_nd = ds.to_dataframe().reset_index()

In [21]:
# Remove helper columns that are not needed for the analysis; labels that are
# absent from the frame are skipped silently (errors='ignore').
nd_unused_columns = ['HEIGHT', 'WIDTH', 'QA', 'PLATE_CARREE']
df_nd.drop(columns=nd_unused_columns, errors='ignore', inplace=True)

In [22]:
# Peek at the last rows of the flattened ND frame (plain-text rendering).
print(df_nd.tail())

          NDVI  BT4   latitude   longitude
36159995   NaN  NaN -55.133991  179.837982
36159996   NaN  NaN -55.133991  179.873978
36159997   NaN  NaN -55.133991  179.909988
36159998   NaN  NaN -55.133991  179.945984
36159999   NaN  NaN -55.133991  179.981979


In [23]:
# Flatten the week-001 VH file into a DataFrame. The context manager closes the
# NetCDF handle once the values have been copied into pandas.
with xr.open_dataset('../vhp/VHP.G04.C07.j01.P2020001.VH.nc') as ds:
    df_vh = ds.to_dataframe().reset_index()

In [24]:
# Remove helper columns that are not needed for the analysis; labels that are
# absent from the frame are skipped silently (errors='ignore').
vh_unused_columns = ['HEIGHT', 'WIDTH', 'QA', 'PLATE_CARREE']
df_vh.drop(columns=vh_unused_columns, errors='ignore', inplace=True)

In [25]:
# Peek at the last rows of the flattened VH frame (plain-text rendering).
print(df_vh.tail())

          VCI  TCI  VHI   latitude   longitude
36159995  NaN  NaN  NaN -55.133991  179.837982
36159996  NaN  NaN  NaN -55.133991  179.873978
36159997  NaN  NaN  NaN -55.133991  179.909988
36159998  NaN  NaN  NaN -55.133991  179.945984
36159999  NaN  NaN  NaN -55.133991  179.981979


In [26]:
# Join the ND and VH tables pixel by pixel, keyed on the coordinate pair.
# The suffixes only take effect if both frames share a non-key column name.
coordinate_keys = ['latitude', 'longitude']
dt = df_nd.merge(df_vh, on=coordinate_keys, suffixes=('_ND', '_VH'))

In [27]:
# Peek at the last rows of the merged ND + VH frame (plain-text rendering).
print(dt.tail())

          NDVI  BT4   latitude   longitude  VCI  TCI  VHI
36159995   NaN  NaN -55.133991  179.837982  NaN  NaN  NaN
36159996   NaN  NaN -55.133991  179.873978  NaN  NaN  NaN
36159997   NaN  NaN -55.133991  179.909988  NaN  NaN  NaN
36159998   NaN  NaN -55.133991  179.945984  NaN  NaN  NaN
36159999   NaN  NaN -55.133991  179.981979  NaN  NaN  NaN


## Creating Weekly CSVs

In [10]:
# Show the kernel's working directory; the data folders used below are derived from it.
os.getcwd()

'e:\\SAMS\\notebooks'

In [7]:
# The VHP data folder is a sibling of the directory the kernel runs in:
# <parent of cwd>/vhp
project_root = os.path.dirname(os.getcwd())
vhp = os.path.join(project_root, "vhp")
print(vhp)

e:\SAMS\vhp


In [28]:
# Output folder for the per-week merged CSV files (written by saveNDVH).
# exist_ok=True makes the cell safe to re-run.
cf = os.path.join(vhp, "csv")
os.makedirs(cf, exist_ok=True)

In [42]:
# Output folder for the 4-week ("monthly") mean CSV files (written by saveMonthMeanData).
# exist_ok=True makes the cell safe to re-run.
mf = os.path.join(vhp, "monthly")
os.makedirs(mf, exist_ok=True)

### Main functions

In [39]:
def openTransformMergeNDVH(np: str, vp: str):
    """Load one week's ND and VH NetCDF files and join them into one table.

    Each file is opened with xarray, flattened to a DataFrame, stripped of the
    helper columns, and filtered so that only rows with all variables of
    interest remain. The two tables are then joined on the coordinate pair.

    Args:
        np: Path to the ND file (provides ``NDVI`` and ``BT4``). Inside this
            function the name shadows the conventional numpy alias.
        vp: Path to the VH file (provides ``VCI``, ``TCI`` and ``VHI``).

    Returns:
        pandas.DataFrame: rows present in both inputs (pandas' default inner
        join) keyed on ``latitude`` / ``longitude``.
    """
    helper_columns = ['HEIGHT', 'WIDTH', 'QA', 'PLATE_CARREE']
    coordinate_keys = ['latitude', 'longitude']

    # Context managers release the NetCDF file handles as soon as the data has
    # been copied into pandas; previously the datasets were never closed, so
    # every call in the weekly loop left two files open.
    with xr.open_dataset(np) as ds_nd:
        df_nd = ds_nd.to_dataframe().reset_index()
    with xr.open_dataset(vp) as ds_vh:
        df_vh = ds_vh.to_dataframe().reset_index()

    # Drop helper columns (ignored when absent), then rows where any variable
    # of interest is missing.
    df_nd = (
        df_nd
        .drop(columns=helper_columns, errors='ignore')
        .dropna(subset=['NDVI', 'BT4'])
    )
    df_vh = (
        df_vh
        .drop(columns=helper_columns, errors='ignore')
        .dropna(subset=['VCI', 'TCI', 'VHI'])
    )

    # Suffixes only apply if both frames share a non-key column name.
    merged_df = pd.merge(df_nd, df_vh, on=coordinate_keys, suffixes=('_ND', '_VH'))
    return merged_df

In [40]:
def saveNDVH(np: str, vp: str, week: str, nd_file: str, vh_file: str):
    """Merge one week's ND/VH pair and write it to the weekly CSV folder.

    Args:
        np: Path to the ND NetCDF file.
        vp: Path to the VH NetCDF file.
        week: Week label embedded in the output file name and in the messages.
        nd_file: ND file name, used only in the "missing" message.
        vh_file: VH file name, used only in the "missing" message.

    Returns:
        None. Prints a status line in either case; nothing is written when
        either input path does not exist.
    """
    inputs_present = os.path.exists(np) and os.path.exists(vp)
    if not inputs_present:
        print(f"One or both files missing for week {week}: {nd_file}, {vh_file}")
        return

    weekly_frame = openTransformMergeNDVH(np, vp)
    csv_path = os.path.join(cf, f"VHP.G04.C07.j01.P2020{week}.csv")
    weekly_frame.to_csv(csv_path, index=False)

    print(f"Saved merged data for week {week} to {csv_path}")

In [41]:
# Process weeks 001..048 (the upper bound of range is exclusive): build the
# ND/VH file names for each week and hand the pair to saveNDVH.
for week in range(1, 49):
    week_str = f"{week:03d}"
    nd_file, vh_file = (
        f"VHP.G04.C07.j01.P2020{week_str}.{product}.nc" for product in ("ND", "VH")
    )

    nd_path = os.path.join(vhp, nd_file)
    vh_path = os.path.join(vhp, vh_file)

    saveNDVH(nd_path, vh_path, week_str, nd_file, vh_file)

Saved merged data for week 001 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020001.csv
Saved merged data for week 002 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020002.csv
Saved merged data for week 003 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020003.csv
Saved merged data for week 004 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020004.csv
Saved merged data for week 005 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020005.csv
Saved merged data for week 006 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020006.csv
Saved merged data for week 007 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020007.csv
Saved merged data for week 008 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020008.csv
Saved merged data for week 009 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020009.csv
Saved merged data for week 010 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020010.csv
Saved merged data for week 011 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020011.csv
Saved merged data for week 012 to e:\SAMS\vhp\csv\VHP.G04.C07.j01.P2020012.csv
Saved merged data for week 013 to e:\SAMS\vhp\csv\VH

## Creating Monthly CSV

### Sort files in CSV folder

In [43]:
# Weekly CSV file names in lexicographic order; the zero-padded week number in
# each name makes this the chronological order as well.
csv_files = sorted(name for name in os.listdir(cf) if name.endswith('.csv'))

In [44]:
# Show the sorted weekly file names that will be grouped into months below.
print(csv_files)

['VHP.G04.C07.j01.P2020001.csv', 'VHP.G04.C07.j01.P2020002.csv', 'VHP.G04.C07.j01.P2020003.csv', 'VHP.G04.C07.j01.P2020004.csv', 'VHP.G04.C07.j01.P2020005.csv', 'VHP.G04.C07.j01.P2020006.csv', 'VHP.G04.C07.j01.P2020007.csv', 'VHP.G04.C07.j01.P2020008.csv', 'VHP.G04.C07.j01.P2020009.csv', 'VHP.G04.C07.j01.P2020010.csv', 'VHP.G04.C07.j01.P2020011.csv', 'VHP.G04.C07.j01.P2020012.csv', 'VHP.G04.C07.j01.P2020013.csv', 'VHP.G04.C07.j01.P2020014.csv', 'VHP.G04.C07.j01.P2020015.csv', 'VHP.G04.C07.j01.P2020016.csv', 'VHP.G04.C07.j01.P2020017.csv', 'VHP.G04.C07.j01.P2020018.csv', 'VHP.G04.C07.j01.P2020019.csv', 'VHP.G04.C07.j01.P2020020.csv', 'VHP.G04.C07.j01.P2020021.csv', 'VHP.G04.C07.j01.P2020022.csv', 'VHP.G04.C07.j01.P2020023.csv', 'VHP.G04.C07.j01.P2020024.csv', 'VHP.G04.C07.j01.P2020025.csv', 'VHP.G04.C07.j01.P2020026.csv', 'VHP.G04.C07.j01.P2020027.csv', 'VHP.G04.C07.j01.P2020028.csv', 'VHP.G04.C07.j01.P2020029.csv', 'VHP.G04.C07.j01.P2020030.csv', 'VHP.G04.C07.j01.P2020031.csv', 'VHP.G0

In [8]:
def openMergeCSVFilesFourSet(months: list[str], folder=None):
    """Read a group of CSV files and join them side by side on the coordinates.

    The value columns of the i-th file (``NDVI``, ``BT4``, ``VCI``, ``TCI``,
    ``VHI``) are renamed with the file's position appended (``NDVI0``,
    ``NDVI1``, ...) so that every file keeps its own columns after the join.
    Despite the name, any number of files (one or more) is accepted.

    Args:
        months: File names to load, in the order that determines the numeric
            suffix of their columns.
        folder: Directory containing the files. Defaults to the notebook-level
            weekly CSV folder ``cf`` when omitted.

    Returns:
        pandas.DataFrame: rows whose ``latitude`` / ``longitude`` pair occurs in
        every file (pandas' default inner join), with one set of suffixed value
        columns per file.

    Raises:
        ValueError: if ``months`` is empty.
    """
    if not months:
        raise ValueError("months must contain at least one CSV file name")

    # Resolve the default lazily so an explicit folder works without `cf`.
    source_dir = cf if folder is None else folder
    dfs = [pd.read_csv(os.path.join(source_dir, f)) for f in months]
    print("Finished loading data...")

    # Give each file's value columns a positional suffix.
    value_columns = ('NDVI', 'BT4', 'VCI', 'TCI', 'VHI')
    dfs = [
        df.rename(columns={name: f'{name}{i}' for name in value_columns})
        for i, df in enumerate(dfs)
    ]

    # Merge on latitude and longitude; the columns are already unique after the
    # rename, so the empty suffixes are never applied.
    merged_df = dfs[0]
    for df in dfs[1:]:
        merged_df = pd.merge(merged_df, df, on=['latitude', 'longitude'], suffixes=('', ''))

    return merged_df

In [9]:
def calculateMeanMergedDf(df: pd.DataFrame):
    """Collapse the per-file value columns into one row-wise mean per variable.

    For each of ``NDVI``, ``BT4``, ``VCI``, ``TCI`` and ``VHI``, every column
    whose name starts with that variable name is averaged across the row
    (``mean(axis=1)``, which skips NaN by default in pandas).

    Args:
        df: Merged frame with suffixed value columns plus ``latitude`` and
            ``longitude``.

    Returns:
        pandas.DataFrame: columns ``NDVI``, ``BT4``, ``VCI``, ``TCI``, ``VHI``,
        ``latitude``, ``longitude`` on the same index as ``df``.
    """
    variables = ('NDVI', 'BT4', 'VCI', 'TCI', 'VHI')

    row_means = {
        variable: df[[name for name in df.columns if name.startswith(variable)]].mean(axis=1)
        for variable in variables
    }
    mean_df = pd.DataFrame(row_means)

    # Carry the coordinates over unchanged from the input frame.
    for coordinate in ('latitude', 'longitude'):
        mean_df[coordinate] = df[coordinate]
    return mean_df

In [10]:
def saveMonthMeanData(df: pd.DataFrame, i: int):
    """Write one block of averaged data to the monthly folder.

    Args:
        df: Frame to save (written without its index).
        i: Start offset of the block in the weekly file list; every four
            offsets map to one month number, starting at 1.

    Returns:
        None. Prints the path of the file that was written.
    """
    completed_blocks, _ = divmod(i, 4)
    month_num = completed_blocks + 1  # 1-based month counter
    file_name = f"VHP.G04.C07.j01.P2020_month{month_num:02d}.csv"
    monthly_csv_path = os.path.join(mf, file_name)

    df.to_csv(monthly_csv_path, index=False)
    print(f"Saved merged monthly data to {monthly_csv_path}")

In [56]:
# Walk the sorted weekly CSVs in consecutive groups of four files. Each group
# is merged, averaged row-wise and saved as one "month".
# NOTE(review): these are 4-week blocks, not calendar months - confirm this is intended.
week_count = len(csv_files)
for i in range(0, week_count, 4):
    monthly_files = csv_files[i:i + 4]

    merged_df = openMergeCSVFilesFourSet(monthly_files)
    mean_df = calculateMeanMergedDf(merged_df)

    saveMonthMeanData(mean_df, i)

Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month01.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month02.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month03.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month04.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month05.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month06.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month07.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month08.csv
Finished loading data...
Saved merged monthly data to e:\SAMS\vhp\monthly\VHP.G04.C07.j01.P2020_month09.csv
Finished loading data...
Sav

In [8]:
# Folder holding the monthly mean CSV files. This repeats an earlier cell's
# definition of `mf`; re-running is harmless because of exist_ok=True.
mf = os.path.join(vhp, "monthly")
os.makedirs(mf, exist_ok=True)

In [9]:
# Output folder for the yearly ("global") CSV.
# NOTE(review): `df` holds a directory path here, not a DataFrame. A later cell
# joins it into the yearly CSV path, so any rename must be applied there too.
df = os.path.join(vhp, "global")
os.makedirs(df, exist_ok=True)

In [10]:
def getYearData(months: list[str]):
    """Read the monthly CSV files from ``mf`` and join them side by side.

    The value columns of the file at position ``k`` (``NDVI``, ``BT4``,
    ``VCI``, ``TCI``, ``VHI``) get ``k`` appended to their names before the
    join, so each month keeps its own set of columns.
    NOTE(review): with ten or more files the names become ambiguous to read
    (e.g. ``BT41`` vs ``BT410``) - consider a separator.

    Args:
        months: Monthly CSV file names, in the order that determines the
            numeric suffix.

    Returns:
        pandas.DataFrame: rows whose ``latitude`` / ``longitude`` pair occurs
        in every file (pandas' default inner join).
    """
    value_columns = ['NDVI', 'BT4', 'VCI', 'TCI', 'VHI']

    frames = [pd.read_csv(os.path.join(mf, file_name)) for file_name in months]
    print("Finished loading data...")

    # Suffix each file's value columns with its position in `months`.
    suffixed = [
        frame.rename(columns={column: f'{column}{position}' for column in value_columns})
        for position, frame in enumerate(frames)
    ]

    # Fold the frames together on the coordinate pair.
    year_frame = suffixed[0]
    for month_frame in suffixed[1:]:
        year_frame = year_frame.merge(
            month_frame, on=['latitude', 'longitude'], suffixes=('', '')
        )

    return year_frame

In [13]:
# Collect the monthly CSV names in sorted order and join them into one wide
# yearly frame.
monthly_files = sorted(name for name in os.listdir(mf) if name.endswith('.csv'))
yearVhpDf = getYearData(monthly_files)

Finished loading data...


In [15]:
# Inspect the last rows of the wide yearly frame (one column set per month).
yearVhpDf.tail()

Unnamed: 0,NDVI0,BT40,VCI0,TCI0,VHI0,latitude,longitude,NDVI1,BT41,VCI1,...,NDVI10,BT410,VCI10,TCI10,VHI10,NDVI11,BT411,VCI11,TCI11,VHI11
7979576,0.41075,276.825005,97.9425,12.4775,55.212499,-55.13399,-67.482,0.401,280.075,74.144999,...,0.2915,275.82501,60.83,10.04,35.434999,0.54225,284.925005,96.8375,4.205,50.5225
7979577,0.39725,277.975005,38.9425,24.402499,31.674999,-55.13399,-67.44601,0.453,284.125005,48.954999,...,0.31,273.15001,54.385,20.34,37.362499,0.57225,287.7,69.704999,10.76,40.234999
7979578,0.44675,282.85,72.522499,10.4575,41.489999,-55.13399,-67.41,0.5075,284.075,62.307499,...,0.2955,268.050005,40.0825,44.337499,42.207499,0.60825,287.90001,60.267499,35.365,47.817499
7979579,0.33425,274.00001,67.565,13.28,40.424999,-55.13399,-67.158005,0.51175,287.05001,72.565,...,0.40475,280.375005,64.029999,14.9175,39.472499,0.49825,283.700005,75.599999,28.092499,51.844999
7979580,0.3665,279.100005,76.495,11.9425,44.22,-55.13399,-67.122,0.55025,287.100005,79.979999,...,0.4865,282.65,75.649999,21.3725,48.512499,0.5175,283.150005,83.507497,19.054999,51.282499


In [16]:
# Persist the yearly frame without its index.
# NOTE: `df` is the "global" output directory path defined in an earlier cell,
# not a DataFrame.
globalPath = os.path.join(df, "VHP.G04.C07.j01.P2020.csv")
yearVhpDf.to_csv(globalPath, index=False)

# VHP Variables to AgriClimate Data

In [3]:
import math
import numpy as np

In [4]:
# The datasets folder is a sibling of the directory the kernel runs in:
# <parent of cwd>/datasets
workspace_root = os.path.dirname(os.getcwd())
datasets = os.path.join(workspace_root, "datasets")
print(datasets)

e:\SAMS\datasets


In [11]:
# Folder containing the yearly ("global") CSV; this is the same path that an
# earlier cell stored under the name `df`.
gf = os.path.join(vhp, "global")

In [20]:
# Reload the yearly frame from disk so this section can run without
# recomputing the weekly and monthly steps.
year_csv_path = os.path.join(gf, "VHP.G04.C07.j01.P2020.csv")
yearVhpDf = pd.read_csv(year_csv_path)