# Function for Weighting Grid Cells by Land Area
## Example Parameter: Avg TSA
#### Author: Sofia Ingersoll & Sujan Bhattaria
##### Date: 2024-01-15

##### Packages

In [1]:
# loading packages
import pandas as pd
import xarray as xr
import numpy as np
import plotly.express as px
import matplotlib.pyplot as plt

##### Importing utils.py Functions 

In [2]:
from utils import *

##### Data
Both of these files will be stored in the utils library to aid in the automation of visual outputs.

In [4]:
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ----     load data stored in casper     ----
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

#-------File Locations--------------

# netCDF file holding the gridcell land areas
landarea_file = '/glade/campaign/cgd/tss/projects/PPE/helpers/sparsegrid_landarea.nc'

# netCDF history file used as dummy data for the preloaded visual
dummy_filepath = '/glade/campaign/cgd/tss/projects/PPE/PPEn11_OAAT/CTL2010/hist/PPEn11_CTL2010_OAAT0000.clm2.h0.2005-02-01-00000.nc'


#-------Gridcell Landareas Data-----

# open the dataset, then pull out the 'landarea' variable used as weights
landarea_ds = xr.open_dataset(landarea_file)
landarea = landarea_ds['landarea']


#-------Dummy Variable Data---------

# every variable in the dummy file is reachable through this dataset
ds = xr.open_dataset(dummy_filepath)

# a single variable may be subset from the dataset by name, e.g.
# da = ds[variable]

### Function to Automate Global Avg Weighted Landarea by Gridcell & Weighting Time

**Landarea**: Using the xarray method `weighted`, we can weight the grid cell dimension of the data array according to landarea. 

**Time**: By weighting each month by its number of days, grouping by year, and then averaging, an appropriate weight is applied to the time dimension.

The data used to define land area `landarea_file` will be preloaded by our system in the utils library, alongside the function below, upon final optimization.


***Note** The dummy variable data is currently labeled as ds, so this data can be plugged directly into the function below.* 

#### Building up function to include variable(s) selection

In [None]:
# function to read in data using variable & time range of interest

# we'll need to consider what our User will be able to select on the Dashboard side of things.
# we want to create a drop down selection for variables and available time ranges.
# this will aid in the data set scraping process.
# certain cut offs 
def ds_processing(variable, time_range=None):
    """Read one variable, average it, and map its per-gridcell time mean.

    Parameters
    ----------
    variable : str
        Name of the data variable to read from the history file (e.g. 'TSA').
    time_range : optional
        Placeholder for the dashboard's time selection. It is accepted but
        NOT applied yet; the whole 'time' dimension of the file is used.
        TODO: subset `time` once the expected format is settled.

    Returns
    -------
    variable_avg : xarray.DataArray
        Mean of the variable over the 'time' dimension; these are the values
        used to colour the scatter plot.
    global_avg : xarray.DataArray
        Mean of the variable over the 'gridcell' dimension, weighted by the
        notebook-level `landarea` array.

    Notes
    -----
    Depends on the notebook-level names `xr`, `plt` and `landarea`.
    Draws a matplotlib figure as a side effect.
    """

    #-------User Variable Data-----------------
    # the structure for the end of this filepath string will need some more flexibility
    file = '/glade/campaign/cgd/tss/projects/PPE/PPEn11_OAAT/CTL2010/hist/PPEn11_CTL2010_OAAT0000.clm2.h0.2005-02-01-00000.nc'

    # everything that reads from the file stays inside the `with` block so the
    # file handle is released once the averages and the plot have been produced
    with xr.open_dataset(file) as ds:

        # keep the name string in `variable`; the data lives in `da`
        da = ds[variable]

        #------Weight Gridcells by Landarea---
        # a single weighted reduction over 'gridcell' (no per-gridcell loop needed)
        global_avg = da.weighted(landarea).mean(dim = 'gridcell')

        #-----Calculating Variable Avg--------
        variable_avg = da.mean(dim = 'time')

        # label pieces come from the variable's own metadata when present,
        # so the colorbar is no longer hard-coded to TSA
        long_name = da.attrs.get('long_name', variable)
        units = da.attrs.get('units', '')
        label = f'Average {variable}: {long_name}'
        if units:
            label += f' ({units})'

        #-----Generating Plot------------------
        plt.figure(figsize = (9,4))
        plt.scatter(ds.grid1d_lon,
                    ds.grid1d_lat,
                    c = variable_avg,
                    cmap = 'RdPu',
                    alpha = 0.5)
        plt.xlabel('Longitude')
        plt.ylabel('Latitude')
        plt.title('Default Simulation: OAAT0000')
        plt.colorbar(label = label)

    return variable_avg, global_avg


# example: average 2m air temperature for the default simulation
variable_avg, global_avg = ds_processing('TSA')

## Isolated functions that work properly

In [28]:
def yearly_weighted_average(da):
    """Average `da` within each calendar year, weighting by days in the month.

    Parameters
    ----------
    da : xarray.DataArray
        Data with a datetime-like 'time' coordinate (presumably monthly
        values -- confirm against the input files).

    Returns
    -------
    xarray.DataArray
        `da` with the 'time' dimension replaced by 'year'. Time steps where
        `da` is NaN contribute neither to the weighted sum nor to the day
        count, so a year with no valid data comes out as NaN rather than 0.
    """
    # Number of days in the month of every time step
    days_in_month = da['time.daysinmonth']

    # Only count the days of time steps that actually hold data; otherwise
    # missing values would still inflate the denominator and bias the mean low
    valid_days = days_in_month.where(da.notnull())

    # Multiply each month's data by its days, then total per year (NaNs are skipped)
    weighted_sum = (days_in_month * da).groupby("time.year").sum(dim = 'time')

    # Total number of valid days in each year
    total_days = valid_days.groupby("time.year").sum(dim = 'time')

    return weighted_sum / total_days

In [29]:
# `da` is never assigned anywhere in the notebook, so pass the TSA variable
# from the dummy dataset `ds` explicitly
yearly_weighted_average(ds.TSA)

In [10]:
# dimension names of TSA, to confirm which axis the land-area weights apply to
ds["TSA"].dims

('time', 'gridcell')

In [11]:
# land-area file (same path as `landarea_file` in the data cell)
file2 = '/glade/campaign/cgd/tss/projects/PPE/helpers/sparsegrid_landarea.nc'
ds2 = xr.open_dataset(file2)

# per-gridcell land area, used as the weights
landarea = ds2['landarea']

# land-area-weighted mean of TSA across the 'gridcell' dimension
weighted_avg_area = (
    ds["TSA"]
    .weighted(landarea)
    .mean(dim="gridcell")
)
weighted_avg_area

In [12]:
# manual cross-check of the weighted mean: sum(landarea * TSA) scaled by 1 / sum(landarea)
(1 / landarea.sum()) * (landarea * ds["TSA"]).sum(dim="gridcell")