# Installations

# Imports

In [1]:
import io
import os
%matplotlib inline
import matplotlib.pyplot as plt

# data management
import xarray as xr
import numpy as np
import pandas as pd

# Azure Connectivity
import getpass
import azure.storage.blob
from azure.storage.blob import BlobClient

# utilities
import time
import urllib.parse
from random import randint


### Function to download a specified file

In [2]:
def download_file(sas_url, filename, overwrite_local_file):
    """
    Downloads the specified file from Azure blob storage.
    If asked to not overwrite, then first checks if the file is available locally and does not download again in that case.
    The blob is streamed into a temporary '<filename>.part' file that is renamed onto 'filename' only after
    the download completes, so a failed download never truncates or replaces an existing local copy.
    Input Parameters:
    - sas_url   : url complete with sas token
    - filename  : name of the downloaded file
    - overwrite_local_file : if True, will overwrite, else, if already available locally, will not download again.
    Returns:  None
    Raises:   whatever the Azure client or the file system raises; the partial file is removed first.
    """
    if not overwrite_local_file and os.path.isfile(filename):
        return

    blob_client = BlobClient.from_blob_url(sas_url)
    partial_filename = filename + ".part"
    try:
        with open(partial_filename, "wb") as my_blob:
            # readinto streams chunks straight to disk instead of buffering the whole blob in memory
            blob_client.download_blob().readinto(my_blob)
        # Only a completely written file ever appears under the final name
        os.replace(partial_filename, filename)
    finally:
        # After a successful replace this is a no-op; after a failure it removes the leftover
        if os.path.exists(partial_filename):
            os.remove(partial_filename)

                  

In [12]:
def calculate_average_for_region(ds_for_region):
    """
    Takes in an xarray object that represents a region, over a specific time dimension
    and reduces all of the cells in the region, for each time period, to a single
    average value. The individual lat, lon grids for each time period become
    a single average value.
    Input Parameters:
    - ds_for_region  : xarray Dataset or DataArray with 'lat' and 'lon' dimensions
    Returns:  An xarray object of the same kind with the 'lat' and 'lon' dimensions averaged away.
    """
    # mean() divides by the number of cells that actually hold a value. Dividing a sum by
    # len(lat) * len(lon) under-reports the average whenever the region contains missing
    # (NaN) cells, because xarray's sum skips NaN for float data by default.
    return ds_for_region.mean(dim=['lat', 'lon'])




In [13]:
def convert_kelvin_to_celsius(temp: float) -> float:
    """
    Shifts a temperature from the Kelvin scale onto the Celsius scale.
    Input Parameters:
    - temp  : float temperature in Kelvin
    Returns: float the same temperature in degrees Celsius
    """
    kelvin_at_zero_celsius = 273.15
    celsius = temp - kelvin_at_zero_celsius
    return celsius

In [14]:
def convert_kelvin_to_fahrenheit(temp: float) -> float:
    """
    Translates a Kelvin temperature into degrees Fahrenheit.
    Input Parameters:
    - temp  : float temperature in Kelvin
    Returns: float the same temperature in degrees Fahrenheit
    """
    # Go through Celsius first, then rescale and offset onto the Fahrenheit scale
    celsius = temp - 273.15
    fahrenheit_per_celsius = 9/5
    return celsius * fahrenheit_per_celsius + 32

In [15]:
def get_region_and_generate_average(ds, start_lat: float, end_lat: float, start_lon: float, end_lon: float, outputUnits: str = 'K'):
    """
    Cuts a lat/lon bounding box out of a dataset, averages all of the cells inside the box
    for each time period, and converts the averaged values to the requested temperature units.
    The input values are treated as Kelvin: 'K' returns them unconverted, 'C' and 'F' apply
    the Kelvin conversion helpers to every data variable.
    Input Parameters:
    - ds  : xarray Dataset with 'lat' and 'lon' coordinates (presumably holding only Kelvin temperatures -- verify against caller)
    - start_lat  : float starting latitude of region
    - end_lat  : float ending latitude of region
    - start_lon  : float starting longitude of region
    - end_lon  : float ending longitude of region
    - outputUnits  : string desired output units, one of 'C', 'F' or 'K' (case-insensitive)
    Returns:  The xarray dataset 'ds_avg_region' that contains the average of the values across the entire region converted to desired units.
    Raises:   ValueError if a start coordinate is not smaller than its end coordinate, or if outputUnits is not an accepted unit.
    """

    ACCEPTED_UNITS = ["C", "F", "K"]
    desiredUnits = outputUnits.upper()

    # Validate everything up front so no work is done on the dataset for a bad request.
    # ValueError is still caught by callers that handle the generic Exception.
    if start_lat >= end_lat:
        raise ValueError("CoordinateException: start_lat must be smaller than end_lat")
    if start_lon >= end_lon:
        raise ValueError("CoordinateException: start_lon must be smaller than end_lon")
    if desiredUnits not in ACCEPTED_UNITS:
        raise ValueError("InvalidOutputUnitsException: must be one of: " + ", ".join(ACCEPTED_UNITS))

    # Slice the region of interest out of the dataset and collapse it to one value per time period
    ds_avg_region = calculate_average_for_region(ds.sel(lat=slice(start_lat, end_lat), lon=slice(start_lon, end_lon)))

    # Convert after averaging: the dataset is as small as possible at this point
    if desiredUnits == "C":
        ds_avg_region = ds_avg_region.map(convert_kelvin_to_celsius)
    elif desiredUnits == "F":
        ds_avg_region = ds_avg_region.map(convert_kelvin_to_fahrenheit)

    return ds_avg_region
       

## Calculating Averages for a Particular Region

In [7]:
# Local NetCDF inputs. No cell shown in this notebook downloads them, so they are
# expected to already be in the working directory.
# NOTE(review): going by the file names, the first is a 1950-1979 average of daily maximum
# temperature and the second is 2020-2029 daily maximum temperature (ssp245) -- confirm
# against the file metadata.
average_ds_filename = 'Avg_temp_max_CMIP6__30_yrs__1950_to_1979.nc'
max_temp_ds_filename = '10_year_max_temp__Rgn_1__2020_to_2029__CMIP6_ssp245.nc'

# Open both files as xarray Datasets
average_ds = xr.open_dataset(average_ds_filename)
temps_ds = xr.open_dataset(max_temp_ds_filename)



In [8]:
# Inspect the baseline dataset as loaded, before any subsetting
average_ds

In [11]:
# Inspect the projected-temperature dataset as loaded, before any subsetting
temps_ds

In [14]:
# Keep only the entries whose 'time' label matches the year 2020
# (assumes 'time' is a datetime coordinate so the string selects the whole year -- TODO confirm)
temp_2020_ds = temps_ds.sel(time='2020')

In [16]:
# Bounding Box For Seattle Region
# Longitudes are given in degrees east on a 0-360 scale (237.68 is 122.32 degrees west),
# presumably to match the datasets' 'lon' coordinate -- confirm against the files.

start_lat = 47.25
end_lat = 48.12
start_lon = 237.68
end_lon = 238.44


In [16]:
# Crop the baseline to the bounding box defined above (label-based slices on lat/lon).
# NOTE(review): assumes both coordinates are stored in ascending order -- confirm.
average_ds = average_ds.sel(lat=slice(start_lat, end_lat), lon=slice(start_lon, end_lon))

In [17]:
# Crop the 2020 data to the same bounding box so both datasets cover identical cells.
# NOTE(review): assumes both coordinates are stored in ascending order -- confirm.
temp_2020_ds = temp_2020_ds.sel(lat=slice(start_lat, end_lat), lon=slice(start_lon, end_lon))

In [20]:
# Inspect the baseline after cropping to the bounding box
average_ds

In [21]:
# Inspect the 2020 data after cropping to the bounding box
temp_2020_ds

### Swap time dimension with day dimension so comparisons will work correctly 

In [58]:
# Index the 2020 data by day-of-year instead of by timestamp so it can be compared with
# the baseline (presumably the baseline is indexed by a 'day' dimension -- verify).
# The guard keeps the cell re-runnable: once 'time' has been swapped out it is no longer
# a dimension, and calling swap_dims on it a second time would raise.
if "time" in temp_2020_ds.dims:
    temp_2020_ds = temp_2020_ds.assign_coords(day=temp_2020_ds.time.dt.dayofyear)
    temp_2020_ds = temp_2020_ds.swap_dims({"time": "day"})
temp_2020_ds

In [22]:
# Look at the baseline variable that will be subtracted; the trailing [:] selects every element
average_ds['tasmaxavg'][:]

In [60]:
# Look at the 2020 variable that the baseline is subtracted from; the trailing [:] selects every element
temp_2020_ds['tasmax'][:]

In [80]:
# Approach 1, step 1: subtract the baseline from the 2020 values cell by cell.
# The operands are single variables, so despite the _ds suffix the result is not a full Dataset.
# NOTE(review): 2020 is a leap year, so its day-of-year labels run to 366 -- confirm the
# baseline has a matching label, or that losing that day in the subtraction is acceptable.
temp_sub_ds = np.subtract(temp_2020_ds['tasmax'], average_ds['tasmaxavg'])
temp_sub_ds

In [68]:
# Approach 1, step 2: average the per-cell differences over the whole region
calculate_average_for_region(temp_sub_ds)

## Simpler Difference Calculation

In [78]:
# Approach 2, step 1: collapse each dataset to a single regional average first
average_simp_ds = calculate_average_for_region(average_ds)
temp_simp_ds = calculate_average_for_region(temp_2020_ds)

In [79]:
# Approach 2, step 2: subtract the regional baseline average from the regional 2020 average
np.subtract(temp_simp_ds['tasmax'], average_simp_ds['tasmaxavg'])

# Conclusion

Whether we average the per-cell differences between a region's temperatures and its historical averages, or first average the temperature over the entire region and then subtract the region's historical average temperature, the results are the same. This is expected: averaging is linear, so the mean of the differences equals the difference of the means as long as both datasets cover the same cells.

### Using Updated extremes Files For Comparison

In [9]:
# A SAS token is a credential, so it is read from the NASANEX_SAS_TOKEN environment
# variable (or prompted for without echo) instead of being stored in the notebook.
# The token previously embedded here carried an expiry of 2021-10-21 (its 'se' field).
extremes_filename = "Ext_max_t__Rgn_1__2015__Abv_Avg_1_K_for_3_days__CMIP6_ssp245_Avg_yrs_1950_79.nc"
extremes_blob_url = "https://nasanex30analysis.blob.core.windows.net/cmip6/extremes_max/" + extremes_filename
extremes_sas_token = os.environ.get("NASANEX_SAS_TOKEN") or getpass.getpass("SAS token for the cmip6 container: ")

# Accept the token with or without its leading '?'; always re-download (overwrite=True)
download_file(extremes_blob_url + "?" + extremes_sas_token.lstrip("?"), extremes_filename, True)





In [18]:
# Open the extremes file fetched by the download cell (same file name) and inspect it
heat_extreme_ds = xr.open_dataset("Ext_max_t__Rgn_1__2015__Abv_Avg_1_K_for_3_days__CMIP6_ssp245_Avg_yrs_1950_79.nc")

heat_extreme_ds

In [19]:
# Crop the extremes data to the Seattle bounding box defined earlier in the notebook
heat_extreme_ds = heat_extreme_ds.sel(lat=slice(start_lat, end_lat), lon=slice(start_lon, end_lon))

heat_extreme_ds

In [20]:
# Average the 'above_threshold' variable over every cell in the region.
# NOTE(review): if 'above_threshold' is a 0/1 flag per cell, this is the fraction of the
# region above the threshold -- confirm the variable's dtype and meaning.
heat_extreme_region_avg = calculate_average_for_region(heat_extreme_ds["above_threshold"])

heat_extreme_region_avg