In [1]:
import xarray as xr
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.express as px
import os   
import pandas as pd
import datetime 
import math
import time
from sklearn.linear_model import LinearRegression

# Threshold definitions, keyed by the name of the annual metric they describe.
# Each entry records:
#   variable      - name of the daily series the threshold is applied to
#   value         - the threshold itself (units are not stated here; presumably
#                   they match the converted series -- TODO confirm)
#   min_max_val   - 'greater_than' or 'less_than', i.e. which side of the value counts
#   b_consecutive - flag distinguishing the "consecutive" precipitation metrics
thresholds_dict = {
    'annual_threshold_tasmax_high': dict(
        variable='tasmax', value=35, min_max_val='greater_than', b_consecutive=False),
    'annual_threshold_tasmin_high': dict(
        variable='tasmin', value=35, min_max_val='greater_than', b_consecutive=False),
    'annual_threshold_tasmax_low': dict(
        variable='tasmax', value=0, min_max_val='less_than', b_consecutive=False),
    'annual_threshold_tasmin_low': dict(
        variable='tasmin', value=0, min_max_val='less_than', b_consecutive=False),
    # For now keep heat index to maximum values and number of residence days there
    'annual_threshold_heatindex_high': dict(
        variable='heatindex', value=100, min_max_val='greater_than', b_consecutive=False),
    'annual_threshold_precipitation_high': dict(
        variable='pr', value=1, min_max_val='greater_than', b_consecutive=False),
    'annual_threshold_precipitation_low': dict(
        variable='pr', value=1e-3, min_max_val='less_than', b_consecutive=False),
    'annual_threshold_consecutive_precipitation_high': dict(
        variable='pr', value=1, min_max_val='greater_than', b_consecutive=True),
    'annual_threshold_consecutive_precipitation_low': dict(
        variable='pr', value=1e-3, min_max_val='less_than', b_consecutive=True),
}

# Location and data-source configuration
location_name = 'Golden, CO'
coord = (39.74256572165156, -105.16857014167543)  # (latitude, longitude) in degrees
average_cold = -1.33
average_hot = 15.37
bool_all_files = False  # True: use every model folder found under `path`
path = 'NEX-GDDP-CMIP6/'


def list_model_names(root):
    """Return the model folder names found directly under ``root``.

    Entries whose name contains 'download' or a '.' are skipped (the same
    filter the notebook has always used to drop download helpers and plain
    files). The result is sorted so repeated runs give the same order,
    which ``os.listdir`` alone does not guarantee.

    Raises whatever ``os.listdir`` raises if ``root`` cannot be listed.
    """
    return sorted(
        entry for entry in os.listdir(root)
        if 'download' not in entry and '.' not in entry
    )


# Choose between all files or just a select few
if bool_all_files:
    # Discover the models from the configured data root rather than a
    # second hard-coded copy of the folder name.
    model_experiment_names = list_model_names(path)
else:
    model_experiment_names = ['ACCESS-CM2']

experiments = ['ssp245', 'ssp585']

print(model_experiment_names)


['ACCESS-CM2']


In [20]:

class model:
    """Daily climate series for one model/experiment pair at a single location.

    On construction the instance allocates NaN-filled daily, annual and
    decadal data frames and lists the data files found under
    ``<path>/<experiment_name>/r1i1p1f1/<variable>/``. ``compile_data`` then
    reads each file at the grid cell nearest to a coordinate and stores the
    converted values in ``df_data``.

    Attributes:
        path: root folder of the model's data.
        model_name: label printed in progress messages.
        experiment_name: sub-folder holding the experiment's files.
        file_list: full paths of all discovered data files.
        df_data / df_annual / df_decade: result frames, one column per variable.
        data_lat / data_lon: coordinates of the grid cell used by the most
            recent ``read_one_file`` call (longitude in -180..180).
        bool_data_compiled: True once ``compile_data`` has finished.
    """

    # Unit-conversion constants used by read_one_file.
    KELVIN_OFFSET = 273.15   # K -> degrees Celsius
    SECONDS_PER_DAY = 86400
    MM_PER_INCH = 25.4
    MPS_TO_MPH = 2.2369      # metres per second -> miles per hour

    def __init__(self, path, model_name, experiment_name):
        """Allocate the result frames and discover the data files.

        Args:
            path: root folder of the model's data.
            model_name: label used in progress messages.
            experiment_name: experiment sub-folder (e.g. 'ssp245').

        Raises:
            ValueError: if ``time_freq`` is not 'day' (the only supported value).
            OSError: from ``os.listdir`` if a variable folder cannot be listed.
        """
        self.path = path
        self.model_name = model_name
        self.experiment_name = experiment_name
        self.time_freq = 'day'
        #self.experiments = ['rcp45', 'rcp85']
        self.bool_data_compiled = False
        self.data_lat = 0.0
        self.data_lon = 0.0

        self.variables_to_compile = [
            'hurs',
            'pr', # precipitation
            'sfcWind', # wind
            'tas', 'tasmax', 'tasmin'] #temperature

        # Creating a placeholder while prototyping
        self.annual_variables = ['blank']
        self.decade_variables = ['blank']

        # Setting up dataframes to handle the variables
        if self.time_freq != 'day':
            # Previously this fell through to a NameError on the next line.
            raise ValueError('Unsupported time_freq: ' + str(self.time_freq))
        date_time_span = pd.date_range(start = '2006-01-01 12:00:00', end = '2060-12-31 12:00:00', freq = 'D')
        self.df_data = pd.DataFrame(np.nan, columns = self.variables_to_compile, index = date_time_span)

        # NOTE(review): the 'y' / '10y' aliases are kept as-is for compatibility;
        # newer pandas releases may warn about them -- confirm against the
        # pandas version in use.
        date_time_span = pd.date_range(start = '2010-01-01 12:00:00', end = '2060-01-01 12:00:00', freq = 'y')
        self.df_annual = pd.DataFrame(np.nan, columns = self.variables_to_compile, index = date_time_span)

        date_time_span = pd.date_range(start = '2010-01-01 12:00:00', end = '2060-01-01 12:00:00', freq = '10y')
        self.df_decade = pd.DataFrame(np.nan, columns = self.variables_to_compile, index = date_time_span)

        # Should run this on init and have access to it
        self.get_files_and_variables()

    @staticmethod
    def _wrap_longitude(lon):
        """Map longitudes from either 0..360 or -180..180 onto [-180, 180)."""
        return (np.asarray(lon) + 180.0) % 360.0 - 180.0

    def _variable_from_path(self, file):
        """Return the variable a data file belongs to, or None if unknown.

        The containing folder name is checked first (this is how
        ``get_files_and_variables`` lays the paths out), then the part of the
        file name before the first underscore. Exact matches only, so
        unrelated path components that merely contain 'pr' or 'tas' can no
        longer select the wrong variable.
        """
        parent = os.path.basename(os.path.dirname(file))
        if parent in self.variables_to_compile:
            return parent
        prefix = os.path.basename(file).split('_')[0]
        if prefix in self.variables_to_compile:
            return prefix
        return None

    # Pulls all the files and variables from a folder containing multiple
    # models, frequencies, and experiments
    def get_files_and_variables(self):
        """Populate ``self.file_list`` with every file of every variable."""
        self.file_list = []
        for variable in self.variables_to_compile:
            variable_path = self.path+'/'+self.experiment_name+'/r1i1p1f1/'+variable+'/'
            # Sorted so the processing order is the same on every run.
            for file in sorted(os.listdir(variable_path)):
                self.file_list.append(variable_path+file)

    # For each file, reads a single file and returns a series with the index as datetime and values as the variable
    def read_one_file(self, coord, filename, variable):
        """Read one variable at the grid cell nearest to ``coord``.

        Args:
            coord: (latitude, longitude) in degrees; longitude in -180..180.
            filename: path of the dataset to open.
            variable: name of the data variable to extract.

        Returns:
            pandas Series named ``variable``, indexed by the file's time
            values, after unit conversion (temperatures K -> deg C,
            'pr' scaled by 86400/25.4, 'sfc*' scaled to mph).

        Side effects:
            Updates ``self.data_lat`` / ``self.data_lon`` with the
            coordinates of the selected grid cell.
        """
        # The context manager releases the file handle even if a lookup fails.
        with xr.open_dataset(filename) as handle:
            lat = handle['lat'].values
            # Wrap instead of shifting by 180: on a 0..360 grid a plain
            # subtraction selects a cell half a world away.
            lon = self._wrap_longitude(handle['lon'].values)
            ilat = np.argmin(np.abs(lat - coord[0]))
            ilon = np.argmin(np.abs(lon - coord[1]))
            self.data_lat = lat[ilat]
            self.data_lon = lon[ilon]

            time_values = handle['time'].values
            # assumes the variable's dimensions are ordered (time, lat, lon) -- TODO confirm
            values = handle[variable][:, ilat, ilon].values

        series = pd.Series(values, index = pd.Index(time_values), name = variable)

        # Unit conversions, selected by substring so tas/tasmax/tasmin share one rule
        if 'tas' in variable:
            series = series - self.KELVIN_OFFSET # Kelvin to Celsius
        if 'pr' in variable:
            # presumably kg m-2 s-1 -> inches per day; verify against the data set's units
            series = series*self.SECONDS_PER_DAY/self.MM_PER_INCH
        if 'sfc' in variable:
            series = series*self.MPS_TO_MPH # Converting to MPH

        return series

    # Stores all the data produced by reading one file into the data frame which can then carry the data with it through the model
    def compile_data(self, coord):
        """Read every file in ``self.file_list`` into ``self.df_data``.

        Values are written into the column of the file's variable, on the
        rows whose timestamps appear in both the file and ``df_data``.
        Files that cannot be classified, read or aligned are reported and
        skipped so one bad file does not abort the whole run.

        Args:
            coord: (latitude, longitude) passed through to ``read_one_file``.
        """
        print('Data Compiling for ', self.model_name)
        for file in self.file_list:
            variable = self._variable_from_path(file)
            if variable is None:
                print('Skipping file with unrecognised variable: ', file)
                continue

            try:
                temp_series = self.read_one_file(coord, file, variable)
                shared_dates = self.df_data.index.intersection(temp_series.index)
                if len(shared_dates) == 0:
                    print('No dates in common with the data frame: ', file)
                    continue
                # Column assignment; `.loc[variable]` alone addressed a row label.
                self.df_data.loc[shared_dates, variable] = temp_series.loc[shared_dates]
            except Exception as error:
                # Report the reason instead of silently swallowing it.
                print('Could not compile ', file, ': ', repr(error))
                continue

        # Changing the data compiled boolean to true
        self.bool_data_compiled = True
        print('Data Compiled!')




In [21]:
# Instantiate one `model` per (climate model, experiment) combination.
# Each instance is rooted at <path><model_name> and labelled "<model_name>-<experiment>".
model_list = []
for model_name in model_experiment_names:
    print(model_name)
    for experiment in experiments:
        print(experiment)
        model_to_read = model(f'{path}{model_name}', f'{model_name}-{experiment}', experiment)
        model_list += [model_to_read]

print(len(model_list))



ACCESS-CM2
ssp245
ssp585
2


In [23]:
# Read every discovered data file for each model at the configured location
# (`coord`); compile_data prints its own progress messages.
for temp_model in model_list:
    temp_model.compile_data(coord)

Data Compiling for  ACCESS-CM2-ssp245
Data Compiled!
Data Compiling for  ACCESS-CM2-ssp585
Data Compiled!


In [4]:
# NOTE: this cell is disabled. The analysis code below is wrapped in a
# triple-quoted string, so nothing in it runs; the notebook only echoes the
# string back as the cell's output.
'''from analyze_model import analyze_model

# Analyzing each model data
for item in model_list:
    try:
        analyze_model(item, thresholds_dict, average_cold, average_hot)
        path = 'output_data/'
        item.df_decade.to_excel(path+item.model_name+'_decade_values.xlsx')
    except: 
        print('Cannot analyze model: ', item, ' insufficient data')'''

"from analyze_model import analyze_model\n\n# Analyzing each model data\nfor item in model_list:\n    try:\n        analyze_model(item, thresholds_dict, average_cold, average_hot)\n        path = 'output_data/'\n        item.df_decade.to_excel(path+item.model_name+'_decade_values.xlsx')\n    except: \n        print('Cannot analyze model: ', item, ' insufficient data')"

In [5]:
# NOTE: this cell is disabled. The graphing code below is wrapped in a
# triple-quoted string, so nothing in it runs; the notebook only echoes the
# string back as the cell's output.
'''from graphing import *

# Graphing Models
for item in model_list:
    #item = model_list[0]
    variable_list = item.df_decade.columns
    #print(variable_list)
    for variable in variable_list:

        if 'Days' in variable:
            unit = 'Days per Year'
        elif 'Frequency' in variable:
            unit = 'Average Occurrences per Year'
        elif 'Duration' in variable:
            unit = 'Days'
        elif ('temperature' in variable) or ('Temperature' in variable):
            unit = 'Temperature (°C)'
        elif ('precipitation' in variable) or ('Precipitation' in variable):
            unit = 'Precipitation (inches)'
        else:
            unit = 'Not Assigned'

        graph_write_decade_variable(item, variable, unit)    
    
    print('Graphing Complete')'''

"from graphing import *\n\n# Graphing Models\nfor item in model_list:\n    #item = model_list[0]\n    variable_list = item.df_decade.columns\n    #print(variable_list)\n    for variable in variable_list:\n\n        if 'Days' in variable:\n            unit = 'Days per Year'\n        elif 'Frequency' in variable:\n            unit = 'Average Occurrences per Year'\n        elif 'Duration' in variable:\n            unit = 'Days'\n        elif ('temperature' in variable) or ('Temperature' in variable):\n            unit = 'Temperature (°C)'\n        elif ('precipitation' in variable) or ('Precipitation' in variable):\n            unit = 'Precipitation (inches)'\n        else:\n            unit = 'Not Assigned'\n\n        graph_write_decade_variable(item, variable, unit)    \n    \n    print('Graphing Complete')"