In [1]:
import xarray as xr
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from scipy import stats
from sklearn import datasets, model_selection

#### Let's try working with 10 years worth of data from the .nc files
1. Import relevant files
2. Explore temperature trends (plot Western US seasonal temperature fluctuations)
3. Eliminate seasonal cycles for temperature and precipitation <br>
    (a) average each day of the year's temperature, remove this value from each day to reveal daily anomolies <br>
    (b) average each day of the year's precipitation, remove this amount from each day to reveal daily anomolies
4. Remove climatological trend - reread UCB class notes for removing trend via time series analysis ML
5. Isolate MJO precipitation
6. Correlate with western pacific precipitation?

In [6]:
directory = '/Users/C830793391/BIG DATA/E3SM Data/member1/bilinear/'

dataset_combined = xr.DataArray()

for file in os.listdir(directory):
    if file.endswith(".nc"):
        year_filename_position = 29
        year_string = file[year_filename_position:year_filename_position + 4]
        try:
            year = int(year_string)
            if 2000 <= year <= 2010:
                file_path = os.path.join(directory, file)
                dataset = xr.open_dataset(file_path)

                surface_temp = dataset['TS'] - 273.15
                precip = dataset['PRECT']

                dataset_combined = xr.concat([dataset_combined, surface_temp, precip], dim ='time')

                dataset.close()
        except ValueError:
            pass

specific_key = "long_name"
for variable_name in dataset.variables:     # variable_name are the variables, dataset.variables contains the variables
    variable = dataset[variable_name]       # collecting all the variables from the dataset
    if specific_key in variable.attrs:      # if 'long_name' is present in the attributes of each iterated variable, call and print the variable and its corresponding attribute
        print(f"{variable_name} : {variable.attrs[specific_key]}")

lat : Latitude of Grid Cell Centers
lon : Longitude of Grid Cell Centers
lat_bnds : Gridcell latitude interfaces
lon_bnds : Gridcell longitude interfaces
gw : Latitude quadrature weights (normalized to sum to 2.0 on global grids)
area : Solid angle subtended by gridcell
PRECT : Total (convective and large-scale) precipitation rate (liq + ice)
TS : Surface temperature (radiative)
U050 : Zonal wind at 50 mbar pressure surface
U100 : Zonal wind at 100 mbar pressure surface
U250 : Zonal wind at 250 mbar pressure surface
U850 : Zonal wind at 850 mbar pressure surface
Z500 : Geopotential Z at 500 mbar pressure surface
time : time
time_bnds : time interval endpoints


In [4]:
seasonal_temperature = dataset_combined.sel(time=dataset_combined['time.season'] == 'DJF')

# Plotting example: Plot the mean temperature for each time step in the selected season
seasonal_temperature.mean(dim='time').plot()

# Customize the plot as needed
plt.title('Seasonal Temperature (DJF)')
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.show()

KeyError: 'time'