In [236]:
import pandas as pd
import pandas as pd, numpy as np
from pathlib import Path
import fsspec
import matplotlib.pyplot as plt
from scipy.stats import pearsonr

Exam Functions

In [237]:
def station_data(station_id: str,
                 begin_date = "1-Jan-1991",
                 end_date = "31-Dec-2020"):
    '''Download one GHCN-Daily station and return its daily minimum temperature.

    Parameters
    ----------
    station_id : str
        GHCN station identifier, used to build the per-station CSV path in
        NOAA's public S3 bucket.
    begin_date, end_date : anything accepted by ``pd.to_datetime``
        Inclusive bounds of the period to keep (default 1991-2020).

    Returns
    -------
    pandas.DataFrame
        Indexed by ``DATE`` with a single column ``daily_min_temp`` holding the
        TMIN ``DATA_VALUE`` divided by 10 (GHCN-Daily stores temperatures in
        tenths of a degree Celsius, so the result is in degrees Celsius).

    Notes
    -----
    Rows are not filtered on ``Q_FLAG``.
    NOTE(review): GHCN-Daily marks observations that failed quality checks with
    a non-blank Q_FLAG -- confirm whether those should be excluded.
    '''

    # Per-station CSV inside the public GHCN-Daily bucket
    download_string = "s3://noaa-ghcn-pds/csv/by_station/" + str(station_id) + ".csv"

    df = pd.read_csv(
                     download_string,
                     storage_options={"anon": True},  # passed to `s3fs.S3FileSystem`
                     dtype={'Q_FLAG': 'object', 'M_FLAG': 'object'},
                     parse_dates=['DATE'],
                     # Infer dtypes from the whole file in one pass; chunked
                     # inference can emit a mixed-type DtypeWarning on load.
                     low_memory=False,
                     ).set_index('DATE')

    # Keep observations inside the requested period (both bounds inclusive) ...
    in_period = (df.index >= pd.to_datetime(begin_date)) & (df.index <= pd.to_datetime(end_date))
    # ... and only the daily minimum temperature element
    is_tmin = (df['ELEMENT'] == 'TMIN').to_numpy()

    # DATA_VALUE is divided by 10 to undo the tenths-of-a-degree storage
    return (df.loc[in_period & is_tmin, 'DATA_VALUE'] / 10).to_frame('daily_min_temp')



Exam variables

In [238]:
# Station where we are quantifying frost/freeze risk
# (GHCN station ID passed to station_data; the report cells label it Plant City, FL)

station = 'USC00087205'
# Whitespace-delimited monthly table of Nino-region SSTs and anomalies, read in Question 2
enso_link = 'https://www.cpc.ncep.noaa.gov/data/indices/sstoi.indices'

## Question 1

In [239]:
# Loading station data: daily minimum temperatures for `station` over
# station_data's default period (1-Jan-1991 through 31-Dec-2020)

df = station_data(station)

  df = pd.read_csv(


In [240]:
# Convert the daily minimum from Celsius to Fahrenheit: F = C * 9/5 + 32.
# The column is overwritten in place, so run this cell only once per load --
# re-running it without reloading `df` would apply the conversion twice.

df['daily_min_temp'] = df['daily_min_temp'].mul(9/5).add(32)

In [241]:
# Split the record into one frame per month of interest:
# January (1), October (10), November (11) and December (12)

df_J, df_O, df_N, df_D = (
    df[df.index.month == month_number] for month_number in (1, 10, 11, 12)
)

In [242]:
# Daily-minimum thresholds in degrees Fahrenheit used throughout the analysis
FREEZE_THRESHOLD_F = 28
FROST_THRESHOLD_F = 32


def count_days_at_or_below(monthly_df, threshold):
    '''Count, per calendar year, the days whose minimum is <= ``threshold``.

    Parameters
    ----------
    monthly_df : pandas.DataFrame
        Frame with a DatetimeIndex and a ``daily_min_temp`` column.
    threshold : float
        Temperature threshold, in the same units as ``daily_min_temp``.

    Returns
    -------
    pandas.Series
        Indexed by year; each value is the number of qualifying days.
        Missing (NaN) minima compare as False and so are not counted, and a
        year with no rows in ``monthly_df`` does not appear in the result.
    '''
    at_or_below = monthly_df['daily_min_temp'] <= threshold
    return at_or_below.groupby(at_or_below.index.year).sum()


# January freeze and frost days per year
df_J_frz = count_days_at_or_below(df_J, FREEZE_THRESHOLD_F)
df_J_frst = count_days_at_or_below(df_J, FROST_THRESHOLD_F)

# October freeze and frost days per year
df_O_frz = count_days_at_or_below(df_O, FREEZE_THRESHOLD_F)
df_O_frst = count_days_at_or_below(df_O, FROST_THRESHOLD_F)

# November freeze and frost days per year
df_N_frz = count_days_at_or_below(df_N, FREEZE_THRESHOLD_F)
df_N_frst = count_days_at_or_below(df_N, FROST_THRESHOLD_F)

# December freeze and frost days per year
df_D_frz = count_days_at_or_below(df_D, FREEZE_THRESHOLD_F)
df_D_frst = count_days_at_or_below(df_D, FROST_THRESHOLD_F)

In [243]:
# Frost risk = average number of frost days per year, computed month by month

Jan_frost_risk, Oct_frost_risk, Nov_frost_risk, Dec_frost_risk = (
    yearly_counts.mean()
    for yearly_counts in (df_J_frst, df_O_frst, df_N_frst, df_D_frst)
)

In [244]:
# Freeze risk = average number of freeze days per year, computed month by month

Jan_freeze_risk, Oct_freeze_risk, Nov_freeze_risk, Dec_freeze_risk = (
    yearly_counts.mean()
    for yearly_counts in (df_J_frz, df_O_frz, df_N_frz, df_D_frz)
)

In [245]:
# Report the mean yearly count of frost days (daily minimum <= 32 F) for each
# month, rounded to 2 decimals
print('Frost Risks per Winter Month for Plant City, FL')
print(f'January: {np.round(Jan_frost_risk,2)} days')
print(f'October: {np.round(Oct_frost_risk,2)} days')
print(f'November: {np.round(Nov_frost_risk,2)} days')
print(f'December: {np.round(Dec_frost_risk,2)} days')

Frost Risks per Winter Month for Plant City, FL
January: 1.87 days
October: 0.0 days
November: 0.03 days
December: 0.6 days


In [246]:
# Report the mean yearly count of freeze days (daily minimum <= 28 F) for each
# month, rounded to 3 decimals
print('Freeze Risks per Winter Month for Plant City, FL')
print(f'January: {np.round(Jan_freeze_risk,3)} days')
print(f'October: {np.round(Oct_freeze_risk,3)} days')
print(f'November: {np.round(Nov_freeze_risk,3)} days')
print(f'December: {np.round(Dec_freeze_risk,3)} days')

Freeze Risks per Winter Month for Plant City, FL
January: 0.5 days
October: 0.0 days
November: 0.0 days
December: 0.167 days


## Question 2

In [247]:
# Column names for the ENSO data (the file's own header row is skipped below)
names = ['year', 'month', 'NINO1+2', 'NINO1+2_Anom', 'NINO3', 'NINO3_Anom', 'NINO4', 'NINO4_Anom', 'NINO3.4', 'NINO3.4_Anom']

# Loading the Data
# skiprows=4 drops the leading lines of the file before the first row that is kept
enso = pd.read_csv(enso_link, sep=r'\s+', header=None, skiprows=4, names=names, engine='python')

# Index each row by the first day of its month, built from the file's own
# year/month columns. A hard-coded date range would stop matching the row
# count (and raise) as soon as the source file gains another month.
enso.index = pd.DatetimeIndex(pd.to_datetime(enso[['year', 'month']].assign(day=1)))

In [248]:
# Converting the freeze day indices back to datetimes

def month_start_index(years, month):
    '''Return a DatetimeIndex holding the first day of ``month`` for each year.

    ``years`` may be an index of integer years or an existing DatetimeIndex
    (its ``.year`` is used), so re-running this cell gives the same result.
    The index name of ``years`` is carried over.
    '''
    year_values = getattr(years, 'year', years)
    return pd.DatetimeIndex(
        [pd.Timestamp(year=int(y), month=month, day=1) for y in year_values],
        name=getattr(years, 'name', None),
    )


# Each month's dates come from that month's OWN years. Reusing January's years
# for every month would mislabel rows (or raise on a length mismatch) whenever
# the months do not cover exactly the same set of years.
Jan_dates = month_start_index(df_J_frz.index, 1)
Oct_dates = month_start_index(df_O_frz.index, 10)
Nov_dates = month_start_index(df_N_frz.index, 11)
Dec_dates = month_start_index(df_D_frz.index, 12)

df_D_frz.index = Dec_dates
df_J_frz.index = Jan_dates
df_N_frz.index = Nov_dates
df_O_frz.index = Oct_dates


In [249]:
# Stack the four monthly freeze-day series into one ONDJ series in date order
winter_frz = pd.concat([df_D_frz, df_J_frz, df_N_frz, df_O_frz]).sort_index()

In [250]:
# Inner-join ENSO with the freeze-day counts on their date indexes, which keeps
# only the ONDJ months present in both
combined_data = enso.merge(
    winter_frz,
    how='inner',
    left_index=True,
    right_index=True,
)

In [251]:
# Display the merged ENSO / freeze-day table (long frames are truncated in the rendered output)
combined_data

Unnamed: 0,year,month,NINO1+2,NINO1+2_Anom,NINO3,NINO3_Anom,NINO4,NINO4_Anom,NINO3.4,NINO3.4_Anom,daily_min_temp
1991-01-01,1991,1,23.73,-0.78,25.63,-0.05,28.62,0.40,26.89,0.33,0
1991-10-01,1991,10,21.09,0.22,25.36,0.27,29.31,0.63,27.41,0.64,0
1991-11-01,1991,11,21.92,0.29,25.93,0.73,29.12,0.44,27.71,0.89,0
1991-12-01,1991,12,23.13,0.29,26.30,1.03,29.21,0.77,28.14,1.50,0
1992-01-01,1992,1,24.59,0.07,26.86,1.18,28.80,0.58,28.23,1.67,0
...,...,...,...,...,...,...,...,...,...,...,...
2019-12-01,2019,12,23.11,0.27,25.56,0.29,29.29,0.85,27.08,0.44,0
2020-01-01,2020,1,24.49,-0.03,25.89,0.22,29.16,0.94,27.08,0.52,0
2020-10-01,2020,10,20.12,-0.76,24.19,-0.90,27.97,-0.71,25.64,-1.13,0
2020-11-01,2020,11,20.95,-0.67,24.17,-1.03,27.98,-0.70,25.59,-1.23,0


In [252]:
# Pearson correlation of each Nino-region anomaly with the monthly freeze-day count
freeze_days = combined_data['daily_min_temp']
Nino1_2_corr, Nino3_corr, Nino4_corr, Nino34_corr = (
    pearsonr(combined_data[anomaly_column], freeze_days)
    for anomaly_column in ('NINO1+2_Anom', 'NINO3_Anom', 'NINO4_Anom', 'NINO3.4_Anom')
)

In [253]:
# Report each region's Pearson correlation coefficient and p-value, rounded to 2 decimals
print('The correlations for ENSO regions and number of freeze days per month in Plant City, Fl is as follows:')
print(f'Nino 1+2: {np.round(Nino1_2_corr.statistic,2)} with a p-value of {np.round(Nino1_2_corr.pvalue,2)}')
print(f'Nino 3: {np.round(Nino3_corr.statistic,2)} with a p-value of {np.round(Nino3_corr.pvalue,2)}')
print(f'Nino 4: {np.round(Nino4_corr.statistic,2)} with a p-value of {np.round(Nino4_corr.pvalue,2)}')
print(f'Nino 3.4: {np.round(Nino34_corr.statistic,2)} with a p-value of {np.round(Nino34_corr.pvalue,2)}')

The correlations for ENSO regions and number of freeze days per month in Plant City, Fl is as follows:
Nino 1+2: -0.14 with a p-value of 0.14
Nino 3: -0.12 with a p-value of 0.18
Nino 4: -0.14 with a p-value of 0.14
Nino 3.4: -0.11 with a p-value of 0.22


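There does not appear to be a strong correlation between the SST anomalies in any ENSO region and the number of freeze days per month in Plant City, FL: all four correlations are weak and negative (about -0.11 to -0.14), and none is statistically significant at the 5% level (all p-values are above 0.05).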