In [2]:
import sys
import calendar
import pandas as pd
import numpy as np
import datetime as dt
from pathlib import Path

In [148]:
# Project root = parent of the kernel's current working directory.
# NOTE(review): assumes the notebook is run from a direct sub-folder of the
# project root — confirm.
PROJPATH = Path().resolve().parent

# Make <project>/src importable. The membership check keeps sys.path free of
# duplicate entries when this cell is executed more than once.
if (PROJPATH / 'src').as_posix() not in sys.path:
    sys.path.append((PROJPATH / 'src').as_posix())
from predictors_remote import d_urls


# Root of the input data, folder for derived output, and a minute-resolution
# timestamp (YYYYmmddHHMM) that is embedded in the output file name.
datadir = PROJPATH / 'data'
outdir = datadir / 'working'
timestamp = dt.datetime.now().strftime("%Y%m%d%H%M")

In [6]:
# File names of raw teleconnection tables.
# NOTE(review): `squaredata` and `oni` are not referenced by any visible cell,
# and 'PacificNA.txt' differs from the 'Pacific_NA' name shown in the PSL
# source table — confirm which spelling is current.
squaredata = [
    'Arctic_Oscillation.txt',
    'Nino3.txt',
    'EastPac_NorthPac.txt',
    'Nino34.txt',
    'Southern_Osc.txt',
    'Nino1_2.txt',
    'Nino4.txt',
    'PacificNA.txt',
]
extra, oni = 'cpc_multiple_teleconn.txt', 'Oceanic_Nino.txt'
pdo, npi = 'Pacific_Decadal.txt', 'North_Pac_Pattern.txt'

# Header for the Year x month tables: 'Year' followed by the twelve month
# abbreviations (index 0 of calendar.month_abbr is an empty placeholder).
columns = ['Year', *calendar.month_abbr[1:]]
columns

['Year',
 'Jan',
 'Feb',
 'Mar',
 'Apr',
 'May',
 'Jun',
 'Jul',
 'Aug',
 'Sep',
 'Oct',
 'Nov',
 'Dec']

In [106]:
# Lookup from month number to abbreviated month name. Key 0 maps to the empty
# string because calendar.month_abbr carries a blank placeholder at index 0.
d = {number: abbreviation for number, abbreviation in enumerate(calendar.month_abbr)}
d

{0: '',
 1: 'Jan',
 2: 'Feb',
 3: 'Mar',
 4: 'Apr',
 5: 'May',
 6: 'Jun',
 7: 'Jul',
 8: 'Aug',
 9: 'Sep',
 10: 'Oct',
 11: 'Nov',
 12: 'Dec'}

In [7]:
# Keep only the teleconnection entries whose 'format' field equals 'PSL' and
# tabulate them, one row per entry.
squareteleconn = list(
    filter(lambda entry: entry['format'] == 'PSL', d_urls.TELECONNECTIONURLS)
)
squareDF = pd.DataFrame.from_records(squareteleconn)
squareDF

Unnamed: 0,name,shortname,format,nodata,skipfooter,skipentry,URL
0,Arctic_Oscillation,AO,PSL,-999.0,3,False,https://psl.noaa.gov/data/correlation/ao.data
1,EastPac_NorthPac,EP-NP,PSL,-99.9,3,False,https://psl.noaa.gov/data/correlation/epo.data
2,Pacific_NA,PNA,PSL,-99.9,3,False,https://psl.noaa.gov/data/correlation/pna.data
3,Southern_Osc,SOI,PSL,-99.99,3,False,https://psl.noaa.gov/data/correlation/soi.data
4,Nino1_2,Nino1+2,PSL,-99.99,3,False,https://psl.noaa.gov/data/correlation/nina1.an...
5,Nino3,Nino3,PSL,-99.99,3,False,https://psl.noaa.gov/data/correlation/nina3.an...
6,Nino34,Nino3.4,PSL,-99.99,3,False,https://psl.noaa.gov/data/correlation/nina34.a...
7,Nino4,Nino4,PSL,-99.99,3,False,https://psl.noaa.gov/data/correlation/nina4.an...
8,Oceanic_Nino,ONI,PSL,-99.9,8,False,https://psl.noaa.gov/data/correlation/oni.data


In [80]:
def get_data(row):
    """Load one Year x month teleconnection table from 1980 onwards.

    Parameters
    ----------
    row : mapping-like record (a row of the PSL source table)
        Must provide 'name' (file stem under predictors_raw/teleconnections),
        'skipfooter' (number of trailing lines to ignore), 'nodata' (the
        missing-value marker) and 'shortname'.

    Returns
    -------
    pandas.DataFrame
        Columns as in the module-level `columns` list; 'Year' is integer, the
        month columns are float with the missing-value marker replaced by NaN,
        and only rows with Year >= 1980 are kept. The short name is attached
        as the ad-hoc attribute `.meta`.
    """
    # row['name'] must use bracket access: on a pandas Series, `row.name` is
    # the Series' own label, not the 'name' field.
    source = datadir / f"predictors_raw/teleconnections/{row['name']}.txt"
    table = pd.read_csv(
        source,
        names=columns,
        sep=r'\s+',
        skiprows=1,
        skipfooter=row.skipfooter,
        engine='python',  # skipfooter is only supported by the python engine
    ).astype(float)
    table['Year'] = table['Year'].astype(int)
    table = table.replace(row.nodata, np.nan)
    recent = table[table['Year'] >= 1980]
    # Set the attribute on the final object: it is a plain instance attribute
    # and is not carried over to frames derived from this one.
    recent.meta = row['shortname']
    return recent

In [81]:
# Load every PSL-format table: apply(axis=1) calls get_data once per row of
# squareDF and collects the returned frames.
results = squareDF.apply(get_data, axis=1)
# Spot-check the entry at label 2: print its short name, then show the table.
print(results[2].meta)
results[2]

PNA


Unnamed: 0,Year,Jan,Feb,Mar,Apr,May,Jun,Jul,Aug,Sep,Oct,Nov,Dec
32,1980,-1.01,2.03,-0.64,1.66,-0.16,-0.81,-0.81,-2.03,-0.08,2.87,1.28,-0.57
33,1981,2.46,0.21,1.27,-1.23,1.83,-0.1,-1.02,-1.54,0.08,-1.43,1.19,-0.42
34,1982,-1.75,-1.2,-1.66,-2.08,-0.53,2.17,0.49,0.31,0.83,-0.86,-0.47,0.45
35,1983,0.87,1.44,2.03,0.98,-0.08,2.1,0.82,0.5,-1.37,0.26,1.75,-0.61
36,1984,0.6,0.73,1.08,1.41,0.37,-0.36,-2.61,-1.14,-0.02,-0.53,0.39,-1.9
37,1985,1.44,-0.99,-1.19,-1.27,-0.82,1.28,-0.19,-0.53,-0.63,-1.5,-1.83,1.08
38,1986,0.6,0.41,0.51,-0.15,-0.03,0.39,-0.35,-1.49,-0.28,1.18,-0.71,1.06
39,1987,0.63,0.57,0.85,1.54,-1.02,0.56,-0.43,0.74,-2.6,0.75,1.19,0.5
40,1988,0.03,1.37,0.38,1.12,0.61,1.31,1.6,-0.62,-0.97,0.89,0.11,0.33
41,1989,-1.57,-1.71,-1.56,-0.77,-0.04,-0.56,-0.59,-0.27,0.41,-1.08,-0.7,0.57


In [82]:
# For every loaded PSL table, collect the March-July 2004 values as a
# one-column frame named '<shortname>_2004' (rows = month abbreviations).
out = []
for result in results:
    # Index a derived frame by year instead of mutating `result` in place:
    # an in-place set_index consumes the 'Year' column, so executing this
    # cell a second time would raise KeyError: 'Year'.
    by_year = result.set_index('Year')
    # Read the short name from the original object; the ad-hoc `.meta`
    # attribute is not carried over to frames derived from it.
    meta = result.meta
    # Selecting a single year label yields a Series indexed by month;
    # wrapping it in a DataFrame makes it a single column, renamed below.
    newDF = pd.DataFrame(by_year.loc[2004, ['Mar', 'Apr', 'May', 'Jun', 'Jul']])
    newDF.columns = [meta + '_2004']
    out.append(newDF)


In [83]:
# Inspect the frame at position 4 of the collected list.
out[4]

Unnamed: 0,Nino1+2_2004
Mar,-0.74
Apr,-0.57
May,-1.59
Jun,-1.07
Jul,-0.78


In [None]:
# Place the collected one-column frames side by side (one column per index).
# NOTE(review): this rebinds `squareDF`, which until here held the table of PSL
# sources that `squareDF.apply(get_data, axis=1)` reads; re-running that call
# after this cell would no longer work. Consider a separate name for this result.
squareDF = pd.concat(out, axis=1)

Unnamed: 0,AO_2004,EP-NP_2004,PNA_2004,SOI_2004,Nino1+2_2004,Nino3_2004,Nino3.4_2004,Nino4_2004,ONI_2004
Mar,0.318,-1.28,-0.0,0.7,-0.74,0.13,0.12,0.1,0.23
Apr,-0.409,1.23,0.25,-1.5,-0.57,-0.07,0.07,0.09,0.17
May,-0.094,1.92,-1.46,1.7,-1.59,-0.45,0.06,0.11,0.17
Jun,-0.236,3.36,-0.28,-1.4,-1.07,-0.26,0.13,0.22,0.28
Jul,-0.201,1.19,-0.33,-0.8,-0.78,-0.02,0.49,0.42,0.47


In [90]:
# Pacific Decadal Oscillation table: Year x month layout (same header list as
# the PSL tables); the first two lines of the file are skipped and 99.99 is
# treated as the missing-value marker.
fp_pdo = datadir / f"predictors_raw/teleconnections/{pdo}"
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True
# and matches how get_data reads the PSL tables.
data_pdo = pd.read_csv(fp_pdo, skiprows=2, sep=r'\s+', names=columns,
                       engine='python')
data_pdo = data_pdo.astype(float)
data_pdo['Year'] = data_pdo['Year'].astype(int)
data_pdo = data_pdo.set_index('Year').replace(99.99, np.nan)

# March-July 2004 as a single column named 'PDO_2004', added to the collection.
meta = 'PDO'
newDF = pd.DataFrame(data_pdo.loc[2004, ['Mar', 'Apr', 'May', 'Jun', 'Jul']])
newDF.columns = [meta + '_2004']
out.append(newDF)
newDF

  data_pdo = pd.read_csv(fp_pdo, skiprows=2, delim_whitespace=True, names=columns,


Unnamed: 0,PDO_2004
Mar,-0.15
Apr,-0.0
May,0.61
Jun,-0.11
Jul,0.04


In [125]:
# North Pacific index: two-column file (combined year+month code, value); the
# first line is skipped and -999.0 is treated as the missing-value marker.
fp_npi = datadir / f"predictors_raw/teleconnections/{npi}"
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
data_npi = pd.read_csv(fp_npi, skiprows=1, sep=r'\s+', names=['YrMnth', 'NPI'],
                       engine='python')

# Drop missing records, then split the code into its parts: the first four
# characters are the year, the remainder is the month number, which is mapped
# to its abbreviation through `d`. assign() builds new frames, so nothing is
# written into a filtered slice.
data_npi = (
    data_npi
    .replace(-999.0, np.nan)
    .dropna()
    .assign(
        Year=lambda df: df['YrMnth'].astype(str).str[:4].astype(int),
        Month=lambda df: df['YrMnth'].astype(str).str[4:].astype(int).map(d),
    )
)
data_npi = data_npi[data_npi['Month'].isin(['Mar', 'Apr', 'May', 'Jun', 'Jul'])]

# Convert raw values to anomalies: subtract, per calendar month, the mean of
# that month over all remaining records.
data_npi = (
    data_npi
    .assign(NPI=data_npi.groupby('Month')['NPI'].transform(lambda x: x - x.mean()))
    .drop(columns=['YrMnth'])
    .set_index('Year')
)

# Rows for 2004, re-keyed by month, as a single column named 'NPI_2004'.
newDF = data_npi.loc[2004].reset_index().drop(columns=['Year']).set_index('Month')
newDF.columns = ['NPI' + '_2004']
out.append(newDF)
newDF

  data_npi = pd.read_csv(fp_npi, skiprows=1, delim_whitespace=True, names=['YrMnth', 'NPI'],


Unnamed: 0_level_0,NPI_2004
Month,Unnamed: 1_level_1
Mar,1.676746
Apr,-3.24272
May,0.19504
Jun,1.31616
Jul,-2.5468


In [126]:
# Make sure every '-' in the CPC file is preceded by a blank, presumably so
# that negative values printed flush against the previous field are split
# into separate tokens by the whitespace-delimited read that follows.
# NOTE(review): this edits the raw input file on disk.
fp_extra = datadir / f"predictors_raw/teleconnections/{extra}"
with open(fp_extra, 'r') as src:
    raw_txt = src.read()

# Normalise instead of blindly inserting: first remove one blank in front of
# each '-', then add one back. The result is the same however often the cell
# is run, whereas a plain replace('-', ' -') adds another blank on every run.
txt = raw_txt.replace(' -', '-').replace('-', ' -')

# Touch the file only when the content actually changes.
if txt != raw_txt:
    with open(fp_extra, 'w') as dst:
        dst.write(txt)

In [None]:
# Column labels of the CPC multi-index table. They get their own name: the
# list has the same length (13) as the Year/month header stored in `columns`,
# so rebinding `columns` here would make a later re-run of get_data or of the
# PDO cell silently attach the wrong labels.
extra_columns = ['Year', 'Month', 'NAO', 'EA', 'WP', 'EP/NP', 'PNA', 'EA-WR', 'SCA', 'TNH', 'POL', 'PT', 'P2']
# sep=r'\s+' is the supported spelling of the deprecated delim_whitespace=True.
data_extra = pd.read_csv(fp_extra, skiprows=19, sep=r'\s+', names=extra_columns, engine='python')

# -99.9 is treated as the missing-value marker. dropna(axis=1) runs on the
# whole file, so a column is discarded if it has a gap in ANY record, not
# only in the 2004 rows selected below.
data_extra = (
    data_extra
    .astype(float)
    .replace(-99.9, np.nan)
    .dropna(axis=1)
    .astype({'Year': int, 'Month': int})
)
# Month number -> abbreviation, via the lookup `d`.
data_extra = data_extra.assign(Month=data_extra['Month'].map(d))

# Keep March-July 2004, key the rows by month and tag every column with the year.
data_extra = (
    data_extra
    .loc[lambda df: df['Month'].isin(['Mar', 'Apr', 'May', 'Jun', 'Jul']) & (df['Year'] == 2004)]
    .drop(columns=['Year'])
    .set_index('Month')
)
# NOTE(review): 'PNA_2004' is also the label produced from the PSL PNA table;
# once both frames are in `out`, the combined table has two columns with that
# name — confirm this is intended.
data_extra.columns = [col + '_2004' for col in data_extra.columns]

data_extra


  data_extra = pd.read_csv(fp_extra, skiprows=19, delim_whitespace=True, names=columns, engine='python')


Unnamed: 0_level_0,NAO_2004,EA_2004,WP_2004,PNA_2004,EA-WR_2004,SCA_2004,POL_2004,P2_2004
Month,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Mar,0.67,1.21,0.13,-0.0,0.87,1.12,-1.13,61.0
Apr,1.11,-0.34,0.49,0.25,0.9,1.04,-1.73,57.8
May,0.23,-1.18,0.4,-1.46,-0.12,-1.53,-1.2,54.0
Jun,-0.59,0.67,2.08,-0.28,-0.06,-1.26,0.14,56.8
Jul,1.16,-0.23,0.34,-0.33,-0.28,-0.5,-1.6,19.5


In [141]:
# Add the CPC columns to the collection. list.append is not idempotent:
# executing this cell more than once adds the same frame again.
out.append(data_extra)

In [143]:
# Combine all collected frames column-wise, then transpose so that each index
# becomes a row and each month a column.
pd.concat(out, axis=1).T

Unnamed: 0,Mar,Apr,May,Jun,Jul
AO_2004,0.318,-0.409,-0.094,-0.236,-0.201
EP-NP_2004,-1.28,1.23,1.92,3.36,1.19
PNA_2004,-0.0,0.25,-1.46,-0.28,-0.33
SOI_2004,0.7,-1.5,1.7,-1.4,-0.8
Nino1+2_2004,-0.74,-0.57,-1.59,-1.07,-0.78
Nino3_2004,0.13,-0.07,-0.45,-0.26,-0.02
Nino3.4_2004,0.12,0.07,0.06,0.13,0.49
Nino4_2004,0.1,0.09,0.11,0.22,0.42
ONI_2004,0.23,0.17,0.17,0.28,0.47
PDO_2004,-0.15,-0.0,0.61,-0.11,0.04


In [149]:
# Write the combined table (one row per index, one column per month) with
# three significant digits.
# NOTE(review): the file name has no extension although CSV text is written.
outfn = f'teleconnections_2004_{timestamp}'
# Create the output folder on demand; to_csv does not create missing
# directories and would otherwise fail.
outdir.mkdir(parents=True, exist_ok=True)
pd.concat(out, axis=1).T.to_csv(outdir / outfn, float_format='%.3g')
