# Final Project 

In [1]:
import numpy as np
import pandas as pd 
import scipy as sci
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

from itertools import chain
from numpy import pi, cos, sin, exp, sqrt
from scipy.signal import freqz, welch, periodogram, butter, lfilter, filtfilt, boxcar, ricker, cwt
from scipy.interpolate import griddata

from matplotlib.dates import DateFormatter, MinuteLocator, HourLocator, MonthLocator
from matplotlib.ticker import FormatStrFormatter, StrMethodFormatter
from matplotlib.ticker import FixedFormatter

from mpl_toolkits.basemap import Basemap
from textwrap import wrap

%matplotlib inline
%config InlineBackend.figure_format = 'pdf'
# %matplotlib notebook

## Import and clean up the data

In [2]:
# Import data
# All data (hour resolution). The file is whitespace-separated and its first two
# rows are read as a two-level column header.
# The separator is a raw string: '\s' in a normal string literal is an invalid
# escape sequence and triggers a warning on recent Python versions.
AS = pd.read_csv('/Users/Kev/Documents/Uvic/Python/PHYS 411 - Time Series Analysis/Data Sets/AllStations_temperature_h_2017.dat',
                 sep=r'\s+', header=[0, 1])

In [3]:
# Individual stations (minute resolution)
# NOTE: absolute, machine-specific location; edit this single line to relocate the data sets.
DATA_DIR = '/Users/Kev/Documents/Uvic/Python/PHYS 411 - Time Series Analysis/Data Sets'


def _read_station(filename):
    """Read one single-station temperature file from DATA_DIR.

    The row with index 2 (third line of the file) is used as the header.
    """
    return pd.read_csv(DATA_DIR + '/' + filename, header=2)


DC = _read_station('DeepCove_temperature.dat')
DE = _read_station('DiscoveryElementary_temperature.dat')
HL = _read_station('Helgesen_temperature.dat')
JB = _read_station('JamesBay_temperature.dat')
JM = _read_station('JohnMuir_temperature.dat')
KT = _read_station('Keating_temperature.dat')
US = _read_station('UVicSci_temperature.dat')

# Coastline: whitespace-separated longitude/latitude pairs without a header row.
# Raw string for the separator so that '\s' is not treated as an (invalid) string escape.
MAP = pd.read_csv(DATA_DIR + '/DataCoast.dat',
                  sep=r'\s+', names=["Lon", "Lat"])

### Clean up the All Stations data

In [4]:
# Convert times from MatLab time to Python Time
# The ('NaN', 'NaN') column holds the time stamps as MATLAB datenums: the whole
# part is turned into a date with fromordinal, the fractional part is added as a
# fraction of a day, and 366 days are subtracted (presumably the offset between
# the MATLAB datenum origin and Python's ordinal origin -- TODO confirm).
# NOTE: this cell is not re-runnable on its own: AS is rebound at the end to a
# frame that no longer contains the ('NaN', 'NaN') column.
AS['Time'] = AS['NaN', 'NaN'].apply(lambda matlab_datenum: 
                             dt.datetime.fromordinal(int(matlab_datenum)) 
                             + dt.timedelta(days=matlab_datenum%1)
                             - dt.timedelta(days = 366)) 

# Rename the columns
# Every column label equal to "NaN" becomes "MatLab Time"; index=str converts the row labels to strings.
AS2 = AS.rename(index=str, columns={"NaN": "MatLab Time"})

# Set time as index column
AS3 = AS2.set_index('Time')

# Get the longitudes correct 
# Column 0 is the time column, so the loop starts at 1. For every other column the
# second-level label is read as a number, shifted by -360 and written back as a
# string with four decimals. The old label is looked up in the untouched AS, while
# the new value is computed from the progressively renamed AS3.
for i in range(1, np.shape(AS3)[1]):
    New_name = float(list(AS3)[i][1]) - 360
    AS3 = AS3.rename(columns={list(AS)[i][1]: str(format(New_name, '.4f'))})

# Drop the original MATLAB time column; only the station columns remain.
AS4 = AS3.drop('MatLab Time', axis=1, level=1)
    
# From here on AS refers to the cleaned, time-indexed frame.
AS = AS4

In [5]:
# De-NaN: keep only the rows in which no column has a missing value
AS_DN = AS.dropna(axis='index', how='any')

### Clean up the minute resolution data

In [6]:
def DateInsert(Data, start='2011-12-31 17:00:00.000000', freq='min'):
    """Return a time-indexed copy of a single-station frame.

    A regular time axis with one stamp per row is generated, the first column
    of ``Data`` is renamed to "Temperature", and the generated stamps become
    the index (named "Time").

    Unlike the previous version, ``Data`` itself is left untouched, so the
    function can be called repeatedly on the same frame (re-running the cell
    no longer fails with "cannot insert Time, already exists").

    Parameters
    ----------
    Data : pandas.DataFrame
        Raw station frame; its first column holds the measurements.
    start : str, optional
        Time stamp assigned to the first row.
    freq : str, optional
        Spacing of the generated time stamps (pandas frequency string).

    Returns
    -------
    pandas.DataFrame
        New frame indexed by "Time" with the first column named "Temperature".
    """
    date = pd.date_range(start=start, freq=freq, periods=len(Data))
    # rename() returns a new frame, so the insert below never touches the caller's object
    D1 = Data.rename(columns={Data.columns[0]: "Temperature"})
    D1.insert(loc=0, column='Time', value=date)
    # Set index
    DM = D1.set_index('Time')
    return DM

In [7]:
# Raw single-station frames collected in one list, plus a list of display names
# (presumably in the same station order -- verify before using them together).
DataM = [DC, DE, HL, JB, JM, KT, US]
DataMLabels = ['Deep Cove', 'Discovery', 'Helgesen', 'James Bay', 'John Muir', 'Keating', 'UVicSci']

In [8]:
# Apply DateInsert to every station frame, keeping the order of DataM
DataMIns = [DateInsert(station_frame) for station_frame in DataM]

In [9]:
# One frame per station, keyed by station code; the "Temperature" column is
# renamed to the code and the index labels are converted to strings.
Mn = {
    code: DataMIns[position].rename(index=str, columns={"Temperature": code})
    for position, code in enumerate(("DC", "DE", "HL", "JB", "JM", "KT", "US"))
}

In [10]:
# De-NaN: per-station copies with every row containing a missing value removed
Mn_DN = {
    code: Mn[code].dropna(axis=0)
    for code in ("DC", "DE", "HL", "JB", "JM", "KT", "US")
}

In [11]:
# Matrix of all the minute resolution data: the station frames side by side,
# aligned on their index
station_frames = [Mn[code] for code in ('DC', 'DE', 'HL', 'JB', 'JM', 'KT', 'US')]
MS = pd.concat(station_frames, axis=1, sort=True)

### Get station locations

In [12]:
# Station locations
# Each column label of AS is a (latitude, longitude) pair of strings.
# The time column has already been removed from AS during clean-up, so every
# column is a station; the previous range(1, ...) silently skipped the first one.
StationLoc = list(AS)
Lon = np.array([float(label[1]) for label in StationLoc])
Lat = np.array([float(label[0]) for label in StationLoc])

## Create meshgrid for interpolation

In [13]:
# Regular grid covering the bounding box of the coastline data
step_size = 0.001
x = np.arange(MAP['Lon'].min(), MAP['Lon'].max(), step_size)
y = np.arange(MAP['Lat'].min(), MAP['Lat'].max(), step_size)

X, Y = np.meshgrid(x, y)

# Get the locations of the data as [longitude, latitude] rows
# (the column labels of AS are (latitude, longitude) pairs)
AS_loc = np.array([[float(lon), float(lat)] for lat, lon in AS.columns.values])

## Useful functions

In [14]:
# Get dimensions
def get_dim(x=X, y=Y):
    """Return [min(x), max(x), min(y), max(y)] as a list of Python floats.

    The defaults are the module-level grids X and Y as they exist when this
    cell is executed.
    """
    extremes = (np.min(x), np.max(x), np.min(y), np.max(y))
    return [float(value) for value in extremes]

In [15]:
# Draw the map
def map(title='WHAT\'S THE THE TITLE YOU DONKEY?!?!?', size=(10, 10), land='#FFFFFF', water='#FFFFFF'):
    """Draw the coastline base map on a new figure.

    Creates a figure with one axes, fills the coastline polygon with `land`,
    overlays a second polygon (coastline plus three corners of its bounding
    box) filled with `water`, and draws the coastline as a black line. The
    figure is left open so that callers can add their own artists before
    calling plt.show().

    Note: this function shadows the built-in map() for the rest of the notebook.

    Parameters
    ----------
    title : str
        Figure title; wrapped at 50 characters.
    size : tuple of float
        Figure size; its smaller entry also scales line width and font sizes.
    land, water : str
        Fill colours for the two polygons; `water` is also the axes background.

    Side effects
    ------------
    Changes the global matplotlib rc settings for the x/y tick label size.
    """
    fig, ax = plt.subplots(1, 1, figsize=size)

    # Three corners of the coastline bounding box, appended to the coastline
    # to form the second polygon.
    AddPoints = pd.DataFrame([[MAP['Lon'].min(), MAP['Lat'].min()],
                              [MAP['Lon'].max(), MAP['Lat'].min()],
                              [MAP['Lon'].max(), MAP['Lat'].max()]],
                             columns=['Lon', 'Lat'])
    # DataFrame.append was removed in pandas 2.0; pd.concat gives the same
    # result and is available in old and new pandas alike.
    MAPIn = pd.concat([MAP, AddPoints], ignore_index=True)

    ax.fill(MAP['Lon'], MAP['Lat'], land, zorder=0, label='')
    ax.fill(MAPIn['Lon'], MAPIn['Lat'], water, zorder=2, label='')
    ax.plot(MAP['Lon'], MAP['Lat'], linewidth=np.min(size)/12, color='k', zorder=3, label='')

    # Tick labels as degrees; StrMethodFormatter replaces the older
    # %-style FormatStrFormatter.
    ax.xaxis.set_major_formatter(StrMethodFormatter(r'{x:.1f}$^\circ$W'))
    ax.yaxis.set_major_formatter(StrMethodFormatter(r'{x:.2f}$^\circ$N'))
    mp.rc('xtick', labelsize=np.min(size)*1.7)
    mp.rc('ytick', labelsize=np.min(size)*1.7)
    plt.xticks(rotation=0)

    # Fixed view window
    ax.set_xlim(-123.8, -123.25)
    ax.set_ylim(48.325, 48.725)
    ax.set_facecolor(water)

    ax.set_aspect('equal')
    maptitle = title
    ax.set_title("\n".join(wrap(maptitle, 50)), fontsize=np.min(size)*2.25)
    ax.set_xlabel(r'Longitude', fontsize=np.min(size)*2)
    ax.set_ylabel(r'Latitude', fontsize=np.min(size)*2)

In [16]:
# Heatmap
def heatmap(Array, D=get_dim(), colourmap='coolwarm', size=(10, 8), title='TITLE DONKEY!!!', interp='none'):
    """Show a gridded field on top of the base map, with station markers and a colour bar.

    Parameters
    ----------
    Array : 2-D array
        Field to display; drawn with its first row at the bottom.
    D : list of float
        Image extent passed to imshow; the default is computed once, when this
        cell is executed.
    colourmap : str
        Matplotlib colour map name.
    size : tuple of float
        Figure size handed to map().
    title : str
        Figure title handed to map().
    interp : str
        Interpolation mode used by imshow.
    """
    map(title=title, size=size, land='#FFFFFF', water='#EEEEEE')
    # Station positions as small black dots
    plt.scatter(Lon, Lat, s=5, marker='o', color='k', zorder=1)
    field_image = plt.imshow(Array, cmap=colourmap, extent=D,
                             interpolation=interp, origin='lower')
    plt.colorbar(field_image, format='%.3f', aspect=20, shrink=0.7)
    plt.show()

In [17]:
# Empirical orthogonal functions
def EOF(Array=AS_DN, int_type='cubic', locations=AS_loc):
    """Compute the EOFs of a station data frame and interpolate them onto the grid.

    The covariance matrix of the columns of `Array` is decomposed with
    np.linalg.eig. Each eigenvector is then interpolated from the station
    `locations` onto the module-level grid (X, Y) with scipy's griddata.

    np.linalg.eig stores the eigenvectors as the COLUMNS of its second return
    value (column i belongs to eigenvalue i). The previous version interpolated
    the rows instead, so the maps did not correspond to the eigenvalues they
    were labelled with.

    Parameters
    ----------
    Array : pandas.DataFrame
        One column per station; must not contain missing values.
    int_type : str
        Interpolation method passed to griddata.
    locations : array of shape (n_stations, 2)
        Coordinates of the stations, in the column order of `Array`.

    Returns
    -------
    EOFs : ndarray
        One interpolated grid per eigenvector, EOFs[i] belonging to eigenvalue i.
    Array_CM_Evec : ndarray
        Eigenvector matrix exactly as returned by np.linalg.eig (eigenvectors in columns).
    Array_CM_Eval : ndarray
        Eigenvalues, in the order returned by np.linalg.eig (no sorting is applied).
    """
    # The covariance matrix
    Array_CM = Array.cov()

    # Eigenvalues and eigenvector matrix
    Array_CM_Eval, Array_CM_Evec = np.linalg.eig(Array_CM)

    # Interpolate every eigenvector (column i of the matrix) onto the grid
    EOFs = np.array([griddata(locations, Array_CM_Evec[:, i], (X, Y), method=int_type)
                     for i in range(len(Array_CM_Eval))])
    return EOFs, Array_CM_Evec, Array_CM_Eval

In [18]:
# Basemap version MAP
# -------------------
# mapx = 8
# mapy = 8

# plt.figure('Map', figsize=(mapx, mapy))
# Map = Basemap(projection='gnom', lat_0 = 48.53, lon_0 = -123.50, 
#                 width=mapx/2 * 10**4, height=mapy/2 * 10**4, resolution='f')

# lon, lat = Map(Lon, Lat)

# Map.fillcontinents(color="#c6ebc6", lake_color='#DDEEFF')
# Map.drawmapboundary(fill_color="#DDEEFF")
# Map.drawcoastlines()
# Map.drawparallels(np.arange(48.3, 49, 0.05), labels=[0, 1, 1, 0])
# Map.drawmeridians(np.arange(-124.0, -123.3, 0.1), labels=[1, 0, 0, 1])

# maptitle = 'Location of weather temperature measurement stations in south-east Vancouver Island'
# plt.title("\n".join(wrap(maptitle, 80)))
# # plt.xlabel('Longitude')
# # plt.ylabel('Latitude')
# Map.plot(lon, lat, marker='o', color='#CC00CC', markersize=4, linewidth=0)
# plt.show()

## Plot the map with base stations 

In [19]:
# Plot the base stations on top of the coloured base map
map2title = 'Location of weather temperature measurement stations in south-east Vancouver Island'
map(title=map2title, water='#DDEEFF', land='#CCEECC')
plt.scatter(Lon, Lat, s=10, marker='o', zorder=1, color='#CC00CC')
plt.show()

<Figure size 720x720 with 1 Axes>

## Plot the empirical orthogonal functions 

In [20]:
# Run EOF() on its default data and station locations; it returns the interpolated
# EOF grids, the eigenvector matrix and the eigenvalues.
EOFs, Evec, Eval = EOF(int_type='cubic')

In [21]:
# NOTE(review): index 35 is hard-coded as the "first" EOF. np.linalg.eig does not
# guarantee any ordering of the eigenvalues, so confirm that Eval[35] really is the
# mode this title describes.
heatmap(EOFs[35], title=r'First EOF of Vancouver Island using cubic interpolation. $\lambda=${0:.4f}'.format(Eval[35]), interp='bilinear')

<Figure size 720x576 with 2 Axes>

## Plot the heatmap for all stations data

In [22]:
# Hourly time axis spanning the record and the year of every stamp
# (kept for the x tick labels that are still disabled below)
Test = pd.date_range(AS.index[0], AS.index[-1], freq='h')
Dates = np.array([i.strftime('%Y') for i in Test])

n_stations = len(AS.columns.values)

plt.figure(figsize=(20, 10))
# One image row per station; every row is 2 units tall in the chosen extent
plt.imshow(np.array(AS.T), extent=(0, 200, 0, n_stations*2), origin='lower', cmap='coolwarm', interpolation='nearest', filternorm=False)
# Row centres are at 1, 3, 5, ...: exactly one tick per station. The previous
# arange(0.5, ...) produced one tick more than there are labels (an error on
# recent matplotlib) and placed the ticks away from the row centres.
plt.yticks(np.arange(1, n_stations*2, 2), np.array(AS.columns.values), fontsize=10)
# plt.xticks(np.arange(0, 200, int(200/len(Dates))), Dates, fontsize=10) # len(Dates) can not be too long, otherwise divide by zero error
plt.show()

<Figure size 1440x720 with 1 Axes>

In [23]:
# Hourly stamps from the first to the last index entry of AS, and the year of each stamp as text
Test = pd.date_range(start=AS.index[0], end=AS.index[-1], freq='h')
Dates = np.array([stamp.strftime('%Y') for stamp in Test])

In [24]:
# Step that the disabled xticks call would use; it truncates to 0 as soon as
# len(Dates) exceeds 200, which np.arange cannot accept as a step.
int(200/len(Dates))

0

## Wavelet transform

In [25]:
# Continuous wavelet transform of the first station column with the Ricker wavelet.
# NOTE: scipy.signal.cwt and ricker are deprecated in recent SciPy releases;
# this cell needs a SciPy version that still ships them.
widths = np.arange(1, 31)
cwtmatr = cwt(np.array(AS_DN)[:, 0], ricker, widths)
# cwt returns one row per width, so the vertical axis is labelled with the
# wavelet widths (not with frequency). Row 0 (smallest width) is at the top.
plt.imshow(cwtmatr, cmap='bwr', aspect='auto',
           extent=(0, cwtmatr.shape[1], widths[-1], widths[0]),
           vmax=abs(cwtmatr).max(), vmin=-abs(cwtmatr).max())
plt.xlabel('Time (sample index)')
plt.ylabel('Wavelet width')
plt.title('Wavelet transform')
# Render explicitly so the Text repr of the title call is not echoed as cell output
plt.show()

Text(0.5, 1.0, 'Wavelet transform')

<Figure size 432x288 with 1 Axes>

In [26]:
# Inspect the wavelet coefficient matrix computed by cwt above
cwtmatr

array([[-0.22442817,  5.35351447,  2.41886332, ...,  1.25865616,
         6.55050855, 13.15358252],
       [-0.33787129,  5.13752165,  7.11305186, ..., 12.38876121,
        18.79324681, 15.76418878],
       [-0.80285644,  3.674221  ,  6.71174354, ..., 17.24222771,
        18.63514361, 14.31829873],
       ...,
       [ 8.19320945,  9.41797238, 10.59861001, ..., 10.05555487,
         6.60812836,  3.14907578],
       [ 7.83649962,  9.00598996, 10.13396666, ..., 10.25089018,
         6.86631449,  3.47004082],
       [ 7.44545624,  8.56386137,  9.64331642, ..., 10.43693056,
         7.11288024,  3.77710522]])

## Resample

In [27]:
# Yearly mean of every station column; a year is dropped when the mean of any column is NaN.
# NOTE(review): newer pandas releases prefer the alias 'YE' over 'Y' -- check against the installed version.
RS = AS.resample('Y').mean().dropna(axis=0)

In [28]:
# Plot the yearly means, one line per station.
# Create the axes once and hand them to pandas: calling plt.figure() and then
# DataFrame.plot(figsize=...) opened a second figure and left the first one empty.
fig, ax = plt.subplots(figsize=(10, 4))
# legend=False hides the legend directly instead of building one from an empty label string
RS.plot(ax=ax, linewidth=1, legend=False)
ax.set_xlabel('Date')
ax.set_ylabel('Temperature')
plt.show()

<Figure size 720x288 with 0 Axes>

<Figure size 720x288 with 1 Axes>

## Correlation between the different stations

In [29]:
# Pairwise correlation between the columns of the minute-resolution station matrix
MS.corr()

Unnamed: 0,DC,DE,HL,JB,JM,KT,US
DC,1.0,0.98313,0.967003,0.950519,0.91992,0.987959,0.97911
DE,0.98313,1.0,0.961855,0.939776,0.923591,0.978511,0.961845
HL,0.967003,0.961855,1.0,0.946749,0.934992,0.966477,0.974121
JB,0.950519,0.939776,0.946749,1.0,0.94319,0.952806,0.949653
JM,0.91992,0.923591,0.934992,0.94319,1.0,0.925137,0.913019
KT,0.987959,0.978511,0.966477,0.952806,0.925137,1.0,0.976123
US,0.97911,0.961845,0.974121,0.949653,0.913019,0.976123,1.0


In [30]:
# Pairwise correlation between the station columns of the hourly all-stations data
AS.corr()

Unnamed: 0_level_0,Unnamed: 1_level_0,48.5745,48.5376,48.4655,48.4608,48.4356,48.5273,48.4529,48.6804,48.4562,48.3891,...,48.4683,48.4744,48.4205,48.6568,48.4533,48.6529,48.4865,48.4623,48.4359,48.4572
Unnamed: 0_level_1,Unnamed: 1_level_1,-123.4460,-123.5010,-123.3210,-123.3930,-123.4860,-123.3700,-123.4770,-123.4570,-123.3380,-123.6960,...,-123.3590,-123.3810,-123.4860,-123.6430,-123.4260,-123.4040,-123.3240,-123.3090,-123.3110,-123.5490
48.5745,-123.446,1.0,0.98007,0.985049,0.980706,0.979014,0.97881,0.981683,0.986206,0.9834,0.942142,...,0.983611,0.981714,0.979849,0.984656,0.978162,0.977812,0.982134,0.972421,0.976809,0.984126
48.5376,-123.501,0.98007,1.0,0.976879,0.97043,0.973872,0.960745,0.983969,0.974319,0.978156,0.938132,...,0.973636,0.976354,0.973975,0.981359,0.968531,0.956679,0.974206,0.963466,0.967268,0.98423
48.4655,-123.321,0.985049,0.976879,1.0,0.990368,0.98512,0.977632,0.987313,0.980953,0.995545,0.950249,...,0.99509,0.991082,0.983908,0.977971,0.986131,0.969929,0.989357,0.979946,0.991882,0.984805
48.4608,-123.393,0.980706,0.97043,0.990368,1.0,0.985932,0.973074,0.985228,0.973434,0.989925,0.958725,...,0.995611,0.990778,0.981568,0.975008,0.995088,0.968112,0.976479,0.966152,0.985974,0.984585
48.4356,-123.486,0.979014,0.973872,0.98512,0.985932,1.0,0.972257,0.993139,0.977446,0.985162,0.955157,...,0.983558,0.984496,0.995403,0.980642,0.989617,0.966495,0.978373,0.972967,0.980343,0.987954
48.5273,-123.37,0.97881,0.960745,0.977632,0.973074,0.972257,1.0,0.972781,0.97954,0.978911,0.947053,...,0.976438,0.976437,0.97072,0.969765,0.973353,0.981386,0.979853,0.972116,0.975433,0.966815
48.4529,-123.477,0.981683,0.983969,0.987313,0.985228,0.993139,0.972781,1.0,0.980596,0.989253,0.956645,...,0.98508,0.989602,0.992211,0.982653,0.986445,0.966529,0.983118,0.977039,0.983039,0.9896
48.6804,-123.457,0.986206,0.974319,0.980953,0.973434,0.977446,0.97954,0.980596,1.0,0.981906,0.937991,...,0.976309,0.981254,0.976841,0.986908,0.972789,0.984083,0.985386,0.980128,0.976958,0.976386
48.4562,-123.338,0.9834,0.978156,0.995545,0.989925,0.985162,0.978911,0.989253,0.981906,1.0,0.954159,...,0.99423,0.992495,0.984388,0.978751,0.986397,0.973282,0.991197,0.98446,0.992339,0.983826
48.3891,-123.696,0.942142,0.938132,0.950249,0.958725,0.955157,0.947053,0.956645,0.937991,0.954159,1.0,...,0.954872,0.952874,0.949122,0.941718,0.958445,0.940669,0.937169,0.923475,0.952484,0.95666
