# Assignment 3

In [2]:
import numpy as np
import pandas as pd 
import scipy as sci
import matplotlib as mp
import matplotlib.pyplot as plt
import seaborn as sns
import datetime as dt

from itertools import chain
from numpy import pi, cos, sin, exp
from scipy.signal import correlate, square, unit_impulse, welch

%matplotlib inline
%config InlineBackend.figure_format = 'pdf'

### Import data

In [3]:
# Import data
# NOTE(review): absolute, machine-specific path -- point DATA_FILE at your local copy.
DATA_FILE = '/Users/Kev/Documents/Uvic/Python/PHYS 411 - Time Series Analysis/Data Sets/AllStations_temperature_h_2017.dat'

# Whitespace-delimited file: the second line (header=1) supplies the column
# names and only columns 0 and 35 are loaded.  The separator is a raw string so
# that `\s` reaches the regex engine without an invalid-escape warning.
D = pd.read_csv(DATA_FILE, sep=r'\s+', header=1, usecols=[0, 35])

In [4]:
# Convert time in D from MatLab time to Python Time
def _matlab_to_datetime(matlab_datenum):
    """Turn one MATLAB datenum (fractional day count) into a Python datetime."""
    whole_days = dt.datetime.fromordinal(int(matlab_datenum))
    day_fraction = dt.timedelta(days=matlab_datenum % 1)
    # The 366-day shift lines the MATLAB day count up with Python ordinals.
    return whole_days + day_fraction - dt.timedelta(days=366)

# Column 'NaN' holds the MATLAB timestamps; the converted values go in 'Time'.
D['Time'] = D['NaN'].apply(_matlab_to_datetime)

# Rename the columns
column_labels = {"NaN": "MatLab Time", "48.4623": "Temperature"}
D2 = D.rename(index=str, columns=column_labels)

In [5]:
# Reorder columns: rotate the last column to the front
cols = D2.columns.tolist()
last_col, leading_cols = cols[-1:], cols[:-1]
cols = last_col + leading_cols
D23 = D2[cols]

# Set time as index column
DH = D23.set_index('Time')

In [6]:
# Select the dates:
# Hour resolution data
winter_span = slice('2015-12-01 00:00', '2016-03-01 23:00')
summer_span = slice('2016-06-01 00:00', '2016-09-01 23:00')

# Row slice and column pick in a single .loc call; each result is a Series
DH1 = DH.loc[winter_span, 'Temperature']
DH2 = DH.loc[summer_span, 'Temperature']

### Make autocorrelation function:

$$c_{xx}(\tau) = E[(x(t)-\mu_x)(x(t+\tau)-\mu_x)]$$

$$c_{xx}(k) = \frac{1}{N-k} \sum_{i=1}^{N-k} (x_i - \mu_x)(x_{i+k}-\mu_x)$$

In [7]:
# def cxx(x, k):
#     f = 1/(len(x)-k)
#     S = 0
#     for i in range(len(x)):
#         mean = np.nanmean(x)
#         s1 = x[i] - mean
#         if i+k <= len(x)-1:
#             s2 = x[i+k] - mean
#         else:
#             exit
#         S += s1 * s2
#     cxx = f * S
#     return cxx

# For loop is highly inefficient 
# Use vectorized code: 
def cxxV(x, k):
    """Sample autocovariance of ``x`` at a single lag ``k``.

    Evaluates ``1/(N-k) * sum_i (x_i - mean) * (x_{i+k} - mean)`` with
    ``N = len(x)`` and ``mean = np.nanmean(x)``.

    Parameters
    ----------
    x : array-like
        One-dimensional sequence of samples.
    k : int
        Lag in samples.  ``k == len(x)`` raises ``ZeroDivisionError``.

    Returns
    -------
    float
        Autocovariance estimate at lag ``k``.  Only the mean ignores NaNs; a
        NaN inside ``x`` still propagates through the dot product.
    """
    centred = np.array(x) - np.nanmean(x)   # deviations from the mean
    lagged = centred[k:]                    # x_{i+k}: samples from index k onward
    leading = centred[:len(lagged)]         # x_i: equally many samples from the start
    scale = 1/(len(x) - k)                  # one over the number of overlapping pairs
    return scale * lagged.dot(leading)

def CxxV(x, k):
    """Autocovariance of ``x`` for every lag ``0, 1, ..., k-1``.

    Returns a NumPy array of length ``k`` whose entry ``j`` is ``cxxV(x, j)``.
    """
    per_lag = [cxxV(x, lag) for lag in range(k)]
    return np.array(per_lag)

def RxxV(x, k):
    """Autocorrelation of ``x`` for lags ``0 .. k-1``.

    This is the autocovariance from ``CxxV`` divided by the NaN-ignoring
    variance of ``x`` (``np.nanvar``).
    """
    covariances = CxxV(x, k)
    variance = np.nanvar(x)
    return covariances / variance

## Question 1: Properties of Fourier transforms 

### b) Fourier transform of a cosine function

$$ F\left\{\cos(\omega_0 a t)\right\} = \frac{1}{2} \left[\delta(af_0 - f) + \delta(af_0 + f)\right]$$

In [8]:
N = 2000                    # number of points on the frequency grid
f0 = int(N/4)               # grid samples per unit frequency (the axis spans 4 units, f_0 = 1)
f = np.linspace(-2, 2, N)   # frequency axis; f = 0 sits at index N // 2

# a values we want 
# Apparently I can't correctly pronounce "a"
alpha = [1.5, 0.5, 1]

# The function to be plotted
def fFunc(a):
    """Sampled spectrum 0.5*[delta(f - a*f_0) + delta(f + a*f_0)] on the grid ``f``.

    The impulse positions are measured from the centre of the grid (f = 0),
    so the two spikes land at f = +a*f_0 and f = -a*f_0.  Measuring them from
    the ends of the array instead puts a = 1.5 at +/-0.5 and a = 0.5 at +/-1.5.

    Parameters
    ----------
    a : float
        Time-scaling factor; needs |a| < 2 for the spikes to fall on the grid.

    Returns
    -------
    numpy.ndarray
        Length-``N`` array, zero except for 0.5 at the two spike positions.
    """
    centre = N // 2
    offset = int(a * f0)
    return 0.5 * (unit_impulse(N, centre + offset) + unit_impulse(N, centre - offset))

# Plot it out 
plt.figure(figsize = (10, 4))
for a in alpha:
    plt.plot(f, fFunc(a), label='a = {0}'.format(a))

plt.title(r'Fourier transform of $x(at) = cos(\omega_0 at)$ for $f_0 = 1$ and various values of $a$')
plt.xlabel('$f$')   # the horizontal axis is the frequency f, not the constant f_0
plt.legend()
plt.show()

<matplotlib.legend.Legend at 0x1c22a4b780>

<Figure size 720x288 with 1 Axes>

### c) One-sided Fourier transform of $e^{-at}$

$$x(t) = 
\begin{cases} 
  0 & t < 0 \\
  e^{-at} & 0 \leq t  
\end{cases}$$ 

$$F\left\{ x(t) \right\} = \left[a^2 + (2\pi f)^2\right]^{-\frac{1}{2}} e^{-i \arctan\left(\frac{2\pi f}{a}\right)}$$

In [9]:
# x(t)
def x1c(a, t):
    """Evaluate the decaying exponential ``exp(-a*t)``.

    ``a`` is the decay rate and ``t`` the time (scalar or NumPy array); the
    result has the shape of ``t``.  No zeroing is applied for ``t < 0``.
    """
    return exp(-a * t)

# F{x(t)}
def Fx1c(a, t):
    """Discrete Fourier transform (``np.fft.fft``) of ``x1c(a, t)``.

    The output is the raw, unshifted and unscaled FFT of the sampled signal.
    """
    samples = x1c(a, t)
    return np.fft.fft(samples)

# def phase(a, f):
#     return 2*pi*f/a
    
# def mag(a, f):
#     m = a**2 + (2*pi*f)**2
#     return m**-0.5

In [13]:
# Time axis and the matching FFT frequency axis.
# fftfreq must be given the actual sample spacing of t1c (about 1 ms);
# any other value mis-scales the frequencies reported in Hz.
t1c, dt1c = np.linspace(0, 10, 10000, retstep=True)
fs1c = np.fft.fftfreq(t1c.size, dt1c)
# FFT output is ordered [0, +f ..., -f ...].  Sort it so f increases
# monotonically and no line is drawn joining the two ends of the spectrum.
fs1c_sorted = np.fft.fftshift(fs1c)
a_Val = 16

fig1, (ax1c1, ax1c2, ax1c3) = plt.subplots(3, 1, figsize=(10, 12))
fig2, (ax1c4, ax1c5, ax1c6) = plt.subplots(3, 1, figsize=(10, 12))

for a in range(0, a_Val, 2):
    decay = a/10                       # the value of a shown in every legend
    label = 'a = {0}'.format(decay)

    # Plot the function x(t)
    ax1c1.plot(t1c, x1c(decay, t1c), label=label)

    # Transform once per decay rate, using the same a as the label,
    # and reuse the result for every panel.
    spectrum = np.fft.fftshift(Fx1c(decay, t1c))

    # Plot the real part Fourier transform 
    ax1c2.semilogy(fs1c_sorted, spectrum.real, label=label)
    # Plot the imaginary part of the Fourier transform 
    ax1c3.plot(fs1c_sorted, spectrum.imag, label=label)
    # Plot the magnitude of the Fourier transform 
    ax1c4.semilogy(fs1c_sorted, np.abs(spectrum), label=label)
    # Plot the phase of the Fourier transform (full range and magnified)
    phase = np.angle(spectrum)
    ax1c5.plot(fs1c_sorted, phase, label=label)
    ax1c6.plot(fs1c_sorted, phase, label=label)

ax1c1.legend()
ax1c1.set_title('$x(t)=e^{-at}$ for various values of $a$')
ax1c1.set_xlabel('Time (s)')
ax1c1.set_ylabel('$x(t)$')

# NOTE(review): the axis limits below are unchanged.  The frequency resolution
# of a 10 s record is 0.1 Hz, so the +/-0.2 Hz panels show only a few points;
# widen the limits if more of the spectrum should be visible.
ax1c2.set_ylim(0.1, 0.2*10**5)
ax1c2.set_xlim(-2, 2)
ax1c2.legend()

ax1c3.set_xlim(-0.2, 0.2)
ax1c3.legend(loc=1)

ax1c4.set_ylim(0.1, 0.2*10**5)
ax1c4.legend()

ax1c5.set_ylim(-1.75, 1.75)
ax1c5.legend(loc=1)

ax1c6.set_xlim(-0.2, 0.2)
ax1c6.set_ylim(-1.75, 1.75)
ax1c6.legend(loc=1)

ax1c2.set_title('Real part of the Fourier transform of $x(t)=e^{-at}$ for various values of $a$')
ax1c2.set_xlabel('Frequency ($Hz$)')
ax1c2.set_ylabel('$Re[X(f)]$')

ax1c3.set_title('Imaginary part of the Fourier transform of $x(t)=e^{-at}$ for various values of $a$')
ax1c3.set_xlabel('Frequency ($Hz$)')
ax1c3.set_ylabel('$Im[X(f)]$')

ax1c4.set_title('Magnitude of the Fourier transform of $x(t)=e^{-at}$ for various values of $a$')
ax1c4.set_xlabel('Frequency ($Hz$)')
ax1c4.set_ylabel('$| X(f) |$')

ax1c5.set_title('Phase of the Fourier transform of $x(t)=e^{-at}$ for various values of $a$')
ax1c5.set_xlabel('Frequency ($Hz$)')
ax1c5.set_ylabel(r'$\Theta (f)$')

ax1c6.set_title('Magnified phase of the Fourier transform of $x(t)=e^{-at}$ for various values of $a$')
ax1c6.set_xlabel('Frequency ($Hz$)')
ax1c6.set_ylabel(r'$\Theta (f)$')

fig1.tight_layout()
fig2.tight_layout()

<Figure size 720x864 with 3 Axes>

<Figure size 720x864 with 3 Axes>

As expected, both the phase $\theta(f) = -\arctan\left(\frac{2\pi f}{a}\right)$ and the magnitude $\left| X(f) \right| = \left[a^2 + (2\pi f)^2\right]^{-\frac{1}{2}}$ decrease (in absolute value) as $a$ increases.

### e) How spikes affect spectral density of a time series

In the language of Fourier series: 

To construct a data spike we need an infinite sum of sine and cosine waves with continuous wavelengths. 
$$\delta(x-\alpha) = \int_{-\infty}^{\infty} e^{i 2 \pi (x-\alpha)n} dn$$

As energy is proportional to wavelength, this spike (the delta function) will contain all values of energy. Plotting the power spectral density of a time series containing a spike causes additional energy to be plotted at every frequency, rendering the power spectrum useless. 

## Question 2: Quality of dummy weather forecast

### 1) Winter data set:

In [10]:
# Winter temperature record with a major tick roughly every week
fig_winter, ax1 = plt.subplots(figsize=(15, 6))
DH1.plot(ax=ax1, label='Temperature')
ax1.set_title('Temperature data from UVic Sci from 1 Dec 2015 to 1 Mar 2016', fontsize=16)
ax1.set_xlabel('Time (hr)', fontsize=14)
ax1.set_ylabel(r'Temperature ($^\circ C$)', fontsize=14)
weekly_ticks = mp.dates.DayLocator(bymonthday=(1, 8, 15, 22))
ax1.xaxis.set_major_locator(weekly_ticks)
plt.show()

<Figure size 1080x432 with 1 Axes>

#### Plot lag correlation

In [13]:
Days = 40
Hrs = 24

# Make the time lag: one copy of the series per lag (in hours), read off at
# the original timestamps shifted forward by that lag.
# NOTE(review): asof looks up the latest sample at or before each timestamp,
# so requests past the end of the record presumably repeat the final value.
TL1 = []
for lag_hours in range(Days*Hrs):
    shifted_times = DH1.index + pd.Timedelta(lag_hours, 'h')
    TL1.append(DH1.asof(shifted_times))

# Alternative noted by the author: [DH1.shift(i) for i in range(Days*Hrs)]
# also works, but shifts the temperature indices down and truncates elements
# that fall outside of len(DH1).

# Calculate the correlation coefficient 
# Pearson r against every lagged copy except the last one
rxx1 = [sci.stats.pearsonr(DH1, lagged)[0] for lagged in TL1[:-1]]
rxx13 = RxxV(DH1, Days*Hrs)

In [14]:
# Lag correlation for the winter record, with e^-1 as a horizontal reference
fig_lag1, ax_lag1 = plt.subplots(figsize=(10, 4))
ax_lag1.plot(rxx13, label=r'$r_{xx}[x(t+\tau)]$ (My function)')
ax_lag1.plot(rxx1,  label=r'$r_{xx}[x(t+\tau)]$ (Built-in Pearson)')
ax_lag1.plot(np.full(len(rxx13), np.exp(-1)), label='$e^{-1}$')
ax_lag1.set_title('Lag correlation for the temperature data from UVic Sci from 1 Dec 2015 to 1 Mar 2016')
ax_lag1.set_xlabel(r'Lag ($\tau$) [Hours]')
ax_lag1.set_ylabel(r'$r_{xx}[x(t+\tau)]$')
ax_lag1.legend()
plt.show()

<Figure size 720x288 with 1 Axes>

### 2) Summer data set:

In [15]:
# Summer temperature record with a major tick roughly every week
fig_summer, ax2 = plt.subplots(figsize=(15, 6))
DH2.plot(ax=ax2, label='Temperature')
ax2.set_title('Temperature data from UVic Sci from 1 Jun 2016 to 1 Sep 2016',  fontsize=16)
ax2.set_xlabel('Time (hr)', fontsize=14)
ax2.set_ylabel(r'Temperature ($^\circ C$)', fontsize=14)
weekly_ticks = mp.dates.DayLocator(bymonthday=(1, 8, 15, 22))
ax2.xaxis.set_major_locator(weekly_ticks)
plt.show()

<Figure size 1080x432 with 1 Axes>

#### Plot lag correlation

In [16]:
# Make the time lag: one copy of the series per lag (in hours), read off at
# the original timestamps shifted forward by that lag.
TL2 = []
for lag_hours in range(Days*Hrs):
    shifted_times = DH2.index + pd.Timedelta(lag_hours, 'h')
    TL2.append(DH2.asof(shifted_times))
# Alternative noted by the author: [DH2.shift(i) for i in range(Days*Hrs)]

# Calculate the correlation coefficient 
# Pearson r against every lagged copy except the last one
rxx2 = [sci.stats.pearsonr(DH2, lagged)[0] for lagged in TL2[:-1]]
rxx22 = RxxV(DH2, Days*Hrs)

In [17]:
# Lag correlation for the summer record, with e^-1 as a horizontal reference
plt.figure(figsize = (10, 4))
plt.plot(rxx22, label=r'$r_{xx}[x(t+\tau)]$ (My function)')
plt.plot(rxx2, label=r'$r_{xx}[x(t+\tau)]$ (Built-in Pearson)')
# Size the reference line from the "My function" curve, as the winter figure
# does, so it spans every plotted lag (rxx2 is one sample shorter).
plt.plot(np.full(len(rxx22), np.exp(-1)), label='$e^{-1}$')
# Title: removed the duplicated word "from"
plt.title('Lag correlation for the temperature data from UVic Sci from 1 Jun 2016 to 1 Sep 2016')
plt.xlabel(r'Lag ($\tau$) [Hours]')
plt.ylabel(r'$r_{xx}[x(t+\tau)]$')
plt.legend()
plt.show()

<Figure size 720x288 with 1 Axes>

### 3) Compare, contrast, thoughts, and wishes: 

Variance in temperature for the summer data was much greater than for the winter data. This may have caused the fluctuations in the lag correlation for the summer data to be greater than those for the winter data. While the lag correlations for both data sets have envelopes above $e^{-1}$ up to around a week, values for the winter data remained above $e^{-1}$ within that period, whereas daytime values for the summer data constantly dipped below $e^{-1}$. This means temperature predictions during the night for summer days are accurate up to about a week, but the daytime temperatures will be inaccurate. Predictions for temperatures during the winter are accurate up to a week, regardless of day or night; however, it will become much more difficult to predict accurately into the future compared to the summer data (at least during the nighttime). 

Moral of the story (or rather, plots): do not trust temperature predictions after a week, especially if it is during the winter. Also, do not trust daytime temperature predictions during the summer. 

## Question 3: Fourier series of a square wave

$$x(t) = 
\begin{cases} 
  -1 & -\frac{T}{2} < t < 0 \\
  1 & 0 \leq t < \frac{T}{2} 
\end{cases} 
\ \ \ \ \ \ \ \ \
x(t+nT) = x(t)$$

We found that:

$$x(t) \sim \frac{1}{2} \sum_{m=-\infty}^{\infty} S_m e^{i\left(\frac{m\pi}{T} \right) t}$$

$$S_m = \frac{i}{m\pi} \left[ 1 - \cos\left(m\pi\right)\right] =
\begin{cases} 
  \frac{2i}{m\pi} & \text{for } m \text{ odd} \\
  0 & \text{for } m \text{ even}
\end{cases}$$

We can reduce this into a Fourier sine series, which makes calculations easier:

$$x(t) \sim 2\sum_{m=1}^{\infty} b_m \sin\left(\frac{m\pi}{T} t \right)$$

$$b_m = \frac{1}{m\pi} \left[ 1 - \cos\left(m\pi\right)\right] =\frac{1}{m\pi} \left[ 1 - (-1)^m\right] =
\begin{cases} 
  \frac{2}{m\pi} & \text{for } m \text{ odd} \\
  0 & \text{for } m \text{ even}
\end{cases}$$

Let's plot the graph as a function of T

In [18]:
# Numbers of series terms to keep in each partial sum; later cells plot one
# curve per entry and use max(M) as the total number of terms to generate.
M = [1, 2, 5, 10, 100]

In [19]:
n_terms = max(M)        # generate as many terms as the largest requested partial sum
steps = 10**4           # number of time samples
half_interval = 1.5     # the time axis runs from -half_interval to +half_interval
T = np.linspace(-half_interval, half_interval, num=steps)

# The term number is j+1 because the series starts at n=1.
# The time for row i is t = -half_interval + 2*half_interval*i/steps, where
#     -half_interval is the start and
#     2*half_interval*i/steps is the elapsed time after i steps.
def series(i,j):
    """Term ``j+1`` of the square-wave sine series, evaluated at time step ``i``.

    ``half_interval`` and ``steps`` are read from module scope each time the
    function is called, so rebinding them changes the time grid it evaluates on.
    """
    m = j + 1
    t = -half_interval + 2*half_interval*i/steps
    return 2/(m*pi) * (1 - (-1)**m) * sin(m * pi * t)

In [20]:
# Evaluate every series term on the time grid:
# rows run over the time steps, columns over the term number
grid_shape = (steps, n_terms)
X1 = np.fromfunction(series, grid_shape)

# Running sum across the columns: column m-1 is the partial sum of the first m terms
SL1 = X1.cumsum(axis=1)

In [26]:
plt.figure(figsize = (10, 4))

# One curve per requested number of terms; column m-1 holds the m-term sum
for n_kept in M:
    plt.plot(T, SL1[:, n_kept-1], label='n = {0}'.format(n_kept))

# Reference square wave on the same time axis
plt.plot(T, square(pi*T), label='Exact')
plt.title('Fourier series approximation of an odd square-wave function for $n$ terms and the exact function')
plt.xlabel('$T$')
plt.ylabel('$x(T)$')
plt.legend()
plt.show()

<Figure size 720x288 with 1 Axes>

### Generate the power spectrum

In [22]:
# Longer, more finely sampled record for the spectrum.  `series` reads
# `steps` and `half_interval` from module scope, so they are rebound here
# before the terms are re-evaluated.
steps = 10**5
half_interval = 50
T2, dT = np.linspace(-half_interval, half_interval, num=steps, retstep=True)

grid_shape = (steps, n_terms)
X2 = np.fromfunction(series, grid_shape)
SL2 = X2.cumsum(axis=1)   # column m-1 is the partial sum of the first m terms

In [23]:
# Fourier transform the series 
# Earlier attempts kept for reference:
# FX2 = np.apply_along_axis(np.fft.rfft, 1, SL2)
# GX2 = lambda i: welch(SL2[:, M[i]-1], 200, window='hanning', nperseg=steps, scaling='spectrum')[1]
def GX2(i):
    """Welch power spectrum of the partial sum that keeps the first ``M[i]`` terms.

    Only the power array (element [1] of welch's return value) is handed
    back; the frequency array welch builds from ``fs=200`` is discarded.
    """
    partial_sum = SL2[:, M[i]-1]
    result = welch(partial_sum, 200,
                   window=sci.signal.windows.hann(int(steps)),
                   nperseg=steps, noverlap=steps/2, nfft=steps,
                   detrend=False, return_onesided=True, scaling='spectrum')
    return result[1]

# Same estimator applied to the exact square wave
exact_result = welch(square(pi*T2), 200,
                     window=sci.signal.windows.hann(int(steps)),
                     nperseg=steps, noverlap=steps/2, nfft=steps,
                     detrend=False, return_onesided=True, scaling='spectrum')
GXE = exact_result[1]

# One-sided frequency axis built from the actual sample spacing of T2
FS2 = np.fft.rfftfreq(steps, dT)

In [25]:
plt.figure(figsize = (12, 5))

# Plot against the frequency axis FS2 (in Hz) so the horizontal axis matches
# its label; without an x argument the spectra are drawn against bin index.
# The zero-frequency bin is skipped because it cannot be placed on a log axis.
plt.loglog(FS2[1:], GXE[1:], label='Exact')
for i in range(len(M)-1, -1, -1):
    plt.loglog(FS2[1:], GX2(i)[1:], label='n = {0}'.format(M[i]))

plt.title('Power spectrum of a Fourier series approximation to a square wave function for various terms ($n$)', fontsize=12)
plt.xlabel('Frequency ($Hz$)', fontsize=12)
# The spectra are computed with scaling='spectrum', i.e. power in V^2 rather
# than a density in V^2/Hz.
plt.ylabel(r'$G_{xx}(f)$ [$V^2$]', fontsize=12)
plt.legend(fontsize=12)
plt.show()

<Figure size 864x360 with 1 Axes>