## 06 - Example: financial data

In [None]:
import pandas as pd
# Read the CSV and use its 'Date' column as the row index
data = pd.read_csv('financial_data.csv', index_col='Date')
# Parse the index into datetimes; the format string expects day.month.two-digit-year
data.index = pd.to_datetime(data.index, format='%d.%m.%y')

In [None]:
# Show the first rows of the table as a quick sanity check
data.head()

In [None]:
# Keep only the 'Close' column; the cells below work with this series
prices = data['Close']

In [None]:
# Display the index of the price series
prices.index

In [None]:
# Select a date range by using date strings as slice bounds on the index
prices['2012-01-01':'2012-04-01']

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt

# Price history and price distribution, side by side.
# One row, two columns; the time-series panel is three times as wide as the histogram.
fig, ax = plt.subplots(1, 2, figsize=(14, 5), gridspec_kw={'width_ratios': [3, 1]})
ts_ax, hist_ax = ax

# Left panel: prices over time
ts_ax.plot(prices)
ts_ax.set(title='Daily Prices Over Time', xlabel='Date', ylabel='Daily Prices')

# Right panel: histogram of the prices, drawn horizontally so that its
# price axis is vertical like the one in the left panel
hist_ax.hist(prices, bins=90, orientation='horizontal')
hist_ax.set(title='Distribution of Daily Prices', xlabel='Frequency')

# Dashed red reference lines at the same price levels on both panels
for panel in ax:
    for level in (1, 20, 60, 140):
        panel.axhline(level, color='r', linewidth=1, linestyle='--')

fig.tight_layout()
plt.savefig('THUMB_PRICES.png')
plt.show()

## 07 - Shifting and computing first differences

In [None]:
# Take a short window of prices (second half of December 2019) as a small example series
Pt=prices['2019-12-15':'2019-12-30']
# Display it
Pt

In [None]:
# Default shift (one period): each value moves one row later, so the first row becomes missing
Pt.shift()

In [None]:
# Negative shift: each value moves one row earlier, so the last row becomes missing
Pt.shift(-1)

In [None]:
# Line up the series with its lead and its lags, one column per shift;
# `keys` supplies the column names in the same order as the list
p = pd.concat(
    [Pt.shift(-1), Pt, Pt.shift(1), Pt.shift(2)],
    axis=1,
    keys=['Pt+1', 'Pt', 'Pt-1', 'Pt-2'],
)
print(p)

#### Visualise effect on the time series

In [None]:
# Overlay the example series with two copies whose timestamps are moved one
# calendar day back and one day forward (with freq='D' the index is shifted,
# the values are not realigned).
# The figure is created once so that all three lines share one axes and a
# single, unambiguous size.
fig, ax = plt.subplots(figsize=(8, 4))
Pt.plot(ax=ax, label='original, Pt')
Pt.shift(-1, freq='D').plot(ax=ax, label='backward, Pt+1')
Pt.shift(1, freq='D').plot(ax=ax, label='forward, Pt-1')
ax.legend()
plt.show()

### Example - Analyzing the first difference

In [None]:
# First difference: each price minus the price one row earlier
diff = prices - prices.shift(1)

# Two panels side by side: the changes over time and their distribution
fig, (ts_ax, hist_ax) = plt.subplots(1, 2, figsize=(15, 4))

# Left: changes as a time series
diff.plot(ax=ts_ax)
ts_ax.set_title('Daily changes plotted over time')
ts_ax.set_ylabel('Day-to-day change ($)')

# Right: histogram of the changes
diff.hist(ax=hist_ax, bins=100, grid=False)
hist_ax.set_title('Distribution of Daily Changes')
hist_ax.set_xlabel('Day-to-day change ($)')

plt.show()

In [None]:
# Daily returns in percent: the first difference relative to the previous row's price
daily_percentage_change = 100*diff/prices.shift(1)

# The first value has no previous price and is NaN; drop missing values
daily_percentage_change = daily_percentage_change.dropna()

# One row, two columns: a wide panel for returns and prices over time,
# and a narrow panel for the distribution of returns
fig, ax1 = plt.subplots(1, 2, figsize=(14, 4), gridspec_kw={'width_ratios': [3, 1]})

# Left panel, left y-axis: daily returns
color = 'tab:blue'
ax1[0].set_xlabel('Date')
ax1[0].set_ylabel('Daily returns (%)', color=color)
ax1[0].plot(daily_percentage_change, color=color)
ax1[0].tick_params(axis='y', labelcolor=color)

# Second y-axis on the left panel, sharing its x-axis (the dates)
ax2 = ax1[0].twinx()

# Left panel, right y-axis: daily prices
color = 'tab:red'
ax2.set_ylabel('Daily Prices', color=color)
ax2.plot(prices, color=color)
ax2.tick_params(axis='y', labelcolor=color)

# Title the left panel through its axes object rather than through
# pyplot's "current axes", so the target does not depend on creation order
ax2.set_title('Daily Returns and Prices')

# Right panel: horizontal histogram of the daily returns
ax1[1].hist(daily_percentage_change, bins=100, orientation='horizontal')
ax1[1].set_title('Distribution of Daily Returns')

fig.tight_layout()
plt.show()

### Example - Return on investment

In [None]:
# Price 260 rows earlier, i.e. about one year counted in working days
price_year_ago = prices.shift(260)

# Net profit over that period
net_profit = prices - price_year_ago

# Return on investment, as a percentage of the earlier price
roi = 100 * net_profit / price_year_ago

# Plot the ROI for the selected date range
roi_window = roi['2019-09-01':'2020-08-28']
roi_window.plot(figsize=(8, 4))
plt.ylabel('Return on Investment')
plt.show()

### Appendix

In [None]:
# First difference using the built-in diff()
Pt.diff(1)

In [None]:
# The same first difference written out with shift(); compare with the diff() result
Pt-Pt.shift(1)

In [None]:
# The example series, shown again for reference
Pt

In [None]:
# Shift without freq: values move down one row, the index stays as it is
Pt.shift(1)

In [None]:
# Shift with freq='B': the index moves forward by one business day, the values are not realigned
Pt.shift(1, freq='B')

In [None]:
# A small Series with mixed value types and a mixed-type, non-datetime index.
# The helper lists get their own names so that the `data` table loaded at
# the top of the notebook is not overwritten when this cell runs.
values = [1, 2, 't', 4]
labels = [4, 'Tokyo', 'blue', 'dog']
s = pd.Series(values, labels)
s

In [None]:
# shift() without freq moves values by position, so it also works on this non-datetime index
s.shift(1)

## 08 - Resampling and converting frequencies

In [None]:
# One figure comparing the raw prices with two yearly down-sampled versions
fig, ax = plt.subplots(figsize=(8, 4))

# Raw prices, semi-transparent
prices.plot(ax=ax, alpha=0.5, label='prices')

# Downsampling to end of business year ('BA')
# Variant 1: resample() groups the data per period; here each group is averaged
prices.resample('BA').mean().plot(ax=ax, label='resample')

# Variant 2: asfreq() selects the value at each new timestamp, without aggregating
prices.asfreq('BA').plot(ax=ax, label='asfreq')

ax.legend(loc='upper left')
plt.show()

In [None]:
# One figure comparing the two fill strategies
fig, ax = plt.subplots(figsize=(8, 4))

# Upsampling to calendar-day frequency; only the first 100 rows are plotted.
# "bfill" (backward fill) fills missing with the closest value on the right,
# "ffill" (forward fill) fills missing with the closest value on the left.
for fill in ('bfill', 'ffill'):
    prices.asfreq('D', method=fill).iloc[:100].plot(ax=ax, alpha=0.5, label=fill)

ax.legend(loc='upper left')
plt.show()

In [None]:
from scipy.stats import skew,kurtosis
import numpy as np
# Skewness and kurtosis of the mean percentage change at each resampling
# frequency (business day, week, month, quarter).
# s[j] and k[j] follow the order of the frequency list below.
# Note: scipy's kurtosis() uses the Fisher definition by default, so a
# normal distribution scores 0.
pct = prices.pct_change()  # does not depend on the frequency, so computed once
s, k = [], []
for resamp in ['B', 'W', 'M', 'Q']:
    # Build the resampled series once and feed it to both statistics
    resampled = pct.resample(resamp).mean().dropna()
    s.append(skew(resampled))
    k.append(kurtosis(resampled))

In [None]:
# 2x2 grid: one histogram of mean percentage price changes per resampling frequency
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 10), gridspec_kw={'hspace': 0.35, 'wspace': 0.3})

# Percentage changes do not depend on the frequency, so compute them once
pct = prices.pct_change()

# One tuple per panel: axes, resampling rule, label, bin count and the
# skewness / kurtosis values stored in the lists `s` and `k`
panels = zip(axes.flatten(),
             ['B', 'W', 'M', 'Q'],
             ['Daily', 'Weekly', 'Monthly', 'Quarterly'],
             [96, 44, 21, 12],
             s,
             k)

for ax, resamp, freq, n_bin, skewness, kurt in panels:
    # Drop missing values so the histogram is built from the same sample
    # as the statistics shown in its title
    changes = 100 * pct.resample(resamp).mean().dropna()
    ax.hist(changes, n_bin)

    # 3 is added to the stored kurtosis value before it is displayed
    ax.set(xlabel = freq+' Price Changes (%)',
           ylabel = 'Frequency',
           title  = 'skewness {:.2f}'.format(skewness)+
                    ', kurtosis {:.1f}'.format(3+kurt)
          )
plt.show()

## 09 - Rolling windows

In [None]:
# Centred rolling window of 260 rows; the mean and the standard deviation
# are both taken from the same window object
yearly_window = prices.rolling(260, center=True)
rolling_mean_1year = yearly_window.mean()
rolling_std_1year = yearly_window.std()

# Two stacked panels of equal height, each with its own x-axis
fig, axes = plt.subplots(2, sharex=False, gridspec_kw={"height_ratios": (0.5, 0.5)}, figsize=(10,8))
mean_ax, std_ax = axes

# Top panel: prices (semi-transparent) with the rolling mean on top
mean_ax.set_title('Rolling mean and standard deviation')
mean_ax.plot(prices, alpha=0.5)
mean_ax.plot(rolling_mean_1year, label='rolling mean')
mean_ax.legend()

# Bottom panel: rolling standard deviation
std_ax.plot(rolling_std_1year, c='red', label='rolling std')
std_ax.legend()

plt.show()

### Using rolling windows to detect outliers in time series

In [None]:
def detect_outliers_rolling(timeseries, days=10, n_std=2, center=False):
    """Plot a series with rolling bounds and mark the points outside them.

    A point is flagged as an outlier candidate when it lies strictly below
    ``rolling_mean - n_std * rolling_std`` or strictly above
    ``rolling_mean + n_std * rolling_std``.

    Parameters
    ----------
    timeseries
        Series to analyse; must provide a pandas-style ``rolling`` method
        and support boolean-mask indexing (presumably a pandas Series).
    days
        Window size handed to ``rolling(window=...)``.
    n_std
        Half-width of the accepted band, in rolling standard deviations.
    center
        Passed to ``rolling(center=...)``.

    Returns
    -------
    None
        The function only draws and shows a matplotlib figure.
    """
    # Build the rolling window once and derive both statistics from it
    window = timeseries.rolling(window=days, center=center)
    rolling_mean = window.mean()
    rolling_std = window.std()

    band = n_std * rolling_std
    lower_bound = rolling_mean - band
    upper_bound = rolling_mean + band

    # Identify outliers. Where a bound is NaN the comparisons are False,
    # so such points are never flagged.
    mask = (timeseries < lower_bound) | (timeseries > upper_bound)
    outliers = timeseries[mask]

    # Plot bounds and outliers
    plt.figure(figsize=(12, 6))
    plt.plot(timeseries, "lightblue", label="Time series")
    plt.plot(rolling_mean, "orange", label="Rolling mean")
    plt.plot(upper_bound, "r--", label="Bounds", alpha=0.5)
    # Empty label: the lower bound shares the legend entry of the upper one
    plt.plot(lower_bound, "r--", label="", alpha=0.5)
    plt.plot(outliers, "ro", markersize=5, label="Outlier candidate")
    plt.legend(loc="best")
    plt.grid(True)
    plt.title(
        "{} outliers detected using a moving window of size = {}".format(
            len(outliers), days
        )
    )
    plt.show()

In [None]:
# Run the detector on the 2002-2004 prices with a 20-row, non-centred window and bounds at 3 standard deviations
detect_outliers_rolling(prices['2002':'2004'], days=20, n_std=3, center=False)