In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from pathlib import Path
from tqdm.auto import tqdm
from itertools import cycle

# Render every plotly figure in this notebook with the white theme.
pio.templates.default = "plotly_white"

# A bare `np.random.seed()` reseeds from OS entropy, so results would differ on
# every "Restart & Run All". Pin the seed to keep NumPy randomness reproducible.
SEED = 42
np.random.seed(SEED)
# Register the `progress_apply` family of methods on pandas objects.
tqdm.pandas()

In [2]:
# Flip to True to persist figures; the output folder is only created on demand.
SAVE_FIGURES = False
if SAVE_FIGURES:
    Path('imgs/chapter_2').mkdir(parents=True, exist_ok=True)

In [3]:
# Workbook hosted on the UCI machine-learning repository.
DATA_URL = "https://archive.ics.uci.edu/ml/machine-learning-databases/00247/data_akbilgic.xlsx"

# The first spreadsheet row is skipped so the second row supplies the column
# names (presumably a grouping/title row sits above it — confirm in the file).
df = pd.read_excel(DATA_URL, skiprows=1)

# Quick look at the first rows.
df.head()

  warn(msg)


Unnamed: 0,date,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802


In [4]:
# "13-4-1987" can only be day-first (there is no 13th month). Saying so
# explicitly avoids the UserWarning pandas raises when it has to fall back to a
# day-first reading while `dayfirst=False` (the default) is in effect.
pd.to_datetime("13-4-1987", dayfirst=True).strftime('%d, %B %Y')

  pd.to_datetime("13-4-1987").strftime('%d, %B %Y')


'13, April 1987'

In [5]:
# Ambiguous string: with default settings pandas reads it month-first.
month_first_ts = pd.to_datetime('4-1-1987')
month_first_ts.strftime('%d, %B %Y')

'01, April 1987'

In [6]:
# Same ambiguous string, now forced to a day-first reading.
day_first_ts = pd.to_datetime('4-1-1987', dayfirst=True)
day_first_ts.strftime('%d, %B %Y')

'04, January 1987'

In [7]:
# Non-standard separators need an explicit strptime-style layout.
pipe_layout = '%d|%m|%Y'
custom_format_ts = pd.to_datetime('4|1|1987', format=pipe_layout)
custom_format_ts.strftime('%d, %B %Y')

'04, January 1987'

In [8]:
# Parse the `date` column (year-first layout) and store it back on the frame.
parsed_dates = pd.to_datetime(df['date'], yearfirst=True)
df['date'] = parsed_dates

# Show the dtype of the stored column.
df['date'].dtype

dtype('<M8[ns]')

In [9]:
# Earliest and latest timestamps in the `date` column.
date_col = df['date']
(date_col.min(), date_col.max())

(Timestamp('2009-01-05 00:00:00'), Timestamp('2011-02-22 00:00:00'))

In [10]:
# Preview the first five rows after the date conversion.
df.head(n=5)

Unnamed: 0,date,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
0,2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.0,0.03119,0.012698,0.028524
1,2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.01892,0.011341,0.008773
2,2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
3,2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
4,2009-01-09,0.00986,0.009658,-0.021533,-0.019873,-0.01271,-0.004474,-0.009764,-0.010989,-0.007802


In [11]:
# Show what the `.dt` accessor exposes, using the first row as the example.
dates = df['date']
calendar = dates.dt

# The leading and trailing empty strings reproduce the blank lines that frame
# the report.
report_lines = [
    "",
    f"Date: {dates.iloc[0]}",
    f"Day of year: {calendar.day_of_year.iloc[0]}",
    f"Day of week: {calendar.dayofweek.iloc[0]}",
    f"Month: {calendar.month.iloc[0]}",
    f"Month Name: {calendar.month_name().iloc[0]}",
    f"Quarter: {calendar.quarter.iloc[0]}",
    f"Year: {calendar.year.iloc[0]}",
    f"ISO Week: {calendar.isocalendar().week.iloc[0]}",
    "",
]
print("\n".join(report_lines))


Date: 2009-01-05 00:00:00
Day of year: 5
Day of week: 0
Month: 1
Month Name: January
Quarter: 1
Year: 2009
ISO Week: 2



In [12]:
# Promote `date` to the index so rows can be selected with date strings.
# Rebinding (instead of `inplace=True`) and checking that the column is still
# present keeps this cell safe to re-run: a second `set_index('date')` would
# otherwise raise KeyError because `date` is no longer a column.
if 'date' in df.columns:
    df = df.set_index('date')

In [13]:
# All rows from 2010-01-04 onwards (label-based slice on the date index).
df.loc['2010-01-04':]

Unnamed: 0_level_0,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-04,0.010229,0.014478,0.015916,0.000000,0.016018,0.000000,0.000000,0.016778,0.008399
2010-01-05,0.013898,0.024019,0.003111,-0.002722,0.004028,0.002535,0.002780,0.001316,0.008067
2010-01-06,0.007957,0.005706,0.000545,0.000409,0.001357,0.004635,0.006938,0.000586,0.005330
2010-01-07,0.007772,0.007498,0.003993,-0.002484,-0.000597,-0.004650,-0.003938,-0.000360,-0.006375
2010-01-08,-0.003189,0.000835,0.002878,0.003027,0.001356,0.010862,-0.002672,0.003554,0.001229
...,...,...,...,...,...,...,...,...,...
2011-02-16,0.008599,0.013400,0.006238,0.001925,0.007952,0.005717,0.018371,0.006975,0.003039
2011-02-17,0.009310,0.015977,0.003071,-0.001186,0.000345,0.002620,0.001686,-0.000581,0.001039
2011-02-18,0.000191,-0.001653,0.001923,0.002872,-0.000723,0.000568,0.005628,0.000572,0.006938
2011-02-21,-0.013069,-0.013706,-0.020742,-0.014239,-0.011275,0.001358,-0.011942,-0.012615,-0.000958


In [15]:
# Rows between two dates; label-based slices include both endpoints.
df.loc['2010-01-04':'2010-02-06']

Unnamed: 0_level_0,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-04,0.010229,0.014478,0.015916,0.0,0.016018,0.0,0.0,0.016778,0.008399
2010-01-05,0.013898,0.024019,0.003111,-0.002722,0.004028,0.002535,0.00278,0.001316,0.008067
2010-01-06,0.007957,0.005706,0.000545,0.000409,0.001357,0.004635,0.006938,0.000586,0.00533
2010-01-07,0.007772,0.007498,0.003993,-0.002484,-0.000597,-0.00465,-0.003938,-0.00036,-0.006375
2010-01-08,-0.003189,0.000835,0.002878,0.003027,0.001356,0.010862,-0.002672,0.003554,0.001229
2010-01-11,-0.016131,-0.006033,0.001745,0.000479,0.000704,0.007457,0.002417,-0.000498,0.005447
2010-01-12,-0.004549,-0.004339,-0.009425,-0.016273,-0.00714,0.0,-0.005082,-0.009893,-0.003729
2010-01-13,0.017559,0.021362,0.008291,0.003383,-0.004593,-0.013335,0.0044,-7.5e-05,-0.007263
2010-01-14,0.002074,-0.002076,0.002424,0.004307,0.004503,0.015955,-0.008332,0.0055,0.002313
2010-01-15,-0.013517,-0.015516,-0.010882,-0.019033,-0.007815,0.0068,-0.011861,-0.011115,-0.002056


In [16]:
# Everything up to and including 2010: a bare year covers that whole year.
df.loc[:'2010']

Unnamed: 0_level_0,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2009-01-05,0.035754,0.038376,-0.004679,0.002193,0.003894,0.000000,0.031190,0.012698,0.028524
2009-01-06,0.025426,0.031813,0.007787,0.008455,0.012866,0.004162,0.018920,0.011341,0.008773
2009-01-07,-0.028862,-0.026353,-0.030469,-0.017833,-0.028735,0.017293,-0.035899,-0.017073,-0.020015
2009-01-08,-0.062208,-0.084716,0.003391,-0.011726,-0.000466,-0.040061,0.028283,-0.005561,-0.019424
2009-01-09,0.009860,0.009658,-0.021533,-0.019873,-0.012710,-0.004474,-0.009764,-0.010989,-0.007802
...,...,...,...,...,...,...,...,...,...
2010-12-27,0.001346,0.000634,0.000000,0.000000,-0.002082,0.007444,0.000000,-0.005861,-0.002417
2010-12-28,0.001285,0.000438,0.000771,0.000197,0.000000,-0.006137,0.003504,0.000209,-0.000196
2010-12-29,0.004171,-0.005582,0.001009,0.003346,0.000000,0.005031,0.013300,0.001923,0.007861
2010-12-30,0.001535,0.008438,-0.001509,-0.011687,-0.004245,-0.011240,0.005106,-0.009191,0.003314


In [17]:
# January through June 2010, selected with year-month partial strings.
df.loc['2010-01':'2010-06']

Unnamed: 0_level_0,ISE,ISE.1,SP,DAX,FTSE,NIKKEI,BOVESPA,EU,EM
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2010-01-04,0.010229,0.014478,0.015916,0.000000,0.016018,0.000000,0.000000,0.016778,0.008399
2010-01-05,0.013898,0.024019,0.003111,-0.002722,0.004028,0.002535,0.002780,0.001316,0.008067
2010-01-06,0.007957,0.005706,0.000545,0.000409,0.001357,0.004635,0.006938,0.000586,0.005330
2010-01-07,0.007772,0.007498,0.003993,-0.002484,-0.000597,-0.004650,-0.003938,-0.000360,-0.006375
2010-01-08,-0.003189,0.000835,0.002878,0.003027,0.001356,0.010862,-0.002672,0.003554,0.001229
...,...,...,...,...,...,...,...,...,...
2010-06-24,0.000166,-0.008024,-0.016946,-0.014455,-0.015236,0.000467,-0.018948,-0.017801,-0.006717
2010-06-25,-0.000920,-0.003591,0.002855,-0.007366,-0.010585,-0.019411,0.013778,-0.007954,-0.002311
2010-06-28,0.010132,0.016951,-0.002036,0.014168,0.004981,-0.004481,-0.009283,0.009125,0.000540
2010-06-29,-0.021816,-0.028443,-0.031508,-0.033893,-0.031547,-0.012798,-0.035613,-0.034903,-0.021033
