# Time Series
## DatetimeIndex

In [116]:
# Let's start by importing the packages we use in this chapter
# and by setting the plotting backend to Plotly
import pandas as pd
import numpy as np
pd.options.plotting.backend = "plotly"

In [117]:
# This creates a DatetimeIndex based on a start timestamp,
# number of periods and frequency ("D" = daily).
daily_index = pd.date_range("2020-1-1", periods=5, freq="d")
daily_index

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05'],
              dtype='datetime64[ns]', freq='D')

In [118]:
# This creates a DatetimeIndex based on start/end timestamp.
# The frequency is set to "weekly on Sundays" ("W-SUN").
weekly_index = pd.date_range("2022-05-15", "2022-07-30", freq="W-SUN")
weekly_index

DatetimeIndex(['2022-05-15', '2022-05-22', '2022-05-29', '2022-06-05',
               '2022-06-12', '2022-06-19', '2022-06-26', '2022-07-03',
               '2022-07-10', '2022-07-17', '2022-07-24'],
              dtype='datetime64[ns]', freq='W-SUN')

In [119]:
# Construct a DataFrame based on the weekly_index. This could be
# the visitor count of a museum that only opens on Sundays.
df_ = pd.DataFrame(data=[21, 15,12,34,45,34,56, 33, 45,45,34],
             columns=["visitors"], index=weekly_index)
df_

Unnamed: 0,visitors
2022-05-15,21
2022-05-22,15
2022-05-29,12
2022-06-05,34
2022-06-12,45
2022-06-19,34
2022-06-26,56
2022-07-03,33
2022-07-10,45
2022-07-17,45


In [120]:
## Arno Selbstversuch
data = [34,499,234,294,293, 294, 244,444,423,5,135, 234]
weekly_index = pd.date_range("2022-05-15", periods=len(data))
_df = pd.DataFrame(data=data, index= weekly_index)
_df.plot.bar(width=720, height= 320)

In [121]:
msft = pd.read_csv("csv/MSFT.csv")

In [122]:
msft.sample(3)

Unnamed: 0,Date,Open,High,Low,Close,Adj Close,Volume
5052,2006-03-22,27.08,27.5,26.799999,27.15,19.896133,145696100
6946,2013-09-30,33.0,33.310001,32.700001,33.279999,28.703634,39839500
591,1988-07-14,0.463542,0.465278,0.458333,0.465278,0.297696,34848000


In [123]:
msft.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8622 entries, 0 to 8621
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Date       8622 non-null   object 
 1   Open       8622 non-null   float64
 2   High       8622 non-null   float64
 3   Low        8622 non-null   float64
 4   Close      8622 non-null   float64
 5   Adj Close  8622 non-null   float64
 6   Volume     8622 non-null   int64  
dtypes: float64(5), int64(1), object(1)
memory usage: 471.6+ KB


In [124]:
msft.loc[:, "Date"] = pd.to_datetime(msft["Date"])

In [125]:
msft["Date"]= pd.to_datetime(msft["Date"])

In [126]:
msft.dtypes

Date         datetime64[ns]
Open                float64
High                float64
Low                 float64
Close               float64
Adj Close           float64
Volume                int64
dtype: object

In [127]:
msft = pd.read_csv("csv/MSFT.csv",
                   index_col="Date", parse_dates=["Date"])

In [128]:
msft.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8622 entries, 1986-03-13 to 2020-05-27
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Open       8622 non-null   float64
 1   High       8622 non-null   float64
 2   Low        8622 non-null   float64
 3   Close      8622 non-null   float64
 4   Adj Close  8622 non-null   float64
 5   Volume     8622 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 471.5 KB


In [129]:
msft.loc[:, "Volume"] = msft["Volume"].astype("float")
msft["Volume"].dtype

dtype('float64')

In [130]:
msft = msft.sort_index()

In [131]:
msft.index.date

array([datetime.date(1986, 3, 13), datetime.date(1986, 3, 14),
       datetime.date(1986, 3, 17), ..., datetime.date(2020, 5, 22),
       datetime.date(2020, 5, 26), datetime.date(2020, 5, 27)],
      dtype=object)

In [132]:
msft.loc["2019", "Adj Close"]

Date
2019-01-02     99.099190
2019-01-03     95.453529
2019-01-04     99.893005
2019-01-07    100.020401
2019-01-08    100.745613
                 ...    
2019-12-24    156.515396
2019-12-26    157.798309
2019-12-27    158.086731
2019-12-30    156.724243
2019-12-31    156.833633
Name: Adj Close, Length: 252, dtype: float64

In [133]:
msft.loc["2019-06":"2020-05", "Adj Close"].plot()

## Working with Time Zones

In [134]:
# Add the time information to the date
msft_close = msft.loc[:, ["Adj Close"]].copy()
msft_close.index = msft_close.index + pd.DateOffset(hours=16)
msft_close.head(2)

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
1986-03-13 16:00:00,0.062205
1986-03-14 16:00:00,0.064427


In [135]:
# Make the timestamps time-zone-aware
msft_close = msft_close.tz_localize("America/New_York")
msft_close.head(2)

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
1986-03-13 16:00:00-05:00,0.062205
1986-03-14 16:00:00-05:00,0.064427


In [136]:
msft_close = msft_close.tz_convert("UTC")
msft_close.loc["2020-01-02", "Adj Close"]  # 21:00 without DST

Date
2020-01-02 21:00:00+00:00    159.737595
Name: Adj Close, dtype: float64

In [137]:
msft_close.loc["2020-05-01", "Adj Close"]  # 20:00 with DST

Date
2020-05-01 20:00:00+00:00    174.085175
Name: Adj Close, dtype: float64

## Shifting and Percentage Changes

In [138]:
msft_close.head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
1986-03-13 21:00:00+00:00,0.062205
1986-03-14 21:00:00+00:00,0.064427
1986-03-17 21:00:00+00:00,0.065537
1986-03-18 21:00:00+00:00,0.063871
1986-03-19 21:00:00+00:00,0.06276


In [139]:
msft_close.shift(1).head()

Unnamed: 0_level_0,Adj Close
Date,Unnamed: 1_level_1
1986-03-13 21:00:00+00:00,
1986-03-14 21:00:00+00:00,0.062205
1986-03-17 21:00:00+00:00,0.064427
1986-03-18 21:00:00+00:00,0.065537
1986-03-19 21:00:00+00:00,0.063871


In [140]:
returns = np.log(msft_close / msft_close.shift(1))
returns = returns.rename(columns={"Adj Close": "returns"})
returns.head()

Unnamed: 0_level_0,returns
Date,Unnamed: 1_level_1
1986-03-13 21:00:00+00:00,
1986-03-14 21:00:00+00:00,0.035097
1986-03-17 21:00:00+00:00,0.017082
1986-03-18 21:00:00+00:00,-0.025749
1986-03-19 21:00:00+00:00,-0.017547


In [141]:
# Plot a histogram with the daily log returns
returns.plot.hist()

In [142]:
simple_rets = msft_close.pct_change()
simple_rets = simple_rets.rename(columns={"Adj Close": "simple rets"})
simple_rets.head()

Unnamed: 0_level_0,simple rets
Date,Unnamed: 1_level_1
1986-03-13 21:00:00+00:00,
1986-03-14 21:00:00+00:00,0.035721
1986-03-17 21:00:00+00:00,0.017229
1986-03-18 21:00:00+00:00,-0.025421
1986-03-19 21:00:00+00:00,-0.017394


## Rebasing and Correlation

In [143]:
parts = []  # List to collect individual DataFrames
for ticker in ["AAPL", "AMZN", "GOOGL", "MSFT"]:
    # "usecols" allows us to only read in the Date and Adj Close
    # For a refresher about f-strings, see Chapter 3
    adj_close = pd.read_csv(f"csv/{ticker}.csv",
                            index_col="Date", parse_dates=["Date"],
                            usecols=["Date", "Adj Close"])
    # Rename the column into the ticker symbol
    # (If you type this example by hand, make sure to keep the
    # following lines correctly indented!)
    adj_close = adj_close.rename(columns={"Adj Close": ticker})
    # Append the stock's DataFrame to the parts list
    parts.append(adj_close)

In [144]:
# Combine the 4 DataFrames into a single DataFrame
adj_close = pd.concat(parts, axis=1)
adj_close

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
1980-12-12,0.405683,,,
1980-12-15,0.384517,,,
1980-12-16,0.356296,,,
1980-12-17,0.365115,,,
1980-12-18,0.375698,,,
...,...,...,...,...
2020-05-22,318.890015,2436.879883,1413.239990,183.509995
2020-05-26,316.730011,2421.860107,1421.369995,181.570007
2020-05-27,318.109985,2410.389893,1420.280029,181.809998
2020-05-28,318.250000,2401.100098,1418.239990,


In [145]:
adj_close = adj_close.dropna()
adj_close#.info()

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-19,1.898969,38.630001,50.220219,17.505459
2004-08-20,1.904534,39.509998,54.209209,17.557100
2004-08-23,1.921849,39.450001,54.754753,17.634779
2004-08-24,1.975645,39.049999,52.487488,17.634779
2004-08-25,2.043664,40.299999,53.053055,17.835468
...,...,...,...,...
2020-05-20,319.230011,2497.939941,1409.160034,185.660004
2020-05-21,316.850006,2446.739990,1406.750000,183.429993
2020-05-22,318.890015,2436.879883,1413.239990,183.509995
2020-05-26,316.730011,2421.860107,1421.369995,181.570007


In [146]:
# Beispiel Arno
_adj_close_rebased = adj_close / adj_close.iloc[0,:] * 100
_adj_close_rebased

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-19,100.000000,100.000000,100.000000,100.000000
2004-08-20,100.293054,102.278014,107.942996,100.294999
2004-08-23,101.204864,102.122703,109.029300,100.738741
2004-08-24,104.037770,101.087233,104.514654,100.738741
2004-08-25,107.619661,104.323060,105.640828,101.885178
...,...,...,...,...
2020-05-20,16810.701544,6466.321192,2805.961547,1060.583467
2020-05-21,16685.370114,6333.781845,2801.162615,1047.844521
2020-05-22,16792.797302,6308.257365,2814.085677,1048.301533
2020-05-26,16679.051159,6269.376247,2830.274386,1037.219344


In [147]:
_adj_close_rebased.plot()

In [148]:
# Use a sample from June 2019 - May 2020
adj_close_sample = adj_close.loc["2019-06":"2020-05", :]
rebased_prices = adj_close_sample / adj_close_sample.iloc[0, :] * 100
rebased_prices.head(2)

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2019-06-03,100.0,100.0,100.0,100.0
2019-06-04,103.658406,102.178197,101.51626,102.770372


In [149]:
rebased_prices.plot()

In [150]:
# Correlation of daily log returns
returns = np.log(adj_close / adj_close.shift(1))
returns.corr()

Unnamed: 0,AAPL,AMZN,GOOGL,MSFT
AAPL,1.0,0.42491,0.503497,0.486065
AMZN,0.42491,1.0,0.48669,0.485725
GOOGL,0.503497,0.48669,1.0,0.525645
MSFT,0.486065,0.485725,0.525645,1.0


In [151]:
import plotly.express as px

In [152]:
fig = px.imshow(returns.corr(),
                x=adj_close.columns,
                y=adj_close.columns,
                color_continuous_scale=list(
                    reversed(px.colors.sequential.RdBu)),
                zmin=-1, zmax=1)
fig.show()

## Resampling

In [153]:
end_of_month = adj_close.resample("M").last()
end_of_month.head(20)

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-31,2.132708,38.139999,51.236237,17.67363
2004-09-30,2.396127,40.860001,64.864868,17.900215
2004-10-31,3.240182,34.130001,95.415413,18.107374
2004-11-30,4.146072,39.68,91.081078,19.344421
2004-12-31,3.982207,44.290001,96.491493,19.27948
2005-01-31,4.755152,43.220001,97.907906,18.962008
2005-02-28,5.547884,35.18,94.089088,18.209888
2005-03-31,5.153373,34.27,90.345345,17.493368
2005-04-30,4.459579,32.360001,110.110107,18.31122
2005-05-31,4.917161,35.509998,138.773773,18.732344


In [154]:
end_of_month.resample("D").asfreq().head()  # No transformation

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-08-31,2.132708,38.139999,51.236237,17.67363
2004-09-01,,,,
2004-09-02,,,,
2004-09-03,,,,
2004-09-04,,,,


In [155]:
end_of_month.resample("W-FRI").ffill().head(20)  # Forward fill

Unnamed: 0_level_0,AAPL,AMZN,GOOGL,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2004-09-03,2.132708,38.139999,51.236237,17.67363
2004-09-10,2.132708,38.139999,51.236237,17.67363
2004-09-17,2.132708,38.139999,51.236237,17.67363
2004-09-24,2.132708,38.139999,51.236237,17.67363
2004-10-01,2.396127,40.860001,64.864868,17.900215
2004-10-08,2.396127,40.860001,64.864868,17.900215
2004-10-15,2.396127,40.860001,64.864868,17.900215
2004-10-22,2.396127,40.860001,64.864868,17.900215
2004-10-29,2.396127,40.860001,64.864868,17.900215
2004-11-05,3.240182,34.130001,95.415413,18.107374


## Rolling Windows

In [157]:
# Plot the moving average for MSFT with data from 2019
msft19 = msft.loc["2021", ["Adj Close"]].copy()

# Add the 25 day moving average as a new column to the DataFrame
msft19.loc[:, "25day average"] = msft19["Adj Close"].rolling(25).mean()
msft19.plot()

KeyError: '2021'