# Creating the log-value difference target for our model from daily BDI values.

In [17]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [18]:
bdi = pd.read_csv("../raw_data/data/BDI/interpolated_daily_BDI.csv")

In [19]:
bdi.tail(2)

Unnamed: 0,Date,Price
7064,2022-11-11,1222.2
7065,2022-11-14,1189.0


$$target = (log(𝑦𝑡)−log(𝑦𝑡−1))=log(𝑦𝑡/𝑦𝑡−1)$$

# Set-up:

### For daily log-value difference:

1. create log_price column
2. create a shifted log_price column
3. calculate differences? 

### For weekly (i.e. 5 day since weekends are not included) log-value difference:
1. create log_price column
2. create weekly log-price column 
3. create a shifted weekly log-price column
4. calculate difference
5. read the new columns as csv
6. attempt to implement them in a univariate model to test it out!

# For Daily log-value difference

In [20]:
bdi["abs_price"] = bdi["Price"].diff()

In [21]:
bdi["log_price"] = np.log10(bdi["Price"])

In [22]:
bdi.head(2)

Unnamed: 0,Date,Price,abs_price,log_price
0,1995-01-03,1964.0,,3.293141
1,1995-01-04,1961.0,-3.0,3.292478


In [23]:
bdi["log_diff"] = bdi["log_price"].diff()

In [24]:
bdi.head(2)

Unnamed: 0,Date,Price,abs_price,log_price,log_diff
0,1995-01-03,1964.0,,3.293141,
1,1995-01-04,1961.0,-3.0,3.292478,-0.000664


In [25]:
bdi.to_csv("/Users/justinrlawes/code/LeibnizianOptimist/bdi_predict/raw_data/data/BDI/log_diff_BDI_daily.csv")

# For weekly log_value diference

The aim would be to predict the week log difference (i.e. a horizon of 5 from the daily inputs) using a predefined set of previous days as inputs. 

## Creating a weekly log-price column 

In [14]:
log_price_weekly = bdi.loc[::5, "log_price"]

In [15]:
log_price_weekly.head(4)

0     3.293141
5     3.304275
10    3.316180
15    3.307924
Name: log_price, dtype: float64

In [16]:
bdi["log_price"].head(16)

0     3.293141
1     3.292478
2     3.293804
3     3.297323
4     3.299725
5     3.304275
6     3.305136
7     3.309204
8     3.313867
9     3.315340
10    3.316180
11    3.316180
12    3.315970
13    3.313656
14    3.310481
15    3.307924
Name: log_price, dtype: float64

## Creating a shifted weekly log price column

In [36]:
log_price_weekly_shifted = log_price_weekly.shift(-1)

In [37]:
log_diff_weekly = log_price_weekly - log_price_weekly_shifted 

In [38]:
log_diff_weekly.head(3)

0    -0.011134
5    -0.011905
10    0.008256
Name: log_price, dtype: float64

In [39]:
len(log_diff_weekly)

1414

In [40]:
1414*5

7070

In [41]:
len(bdi)

7066

In [42]:
bdi["log_price_weekly"] = log_price_weekly_shifted.shift(1)

In [43]:
bdi.loc[1, "log_price"]- bdi.loc[5, "log_price"]

-0.011797456809344187

In [44]:
bdi["log_diff_weekly"] = log_diff_weekly.shift(1)

In [45]:
bdi.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7066 entries, 0 to 7065
Data columns (total 6 columns):
 #   Column            Non-Null Count  Dtype  
---  ------            --------------  -----  
 0   Date              7066 non-null   object 
 1   Price             7066 non-null   float64
 2   log_price         7066 non-null   float64
 3   log_diff          7065 non-null   float64
 4   log_diff_weekly   1413 non-null   float64
 5   log_price_weekly  1413 non-null   float64
dtypes: float64(5), object(1)
memory usage: 331.3+ KB


In [46]:
bdi.head(16)

Unnamed: 0,Date,Price,log_price,log_diff,log_diff_weekly,log_price_weekly
0,1995-01-03,1964.0,3.293141,,,
1,1995-01-04,1961.0,3.292478,0.000664,,
2,1995-01-05,1967.0,3.293804,-0.001327,,
3,1995-01-06,1983.0,3.297323,-0.003518,,
4,1995-01-09,1994.0,3.299725,-0.002402,,
5,1995-01-10,2015.0,3.304275,-0.00455,-0.011134,3.304275
6,1995-01-11,2019.0,3.305136,-0.000861,,
7,1995-01-12,2038.0,3.309204,-0.004068,,
8,1995-01-13,2060.0,3.313867,-0.004663,,
9,1995-01-16,2067.0,3.31534,-0.001473,,


In [47]:
bdi.to_csv("/Users/justinrlawes/code/LeibnizianOptimist/bdi_predict/raw_data/data/BDI/log_diff_BDI_weekly.csv")