In [1]:
# Pandas
from config import DB_USER, DB_PASS, DB_ENDPOINT
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.model_selection import TimeSeriesSplit

# SQL Alchemy
from sqlalchemy import create_engine

In [2]:
 # Create Engine
engine = create_engine(f"postgresql://{DB_USER}:{DB_PASS}@{DB_ENDPOINT}")
conn = engine.connect()

In [3]:
# Query All Records in the the Database
data = pd.read_sql("SELECT * FROM historical", conn)
data.head()

Unnamed: 0,index,crypto,time,open,high,low,close,volume,trade
0,0,bitcoin_usdt,1511914000.0,9896.79,11300.03,8520.0,9687.88,13352.538715,8662.632362
1,1,bitcoin_usdt,1512000000.0,9687.88,10900.0,8850.8,9838.96,9389.574329,5360.660242
2,2,bitcoin_usdt,1512086000.0,9837.0,10898.0,9380.0,10782.99,6134.923633,3269.868588
3,3,bitcoin_usdt,1512173000.0,10775.04,11190.0,10620.0,10890.01,4765.439757,2390.194275
4,4,bitcoin_usdt,1512259000.0,10902.69,11825.0,10500.0,11165.41,5346.636524,2574.246187


In [4]:
data = data.drop(labels=["index"], axis=1)
data

Unnamed: 0,crypto,time,open,high,low,close,volume,trade
0,bitcoin_usdt,1.511914e+09,9896.79000000,11300.03000000,8520.00000000,9687.88000000,13352.53871500,8662.63236200
1,bitcoin_usdt,1.512000e+09,9687.88000000,10900.00000000,8850.80000000,9838.96000000,9389.57432900,5360.66024200
2,bitcoin_usdt,1.512086e+09,9837.00000000,10898.00000000,9380.00000000,10782.99000000,6134.92363300,3269.86858800
3,bitcoin_usdt,1.512173e+09,10775.04000000,11190.00000000,10620.00000000,10890.01000000,4765.43975700,2390.19427500
4,bitcoin_usdt,1.512259e+09,10902.69000000,11825.00000000,10500.00000000,11165.41000000,5346.63652400,2574.24618700
...,...,...,...,...,...,...,...,...
6024,solana_usdt,1.637798e+09,205.76000000,216.35000000,201.93000000,209.97000000,1764837.19000000,886350.87000000
6025,solana_usdt,1.637885e+09,209.94000000,210.53000000,182.70000000,192.42000000,2776105.44000000,1241556.23000000
6026,solana_usdt,1.637971e+09,192.37000000,199.39000000,190.61000000,192.74000000,1028955.27000000,500367.73000000
6027,solana_usdt,1.638058e+09,192.74000000,202.10000000,180.93000000,200.69000000,1560980.85000000,789804.92000000


In [5]:
ethereum_df = data[data["crypto"] == "ethereum_usdt"]
ethereum_df = ethereum_df.reset_index(drop=True)

In [6]:
#take only the first 30 rows.
ethereum_df = ethereum_df.iloc[:30]

In [7]:
ethereum_df["time"] = pd.to_datetime(ethereum_df['time'], unit="s")
ethereum_df

Unnamed: 0,crypto,time,open,high,low,close,volume,trade
0,ethereum_usdt,2017-11-29,466.17,515.0,385.0,421.7,51313.49571,23825.39377
1,ethereum_usdt,2017-11-30,421.92,460.0,387.01,427.43,45184.22265,24018.42012
2,ethereum_usdt,2017-12-01,428.05,465.97,413.74,457.51,33072.21995,19043.24922
3,ethereum_usdt,2017-12-02,457.53,471.91,445.0,457.63,25796.14,12336.59141
4,ethereum_usdt,2017-12-03,457.31,485.9,440.11,460.3,24490.05997,12239.10293
5,ethereum_usdt,2017-12-04,460.3,470.98,445.0,466.67,23182.65949,11708.14715
6,ethereum_usdt,2017-12-05,466.66,468.67,445.21,452.26,25387.11695,12902.22244
7,ethereum_usdt,2017-12-06,452.26,455.1,400.0,411.01,38638.76036,16000.21644
8,ethereum_usdt,2017-12-07,413.5,427.61,375.01,406.33,41343.73269,17174.99356
9,ethereum_usdt,2017-12-08,406.52,456.78,390.0,442.56,41064.60305,17149.6329


In [8]:
#actual valuues for last 3 days
original_close = ethereum_df["close"][-3:]
print(original_close)

27    750.00000000
28    739.89000000
29    707.89000000
Name: close, dtype: object


In [9]:
# make the predictions 3 days in the future for the target.
projection = 3
ethereum_df["prediction"] = ethereum_df["close"].shift(-projection, fill_value=0)
ethereum_df

Unnamed: 0,crypto,time,open,high,low,close,volume,trade,prediction
0,ethereum_usdt,2017-11-29,466.17,515.0,385.0,421.7,51313.49571,23825.39377,457.63
1,ethereum_usdt,2017-11-30,421.92,460.0,387.01,427.43,45184.22265,24018.42012,460.3
2,ethereum_usdt,2017-12-01,428.05,465.97,413.74,457.51,33072.21995,19043.24922,466.67
3,ethereum_usdt,2017-12-02,457.53,471.91,445.0,457.63,25796.14,12336.59141,452.26
4,ethereum_usdt,2017-12-03,457.31,485.9,440.11,460.3,24490.05997,12239.10293,411.01
5,ethereum_usdt,2017-12-04,460.3,470.98,445.0,466.67,23182.65949,11708.14715,406.33
6,ethereum_usdt,2017-12-05,466.66,468.67,445.21,452.26,25387.11695,12902.22244,442.56
7,ethereum_usdt,2017-12-06,452.26,455.1,400.0,411.01,38638.76036,16000.21644,460.0
8,ethereum_usdt,2017-12-07,413.5,427.61,375.01,406.33,41343.73269,17174.99356,427.35
9,ethereum_usdt,2017-12-08,406.52,456.78,390.0,442.56,41064.60305,17149.6329,508.7


In [10]:
ethereum_df = ethereum_df.set_index("time")
ethereum_df

Unnamed: 0_level_0,crypto,open,high,low,close,volume,trade,prediction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2017-11-29,ethereum_usdt,466.17,515.0,385.0,421.7,51313.49571,23825.39377,457.63
2017-11-30,ethereum_usdt,421.92,460.0,387.01,427.43,45184.22265,24018.42012,460.3
2017-12-01,ethereum_usdt,428.05,465.97,413.74,457.51,33072.21995,19043.24922,466.67
2017-12-02,ethereum_usdt,457.53,471.91,445.0,457.63,25796.14,12336.59141,452.26
2017-12-03,ethereum_usdt,457.31,485.9,440.11,460.3,24490.05997,12239.10293,411.01
2017-12-04,ethereum_usdt,460.3,470.98,445.0,466.67,23182.65949,11708.14715,406.33
2017-12-05,ethereum_usdt,466.66,468.67,445.21,452.26,25387.11695,12902.22244,442.56
2017-12-06,ethereum_usdt,452.26,455.1,400.0,411.01,38638.76036,16000.21644,460.0
2017-12-07,ethereum_usdt,413.5,427.61,375.01,406.33,41343.73269,17174.99356,427.35
2017-12-08,ethereum_usdt,406.52,456.78,390.0,442.56,41064.60305,17149.6329,508.7


In [11]:
# drop the crypto column which is just a name
ethereum_df = ethereum_df.drop("crypto", axis=1)
ethereum_df

Unnamed: 0_level_0,open,high,low,close,volume,trade,prediction
time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2017-11-29,466.17,515.0,385.0,421.7,51313.49571,23825.39377,457.63
2017-11-30,421.92,460.0,387.01,427.43,45184.22265,24018.42012,460.3
2017-12-01,428.05,465.97,413.74,457.51,33072.21995,19043.24922,466.67
2017-12-02,457.53,471.91,445.0,457.63,25796.14,12336.59141,452.26
2017-12-03,457.31,485.9,440.11,460.3,24490.05997,12239.10293,411.01
2017-12-04,460.3,470.98,445.0,466.67,23182.65949,11708.14715,406.33
2017-12-05,466.66,468.67,445.21,452.26,25387.11695,12902.22244,442.56
2017-12-06,452.26,455.1,400.0,411.01,38638.76036,16000.21644,460.0
2017-12-07,413.5,427.61,375.01,406.33,41343.73269,17174.99356,427.35
2017-12-08,406.52,456.78,390.0,442.56,41064.60305,17149.6329,508.7


In [12]:
ethereum_df.dtypes

open          object
high          object
low           object
close         object
volume        object
trade         object
prediction    object
dtype: object

In [13]:
# convert to numerical
ethereum_df = ethereum_df.astype({"open": float, "high": float, "low":float, "close": float, "volume": float, "trade": float, "prediction": float})
ethereum_df.dtypes

open          float64
high          float64
low           float64
close         float64
volume        float64
trade         float64
prediction    float64
dtype: object

In [14]:
X = ethereum_df.drop("prediction", axis=1)
X = X[:-projection].values
X

array([[   466.17   ,    515.     ,    385.     ,    421.7    ,
         51313.49571,  23825.39377],
       [   421.92   ,    460.     ,    387.01   ,    427.43   ,
         45184.22265,  24018.42012],
       [   428.05   ,    465.97   ,    413.74   ,    457.51   ,
         33072.21995,  19043.24922],
       [   457.53   ,    471.91   ,    445.     ,    457.63   ,
         25796.14   ,  12336.59141],
       [   457.31   ,    485.9    ,    440.11   ,    460.3    ,
         24490.05997,  12239.10293],
       [   460.3    ,    470.98   ,    445.     ,    466.67   ,
         23182.65949,  11708.14715],
       [   466.66   ,    468.67   ,    445.21   ,    452.26   ,
         25387.11695,  12902.22244],
       [   452.26   ,    455.1    ,    400.     ,    411.01   ,
         38638.76036,  16000.21644],
       [   413.5    ,    427.61   ,    375.01   ,    406.33   ,
         41343.73269,  17174.99356],
       [   406.52   ,    456.78   ,    390.     ,    442.56   ,
         41064.60305,  1714

In [15]:
# Set y label
y = ethereum_df[["prediction"]]
y = y[:-projection].values
y

array([[457.63],
       [460.3 ],
       [466.67],
       [452.26],
       [411.01],
       [406.33],
       [442.56],
       [460.  ],
       [427.35],
       [508.7 ],
       [620.  ],
       [683.99],
       [679.76],
       [674.95],
       [682.81],
       [707.64],
       [784.  ],
       [798.  ],
       [790.21],
       [782.41],
       [635.97],
       [640.34],
       [658.  ],
       [714.95],
       [750.  ],
       [739.89],
       [707.89]])

In [16]:
ethereum_df.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 30 entries, 2017-11-29 to 2017-12-28
Data columns (total 7 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   open        30 non-null     float64
 1   high        30 non-null     float64
 2   low         30 non-null     float64
 3   close       30 non-null     float64
 4   volume      30 non-null     float64
 5   trade       30 non-null     float64
 6   prediction  30 non-null     float64
dtypes: float64(7)
memory usage: 1.9 KB


In [17]:
# train = ethereum_df.loc["2017-11": "2018-10"]
# test = ethereum_df.loc["2018-11"]


In [18]:
# X_train, y_train = train.drop("prediction", axis=1), train[["prediction"]]
# X_test, y_test =  test.drop("prediction", axis=1), test[["prediction"]]

In [19]:
#X_train["open_shift_1"] = X_train["open"].shift(1, fill_value=0)
#X_test["open_shift_1"] = X_train["open"].shift(1, fill_value=0)

In [20]:
# Create model
model = LinearRegression()

In [21]:
tscv = TimeSeriesSplit()
#print(tscv)

for train_index, test_index in tscv.split(X):
    print("TRAIN:", train_index, "TEST:", test_index)
    
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    
    model.fit(X_train, y_train)

TRAIN: [0 1 2 3 4 5 6] TEST: [ 7  8  9 10]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10] TEST: [11 12 13 14]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14] TEST: [15 16 17 18]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18] TEST: [19 20 21 22]
TRAIN: [ 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22] TEST: [23 24 25 26]


In [22]:
# # Scale the data
# X_scaler = StandardScaler()
# X_scaler.fit(X_train)

In [23]:
# # Apply the StandardScaler to the train and test daatsets
# X_train_scaled = X_scaler.transform(X_train)
# X_test_scaled = X_scaler.transform(X_test)

In [24]:
# # Train the model
# model.fit(X_train_scaled, y_train)

In [25]:
predictions = model.predict(X_test)
print(predictions)

[[313.65854487]
 [524.75622573]
 [459.57889714]
 [578.64659425]]


In [26]:
from sklearn.metrics import mean_absolute_error

In [27]:
mean_absolute_error(y_test, predictions)

259.0224345007655

In [28]:
y_test

array([[714.95],
       [750.  ],
       [739.89],
       [707.89]])

In [29]:
data = pd.DataFrame()
data["actual"] = y_test
data["predictions"] = predictions
data

ValueError: Cannot set a frame with no defined index and a value that cannot be converted to a Series

In [None]:
data.plot(figsize=(14,10));

In [None]:
data.dtypes

In [None]:
data = pd.DataFrame()
data["scores"] = model.coef_[0]
data["features"] = X_train.columns.tolist()
data

In [None]:
data.plot(kind="bar", x="features", y="scores")

In [None]:
#print confindence score
model_score = model.score(X_test_scaled, y_test)

print(f"Model R2 Score: {model_score}")

In [None]:
# Create a variable called X_project and set it equal to the last 14 rows of the original dataset
#X_projection = np.array(etherum_df[["close"]])