In [1]:
# Pandas
from config import DB_USER, DB_PASS, DB_ENDPOINT
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

# SQL Alchemy
from sqlalchemy import create_engine

In [2]:
 # Create Engine
engine = create_engine(f"postgresql://{DB_USER}:{DB_PASS}@{DB_ENDPOINT}")
conn = engine.connect()

In [3]:
# Query All Records in the the Database
data = pd.read_sql("SELECT * FROM historical", conn)
data.head()

Unnamed: 0,index,crypto,time,open,high,low,close,volume,trade
0,0,bitcoin_gbp,1606349000.0,14063.02,14206.76,12215.41,12934.08,996.456797,569.260191
1,1,bitcoin_gbp,1606435000.0,12939.95,13137.95,12400.0,12947.05,479.649903,338.945379
2,2,bitcoin_gbp,1606522000.0,12959.51,13544.95,12764.78,13384.58,304.392382,153.560657
3,3,bitcoin_gbp,1606608000.0,13388.74,13850.0,13260.7,13734.95,303.258699,156.41113
4,4,bitcoin_gbp,1606694000.0,13742.11,14867.75,13742.11,14827.31,584.021008,362.605071


In [4]:
data = data.drop(labels=["index"], axis=1)
data

Unnamed: 0,crypto,time,open,high,low,close,volume,trade
0,bitcoin_gbp,1.606349e+09,14063.02000000,14206.76000000,12215.41000000,12934.08000000,996.45679700,569.26019100
1,bitcoin_gbp,1.606435e+09,12939.95000000,13137.95000000,12400.00000000,12947.05000000,479.64990300,338.94537900
2,bitcoin_gbp,1.606522e+09,12959.51000000,13544.95000000,12764.78000000,13384.58000000,304.39238200,153.56065700
3,bitcoin_gbp,1.606608e+09,13388.74000000,13850.00000000,13260.70000000,13734.95000000,303.25869900,156.41113000
4,bitcoin_gbp,1.606694e+09,13742.11000000,14867.75000000,13742.11000000,14827.31000000,584.02100800,362.60507100
...,...,...,...,...,...,...,...,...
1536,solana_gbp,1.637539e+09,173.32000000,173.32000000,158.63000000,161.87000000,7266.15000000,3139.43000000
1537,solana_gbp,1.637626e+09,161.35000000,169.36000000,157.81000000,166.45000000,4114.15000000,2220.17000000
1538,solana_gbp,1.637712e+09,166.47000000,166.47000000,150.46000000,154.54000000,4064.37000000,1607.73000000
1539,solana_gbp,1.637798e+09,154.65000000,163.15000000,151.92000000,157.81000000,6067.58000000,2233.40000000


In [5]:
ethereum_df = data[data["crypto"] == "ethereum_gbp"]
ethereum_df = ethereum_df.reset_index(drop=True)
ethereum_df

Unnamed: 0,crypto,time,open,high,low,close,volume,trade
0,ethereum_gbp,1.606349e+09,425.82000000,433.43000000,362.73000000,390.95000000,11532.47951000,6342.33108000
1,ethereum_gbp,1.606435e+09,393.14000000,399.39000000,372.22000000,390.35000000,2817.00171000,1453.05769000
2,ethereum_gbp,1.606522e+09,392.47000000,415.00000000,383.90000000,405.70000000,3089.79232000,1786.37685000
3,ethereum_gbp,1.606608e+09,406.21000000,435.60000000,401.68000000,435.05000000,2099.37215000,1458.57027000
4,ethereum_gbp,1.606694e+09,435.49000000,464.19000000,431.76000000,464.19000000,6548.34914000,4419.21473000
...,...,...,...,...,...,...,...,...
361,ethereum_gbp,1.637539e+09,3187.59000000,3221.81000000,3014.28000000,3058.80000000,3535.81680000,1818.20850000
362,ethereum_gbp,1.637626e+09,3058.53000000,3283.68000000,3039.13000000,3250.27000000,2758.96760000,1569.37840000
363,ethereum_gbp,1.637712e+09,3249.52000000,3277.63000000,3122.16000000,3204.81000000,2221.09350000,1127.63230000
364,ethereum_gbp,1.637798e+09,3206.45000000,3419.35000000,3189.34000000,3399.00000000,2133.21870000,1032.59030000


In [6]:
# drop the crypto column which is just a name
ethereum_df = ethereum_df.drop("crypto", axis=1)
ethereum_df

Unnamed: 0,time,open,high,low,close,volume,trade
0,1.606349e+09,425.82000000,433.43000000,362.73000000,390.95000000,11532.47951000,6342.33108000
1,1.606435e+09,393.14000000,399.39000000,372.22000000,390.35000000,2817.00171000,1453.05769000
2,1.606522e+09,392.47000000,415.00000000,383.90000000,405.70000000,3089.79232000,1786.37685000
3,1.606608e+09,406.21000000,435.60000000,401.68000000,435.05000000,2099.37215000,1458.57027000
4,1.606694e+09,435.49000000,464.19000000,431.76000000,464.19000000,6548.34914000,4419.21473000
...,...,...,...,...,...,...,...
361,1.637539e+09,3187.59000000,3221.81000000,3014.28000000,3058.80000000,3535.81680000,1818.20850000
362,1.637626e+09,3058.53000000,3283.68000000,3039.13000000,3250.27000000,2758.96760000,1569.37840000
363,1.637712e+09,3249.52000000,3277.63000000,3122.16000000,3204.81000000,2221.09350000,1127.63230000
364,1.637798e+09,3206.45000000,3419.35000000,3189.34000000,3399.00000000,2133.21870000,1032.59030000


In [7]:
# Create a variable for predicting 'n' days in the future
projection = 14
# Create another column called prediction
ethereum_df["prediction"] = ethereum_df[["close"]].shift(-projection)
# Show the data
ethereum_df

Unnamed: 0,time,open,high,low,close,volume,trade,prediction
0,1.606349e+09,425.82000000,433.43000000,362.73000000,390.95000000,11532.47951000,6342.33108000,421.36000000
1,1.606435e+09,393.14000000,399.39000000,372.22000000,390.35000000,2817.00171000,1453.05769000,413.51000000
2,1.606522e+09,392.47000000,415.00000000,383.90000000,405.70000000,3089.79232000,1786.37685000,431.93000000
3,1.606608e+09,406.21000000,435.60000000,401.68000000,435.05000000,2099.37215000,1458.57027000,446.10000000
4,1.606694e+09,435.49000000,464.19000000,431.76000000,464.19000000,6548.34914000,4419.21473000,441.23000000
...,...,...,...,...,...,...,...,...
361,1.637539e+09,3187.59000000,3221.81000000,3014.28000000,3058.80000000,3535.81680000,1818.20850000,
362,1.637626e+09,3058.53000000,3283.68000000,3039.13000000,3250.27000000,2758.96760000,1569.37840000,
363,1.637712e+09,3249.52000000,3277.63000000,3122.16000000,3204.81000000,2221.09350000,1127.63230000,
364,1.637798e+09,3206.45000000,3419.35000000,3189.34000000,3399.00000000,2133.21870000,1032.59030000,


In [8]:

ethereum_df.dtypes


time          float64
open           object
high           object
low            object
close          object
volume         object
trade          object
prediction     object
dtype: object

In [9]:
# convert to numerical
ethereum_df = ethereum_df.astype(float)
ethereum_df.dtypes

time          float64
open          float64
high          float64
low           float64
close         float64
volume        float64
trade         float64
prediction    float64
dtype: object

In [10]:
# Set X (features)
X = ethereum_df[:-projection]
X = X.drop("prediction", axis=1)
X

Unnamed: 0,time,open,high,low,close,volume,trade
0,1.606349e+09,425.82,433.43,362.73,390.95,11532.47951,6342.33108
1,1.606435e+09,393.14,399.39,372.22,390.35,2817.00171,1453.05769
2,1.606522e+09,392.47,415.00,383.90,405.70,3089.79232,1786.37685
3,1.606608e+09,406.21,435.60,401.68,435.05,2099.37215,1458.57027
4,1.606694e+09,435.49,464.19,431.76,464.19,6548.34914,4419.21473
...,...,...,...,...,...,...,...
347,1.636330e+09,3369.22,3494.99,3369.04,3485.44,3578.05037,1609.96787
348,1.636416e+09,3485.02,3508.00,3419.13,3442.60,3661.63547,1977.86547
349,1.636502e+09,3442.07,3538.52,3305.01,3434.29,4530.77490,2287.41920
350,1.636589e+09,3430.00,3564.25,3375.95,3535.07,2416.01612,1112.48962


In [11]:
# Set y label

y = ethereum_df["prediction"].values
y = y[:-projection]
y

array([ 421.36,  413.51,  431.93,  446.1 ,  441.23,  439.88,  473.75,
        475.32,  486.67,  490.81,  479.11,  453.99,  477.34,  436.07,
        453.18,  464.95,  473.25,  505.61,  543.5 ,  543.51,  553.49,
        539.64,  535.22,  569.93,  719.64,  769.46,  811.1 ,  891.92,
        906.73,  901.36,  945.34,  933.38,  809.91,  770.2 ,  829.27,
        901.85,  863.56,  906.2 ,  909.66,  927.  , 1007.  , 1008.76,
        811.98,  903.79,  904.86, 1020.  ,  967.42,  995.1 ,  908.83,
        973.  , 1010.25, 1009.93,  962.73, 1007.74, 1109.57, 1223.59,
       1171.17, 1255.73, 1226.65, 1178.31, 1277.96, 1286.47, 1266.08,
       1300.59, 1338.19, 1321.5 , 1308.95, 1283.71, 1289.75, 1339.19,
       1391.48, 1401.42, 1375.  , 1388.14, 1271.24, 1122.75, 1153.24,
       1062.34, 1041.88, 1055.05, 1020.53, 1131.43, 1070.6 , 1130.36,
       1111.66, 1107.3 , 1194.23, 1249.11, 1330.69, 1350.49, 1292.83,
       1309.77, 1271.99, 1381.99, 1333.39, 1294.16, 1300.55, 1306.16,
       1279.68, 1309

In [12]:
# Split into train and test
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

In [13]:
# Scale the data
X_scaler = StandardScaler()
X_scaler.fit(X_train)

StandardScaler()

In [14]:
# Apply the StandardScaler to the train and test daatsets
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [15]:
# Create model
linReg = LinearRegression()

In [17]:
# Train the model
linReg.fit(X_train_scaled, y_train)

LinearRegression()

In [18]:
#print confindence score
model_score = linReg.score(X_test_scaled, y_test)

print(f"Model R2 Score: {model_score}")

Model R2 Score: 0.8323122122423077


In [None]:
# Create a variable called X_project and set it equal to the last 14 rows of the original dataset
X_projection = np.array(etherum_df[["close"]])