# IMPORT DATA
Import the data and display first 5 instances

In [55]:
import joblib
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.feature_selection import RFE
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [56]:
import yfinance as yf

mold = pd.DataFrame()
finance = ['BTC','ETH','ADA', 'BNB','USDT','DOT','XRP','UNI','LTC', 'LINK',
           'BCH','USDC', 'XLM','THETA','XTZ','DOGE','LUNA', 'FIL','VET','ATOM']
names = ['Bitcoin','Ethereum','Cardano','Binance Coin', 'Tether','Polkadot','Ripple', 'Uniswap','Litecoin','Chainlink',
         'Bitcoin Cash', 'USD Coin', 'Stellar', 'Theta','Tezos','Dogecoin','Terra','Filecoin','VeChain','Cosmos',]

for coin in finance:
    index = finance.index(coin)
    df = yf.Ticker(f'{coin}-USD').history(start='2020-03-15', end='2022-03-15', interval='1d')
    df = pd.DataFrame(df)
    df['names'] = names[index]
    df['Symbol'] = coin
    mold = mold.append(df)

print(mold.reset_index())
mold.to_csv('data_crypto.csv')

# read data
df = pd.read_csv('data_crypto.csv')
print(df.shape)

(14422, 10)


In [57]:
# Date is in object, I will convert it to readable python date
df['Date'] = pd.to_datetime(df.Date)

# Drop columns
df = df.drop(['Dividends','Stock Splits','names', 'Volume'], axis=1)
df.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Symbol
0,2020-03-15,5201.066895,5836.64502,5169.283203,5392.314941,BTC
1,2020-03-16,5385.229492,5385.229492,4575.35791,5014.47998,BTC
2,2020-03-17,5002.578125,5371.348633,4981.90918,5225.629395,BTC
3,2020-03-18,5227.11377,5331.833984,5069.335938,5238.438477,BTC
4,2020-03-19,5245.416504,6329.73584,5236.96875,6191.192871,BTC


ENCODING SYMBOL

In [58]:
# ordinal encoding-----mapping desired order with a dictionary
coins_dict= {'BTC':1,'ETH':2,'ADA':3, 'BNB':4,'USDT':5,'DOT':6,'XRP':7,'UNI':8,'LTC':9, 'LINK':10,
           'BCH':11,'USDC':12,'XLM':13,'THETA':14,'XTZ':15,'DOGE':16,'LUNA':17, 'FIL':18,'VET':19,'ATOM':20}



# assignong the values
df['Symbol'] = df.Symbol.map(coins_dict)
df

Unnamed: 0,Date,Open,High,Low,Close,Symbol
0,2020-03-15,5201.066895,5836.645020,5169.283203,5392.314941,1
1,2020-03-16,5385.229492,5385.229492,4575.357910,5014.479980,1
2,2020-03-17,5002.578125,5371.348633,4981.909180,5225.629395,1
3,2020-03-18,5227.113770,5331.833984,5069.335938,5238.438477,1
4,2020-03-19,5245.416504,6329.735840,5236.968750,6191.192871,1
...,...,...,...,...,...,...
14417,2022-03-11,28.047722,28.389145,27.091694,27.528900,20
14418,2022-03-12,27.527025,28.227278,27.215895,27.219019,20
14419,2022-03-13,27.215071,27.495592,26.077883,26.297186,20
14420,2022-03-14,26.296465,27.120045,25.862419,26.839888,20


In [59]:
# creating target value
df2 = df.copy()
df

Unnamed: 0,Date,Open,High,Low,Close,Symbol
0,2020-03-15,5201.066895,5836.645020,5169.283203,5392.314941,1
1,2020-03-16,5385.229492,5385.229492,4575.357910,5014.479980,1
2,2020-03-17,5002.578125,5371.348633,4981.909180,5225.629395,1
3,2020-03-18,5227.113770,5331.833984,5069.335938,5238.438477,1
4,2020-03-19,5245.416504,6329.735840,5236.968750,6191.192871,1
...,...,...,...,...,...,...
14417,2022-03-11,28.047722,28.389145,27.091694,27.528900,20
14418,2022-03-12,27.527025,28.227278,27.215895,27.219019,20
14419,2022-03-13,27.215071,27.495592,26.077883,26.297186,20
14420,2022-03-14,26.296465,27.120045,25.862419,26.839888,20


In [60]:
df.head(5)

Unnamed: 0,Date,Open,High,Low,Close,Symbol
0,2020-03-15,5201.066895,5836.64502,5169.283203,5392.314941,1
1,2020-03-16,5385.229492,5385.229492,4575.35791,5014.47998,1
2,2020-03-17,5002.578125,5371.348633,4981.90918,5225.629395,1
3,2020-03-18,5227.11377,5331.833984,5069.335938,5238.438477,1
4,2020-03-19,5245.416504,6329.73584,5236.96875,6191.192871,1


###CREATING DEPENDENT AND INDEPENDENT VARIABLES

In [61]:
X = df.drop(['Date','Close'], axis=1)  #remaining OPEN,HIGH,LOW, symbol
X

Unnamed: 0,Open,High,Low,Symbol
0,5201.066895,5836.645020,5169.283203,1
1,5385.229492,5385.229492,4575.357910,1
2,5002.578125,5371.348633,4981.909180,1
3,5227.113770,5331.833984,5069.335938,1
4,5245.416504,6329.735840,5236.968750,1
...,...,...,...,...
14417,28.047722,28.389145,27.091694,20
14418,27.527025,28.227278,27.215895,20
14419,27.215071,27.495592,26.077883,20
14420,26.296465,27.120045,25.862419,20


In [62]:
y = df.Close
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1) #splitting into 80:20

X_train.shape, X_test.shape, y_train.shape, y_test.shape

((11537, 4), (2885, 4), (11537,), (2885,))

In [63]:
# scaler = preprocessing.MinMaxScaler(feature_range=(0,1)) #declaring the scaler
# X_train_scaled = scaler.fit_transform(X_train)  #fitting the scaler to the df and transforming
# X_test_scaled = scaler.transform(X_test)
#
# print('\nScaled data:\n', X_test_scaled[0:10])
# X_train.shape, X_test.shape, y_train.shape, y_test.shape

In [64]:
# scale =preprocessing.MinMaxScaler(feature_range=(0,1))
# X_scaled =scale.fit_transform(X)
# X_comb =pd.DataFrame(X_scaled,df.Symbol, columns=X.columns)
# print(X_comb)


In [65]:
print(X_test)

               Open          High           Low  Symbol
12731     49.387711     51.367092     48.701923      18
14390     26.556368     28.290575     26.187340      20
8754       0.081324      0.084418      0.079397      13
12315     12.138168     14.479794     11.947193      18
2611     634.006897    644.124512    612.162476       4
...             ...           ...           ...     ...
7957       0.998442      1.004348      0.992794      12
10141      2.328073      2.436591      2.300155      15
240    15332.350586  15450.329102  15124.959961       1
197    10776.613281  10945.347656  10703.893555       1
2452      29.164108     30.451855     28.959318       4

[2885 rows x 4 columns]


In [66]:
print(y_train)

882        428.741791
10872        0.002326
11620        0.003491
9330         0.087195
3830        25.230295
             ...     
905        335.260071
5192         0.000172
12172        0.008475
235      15579.848633
13349        0.117249
Name: Close, Length: 11537, dtype: float64


In [67]:
print(X_train)

               Open          High           Low  Symbol
882      390.838104    432.904602    379.710876       2
10872      0.002313      0.002361      0.002283      16
11620      0.003425      0.003529      0.003415      17
9330       0.087488      0.088151      0.084488      14
3830      23.590525     25.577707     23.333549       6
...             ...           ...           ...     ...
905      388.038391    394.172272    316.774353       2
5192       0.000169      0.000174      0.000169       8
12172      0.008450      0.008507      0.008278      17
235    14133.733398  15706.404297  14102.088867       1
13349      0.106157      0.118003      0.105394      19

[11537 rows x 4 columns]


In [68]:
print(y_test)

12731       51.307907
14390       27.043846
8754         0.083808
12315       13.718911
2611       624.563293
             ...     
7957         1.000145
10141        2.372494
240      15290.902344
197      10709.652344
2452        30.117603
Name: Close, Length: 2885, dtype: float64


In [69]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.compose import TransformedTargetRegressor
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler

regressor = KNeighborsRegressor(n_neighbors=1)

model = TransformedTargetRegressor(regressor= regressor,
                                        transformer = MinMaxScaler()
                                        ).fit(X_train,y_train)
y_pred = model.predict(X_test)
print(y_pred)

[4.95172424e+01 2.81411934e+01 8.38740021e-02 ... 1.57013398e+04
 1.07969512e+04 3.00295773e+01]


In [70]:
print(y_test)

12731       51.307907
14390       27.043846
8754         0.083808
12315       13.718911
2611       624.563293
             ...     
7957         1.000145
10141        2.372494
240      15290.902344
197      10709.652344
2452        30.117603
Name: Close, Length: 2885, dtype: float64


In [71]:
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print('R2 score: ', r2)
print('Mean absolute error: {:.2f}'. format(mae))

R2 score:  0.9994583828144564
Mean absolute error: 35.63


In [72]:
from sklearn.metrics import mean_absolute_percentage_error

mape = mean_absolute_percentage_error(y_test, y_pred)
print('\nMean absolute percentage error: \n', mape)



Mean absolute percentage error: 
 0.03707449723420161


In [73]:
# saving the model for use
saved_model='K.sav'
joblib.dump(model,saved_model)


['K.sav']

In [74]:

loaded_model=joblib.load(saved_model)
result=loaded_model.score(X_test, y_test)
print(result)

0.9994583828144564


In [75]:
# joining X_test to predicted values
X_test_df = X_test.copy()
X_test_df['predicted'] = y_pred
X_test_df.head()

Unnamed: 0,Open,High,Low,Symbol,predicted
12731,49.387711,51.367092,48.701923,18,49.517242
14390,26.556368,28.290575,26.18734,20,28.141193
8754,0.081324,0.084418,0.079397,13,0.083874
12315,12.138168,14.479794,11.947193,18,12.08075
2611,634.006897,644.124512,612.162476,4,609.283508


USING TRAINED MODEL FOR A COIN

In [25]:
import joblib
import numpy as np
from sklearn.model_selection import train_test_split
import yfinance as yf
from sklearn.model_selection import train_test_split
import datetime as dt


#Prediction for individual coins
#define function to collect data and returns data for b†c only
def retrieve(abb):
    start = dt.datetime(2021, 3, 15)
    end = dt.datetime.now()
    df = yf.download(f'{abb}-USD', start=start, end=end, interval='1d')
    df.reset_index(inplace=True)
    df_2= df.drop(['Adj Close','Volume'], axis=1)
    return df_2

#function to collect close column and create new target variables
def create(df):
    df = df.drop(["Open","High","Low","Date"], axis=1) # picking only close
    df['predict_1'] = df['Close'].shift(-1)  #Creating new columns for prediction
    df['predict_7d'] = df['Close'].shift(-7)
    df['predict_30'] = df['Close'].shift(-30)
    df['predict_90'] = df['Close'].shift(-90)
    dfw = df.dropna().copy()  #drop not available data after shifting
    return dfw



#Convert df to a numpy array and drop prediction
#Separating independent from dependent variables
#makes prediction with trained model
def create_predict(df_f):
    prediction_days = 90
    x = np.array(df_f['Close']).reshape(-1, 1)  #.values.reshape(-1,1))
    X = x[:len(x)-prediction_days]-1   #Remove last n rows from x data
    Y= df_f.drop(['Close'],  axis=1)  # create dependent data
    y=Y[:len(Y)-prediction_days]-1
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4,random_state=1)  #60-40
    loaded_model=joblib.load('k.sav')
    load = loaded_model.fit(X_train, y_train)
    y_get= load.predict(X_test)
    return y_get

#function to display predicted value specified by user
def display(array_orig, period):
    array_data= list(array_orig)
    # for arr in array_data:
    if period == '1day':
        value = array_data[0][0]
    elif period == '7days':
        value = array_data[0][1]
    elif period == '30days':
        value= array_data[0][2]
    elif period == 'Quarter':
        value = array_data[0][3]
    else:
        value = "Check your input"
    return value


In [26]:
coin = 'BTC'
# duration = 7
crypto_df = retrieve(coin)
print(crypto_df)

[*********************100%***********************]  1 of 1 completed
          Date          Open          High           Low         Close
0   2021-03-15  59267.429688  60540.992188  55393.164062  55907.199219
1   2021-03-16  55840.785156  56833.179688  53555.027344  56804.902344
2   2021-03-17  56825.828125  58969.816406  54528.628906  58870.894531
3   2021-03-18  58893.078125  60116.250000  54253.578125  57858.921875
4   2021-03-19  57850.441406  59498.375000  56643.703125  58346.652344
..         ...           ...           ...           ...           ...
471 2022-06-29  20281.169922  20364.156250  19937.791016  20104.023438
472 2022-06-30  20108.312500  20141.160156  18729.656250  19784.726562
473 2022-07-01  19820.470703  20632.671875  19073.708984  19269.367188
474 2022-07-02  19274.835938  19371.748047  19027.082031  19242.255859
475 2022-07-03  19246.560547  19288.025391  18975.353516  18989.697266

[476 rows x 5 columns]


In [27]:
create = create(crypto_df)
print(create)

            Close     predict_1    predict_7d    predict_30    predict_90
0    55907.199219  56804.902344  54529.144531  63109.695312  39097.859375
1    56804.902344  58870.894531  54738.945312  63314.011719  40218.476562
2    58870.894531  57858.921875  52774.265625  61572.789062  40406.269531
3    57858.921875  58346.652344  51704.160156  60683.820312  38347.062500
4    58346.652344  58313.644531  55137.312500  56216.183594  38053.503906
..            ...           ...           ...           ...           ...
381  45538.675781  46281.644531  43503.847656  37714.875000  20104.023438
382  46281.644531  45868.949219  42287.664062  38469.093750  19784.726562
383  45868.949219  46453.566406  42782.136719  38529.328125  19269.367188
384  46453.566406  46622.675781  42207.671875  37750.453125  19242.255859
385  46622.675781  45555.992188  39521.902344  39698.371094  18989.697266

[386 rows x 5 columns]


In [28]:
predict_df = create_predict(create)
print(predict_df)

[[62209.171875   58481.38671875 58118.578125   42374.6328125 ]
 [64154.94140625 56941.13671875 49361.5078125  44337.796875  ]
 [50503.796875   46611.6328125  43159.9296875  38061.0390625 ]
 [58481.38671875 63225.40234375 57273.6796875  36653.328125  ]
 [62209.171875   58481.38671875 58118.578125   42374.6328125 ]
 [44554.80078125 47792.3203125  51752.41015625 61451.23046875]
 [50581.625      50097.3359375  45896.57421875 39399.5859375 ]
 [61317.95703125 61526.48046875 57805.56640625 37783.33203125]
 [58481.38671875 63225.40234375 57273.6796875  36653.328125  ]
 [58481.38671875 63225.40234375 57273.6796875  36653.328125  ]
 [34661.4375     35039.8359375  33580.55078125 40692.67578125]
 [58322.953125   63108.6953125  57355.40234375 34234.1953125 ]
 [54823.703125   53332.5390625  38435.96875    37336.53515625]
 [31795.81054688 34291.4453125  46003.484375   61592.94921875]
 [33745.00390625 34239.1875     38151.98046875 47710.48828125]
 [38151.98046875 46364.40234375 48846.02734375 61317.95

In [29]:
day='1day'
display1=display(predict_df, day)
print(display1)


62209.171875
