# LIBRARIES

In [None]:
# Mount Google Drive at /content/drive (Colab-only module); the data directory
# passed to Crypto further down lives under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Import all the libraries we'll need
import pandas as pd
import numpy as np
import os
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeRegressor
from sklearn import linear_model
from sklearn import preprocessing
from sklearn.metrics import explained_variance_score
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
from sklearn.linear_model import LinearRegression,Ridge
from sklearn.model_selection import train_test_split, cross_val_score,KFold
from sklearn.preprocessing import LabelEncoder
from xgboost import XGBRegressor
from sklearn.ensemble import StackingRegressor,VotingRegressor,BaggingRegressor,ExtraTreesRegressor,RandomForestRegressor,GradientBoostingRegressor
from sklearn.model_selection import GridSearchCV

In [None]:
# Suppress every warning for the rest of the session.
# NOTE(review): this also hides library deprecation warnings and pandas
# chained-assignment warnings raised by the code below — consider narrowing it.
import warnings
warnings.simplefilter('ignore')

In [None]:
class Crypto :
  """Helpers for the closing-price workflow used in this notebook.

  The class groups together: reading the three CSV files, building the
  cross-validation splitter, constructing the base regressors, running a
  K-fold training loop that returns out-of-fold (OOF) and test predictions,
  stacking those predictions with a bagged Ridge, and writing the
  submission file.
  """

  def __init__(self,path='') :
    """Store the directory that contains Train.csv, Test.csv and SampleSubmission.csv."""
    self.path = path

  def read_csvs(self) :
    """Read Train.csv, Test.csv and SampleSubmission.csv from ``self.path``.

    Returns:
      tuple: ``(train, test, ss)`` as pandas DataFrames.
    """
    train= pd.read_csv(os.path.join(self.path,"Train.csv"))
    test = pd.read_csv(os.path.join(self.path,"Test.csv"))
    ss   = pd.read_csv(os.path.join(self.path,"SampleSubmission.csv"))

    return train , test ,ss

  def setTestZeros(self,test) :
    """Return the ``id`` values of the test rows whose ``open`` is missing."""
    return test.loc[test['open'].isnull(), 'id']

  def SplitStrategy(self,nfold=10) :
    """Build a shuffled ``KFold`` splitter with a fixed seed.

    Args:
      nfold: number of folds. Defaults to 10 so that the argument-less call
        made in the notebook works instead of raising ``TypeError``.
    """
    return KFold(n_splits=nfold, shuffle=True, random_state=1994)

  def process(self , train,test) :
    """Keep only the training rows that have a ``close`` value.

    ``test`` is accepted for interface compatibility but is not used.
    """
    return train.loc[train['close'].notna()]

  def defineXY(self,train,feat) :
    """Split ``train`` into the feature frame ``train[feat]`` and the float target ``close``."""
    X = train[feat]
    y = train.close.astype(float)
    return X, y

  def define_model(self,name='LinReg') :
    """Create a fresh, unfitted regressor for the given short name.

    Supported names: 'LinReg', 'rf', 'ridge', 'BayReg', 'GradBReg',
    'ExTreeReg', 'xgb'.

    Raises:
      ValueError: if ``name`` is not one of the supported names (previously
        the method fell through and returned ``None``, which only failed
        later at ``.fit``).

    NOTE(review): ``loss='ls'``, ``n_iter=...`` and the ``verbose`` keyword
    are kept exactly as originally written; newer scikit-learn / xgboost
    releases may reject them — confirm against the installed versions.
    """
    if name == 'LinReg' :
      return BaggingRegressor(linear_model.LinearRegression(),
                              random_state=50,n_estimators=10)
    elif name =='rf' :
      return RandomForestRegressor(random_state=50,
                                   n_estimators=500,max_depth=15)

    elif name =='ridge' :
      return BaggingRegressor(Ridge(alpha=0.001),
                              n_estimators=50,random_state=50)

    elif name =='BayReg' :
      return BaggingRegressor(linear_model.BayesianRidge(n_iter=1000),
                              random_state=50,n_estimators=10)

    elif name =='GradBReg' :
      return GradientBoostingRegressor(random_state=50,learning_rate=0.01,
                                       loss='ls',n_estimators=1700,max_depth=5)

    elif name =='ExTreeReg' :
      return ExtraTreesRegressor(random_state=50,
                                 n_estimators=220,max_depth=15)

    elif name =='xgb' :
      return XGBRegressor(objective ='reg:tweedie',n_estimators=3000,
                          learning_rate=0.01,max_depth=8,random_state=50,
                          eval_metric='rmse',reg_lambda=0.5,reg_alpha=0.01,subsample=0.9,verbose=0)

    raise ValueError(f'Unknown model name: {name!r}')

  def RunModel(self,name,X,y,test,kfolds) :
    """Train ``name`` on every fold and collect OOF and test predictions.

    Args:
      name: model short name understood by ``define_model``. Names containing
        'xgb' keep NaNs and are fitted with early stopping on the validation
        fold; all other models get NaNs replaced by -1.
      X, y: training features (DataFrame) and target (Series).
      test: test DataFrame; only the columns of ``X`` are used.
      kfolds: splitter exposing ``split(X, y)``.

    Returns:
      tuple: ``(oof, y_pred_tot)`` where ``oof`` is an array of length
      ``len(X)`` holding validation predictions clipped at 0, and
      ``y_pred_tot`` is a list with one test-prediction array per fold.

    The per-fold RMSE printed here is computed on the unclipped validation
    predictions, while the final OOF RMSE uses the clipped ``oof`` array.
    """
    y_pred_tot=[]
    oof = np.zeros(len(X))
    is_xgb = 'xgb' in name

    if not is_xgb :
      X=X.fillna(-1)

    # The test features do not change between folds, so prepare them once.
    test_X = test[X.columns]
    if not is_xgb :
      test_X = test_X.fillna(-1)

    for ind,(ind_train,ind_val) in enumerate(kfolds.split(X,y)):
      X_train,X_val = X.iloc[ind_train],X.iloc[ind_val]
      y_train,y_val = y.iloc[ind_train],y.iloc[ind_val]

      model = self.define_model(name)
      if is_xgb :
        # NOTE(review): passing early_stopping_rounds to fit() is the older
        # xgboost API — confirm it is accepted by the installed version.
        model.fit(X_train,y_train,early_stopping_rounds=300,eval_set=[(X_val,y_val)],verbose=0)
      else:
        model.fit(X_train,y_train)

      val_pred = model.predict(X_val)
      oof[ind_val] = val_pred.clip(0)
      score_val=np.sqrt(mean_squared_error(y_val, val_pred))
      print(f'rmse fold {ind}',score_val)

      y_pred_tot.append(model.predict(test_X))

    # sqrt(MSE) instead of squared=False: same value, consistent with the
    # per-fold score above and independent of the scikit-learn version.
    print('rmse oof score : ',np.sqrt(mean_squared_error(y,oof)))

    return oof , y_pred_tot

  def StackingData(self ,y,test_predictions : list , oof_predictions : list) :
    """Assemble the level-2 train and test frames.

    Column ``preds_i`` holds the i-th model's OOF predictions (train frame)
    or test predictions (test frame); the train frame additionally gets a
    ``Target`` column with ``y``. ``y`` is also stored on ``self.y``.

    Returns:
      tuple: ``(stacking_train, stacking_test)``.
    """
    self.y = y
    n_models = len(test_predictions)

    stacking_train = pd.DataFrame()
    for i in range(n_models) :
      stacking_train[f'preds_{i}'] =  oof_predictions[i]
    stacking_train['Target'] = self.y

    stacking_test = pd.DataFrame()
    for i in range(n_models) :
      stacking_test[f'preds_{i}'] =  test_predictions[i]

    return  stacking_train , stacking_test

  def StackingRegressor(self ,y,KFOLD,test_predictions : list , oof_predictions : list) :
    """Fit a bagged Ridge meta-model on the base models' OOF predictions.

    Args:
      y: training target.
      KFOLD: splitter exposing ``split(X, y)``.
      test_predictions: one test-prediction array per base model.
      oof_predictions: one OOF-prediction array per base model, same order.

    Returns:
      tuple: ``(oof_stack, test_mean)`` — the meta-model's OOF predictions
      (clipped at 0) and the fold-averaged test predictions (not clipped).
    """
    stacking_train , stacking_test = self.StackingData(y,test_predictions,oof_predictions)

    cols = stacking_test.columns
    X , y , Test = stacking_train[cols] , stacking_train['Target'] , stacking_test[cols]
    final_preds = []
    oof_stack = np.zeros(len(X))

    for fold,(train_index, test_index) in enumerate(KFOLD.split(X,y)):
      X_train, X_test = X.values[train_index], X.values[test_index]
      y_train = y.values[train_index]

      model = BaggingRegressor(linear_model.Ridge(),random_state=50,n_estimators=14)
      model.fit(X_train,y_train)
      oof_stack[test_index] = model.predict(X_test).clip(0)

      final_preds.append(model.predict(Test.values))

    print(2*'--------------------------------------')
    print('STACKING RMSE',np.sqrt(mean_squared_error(y, oof_stack)))

    return oof_stack,np.mean(final_preds,axis=0)

  def create_submission(self,predictions,dir_path) :
    """Post-process ``predictions`` and write ``{dir_path}.csv``.

    Steps, in order:
      1. Re-read Test.csv and fill missing ``low``/``open``/``high`` with 0.
      2. Clip each prediction into ``[low, high]`` of its row.
      3. Replace negative values with 0.
      4. Force 0 for rows whose ``open`` was missing in the raw test file.
      5. Apply ``close*1.05 - open*0.05`` (kept as in the original; the
         rationale for this adjustment is not documented).

    Returns:
      The return value of ``DataFrame.to_csv`` (``None`` when writing to a path).
    """
    _ , test , _ = self.read_csvs()
    settozero = self.setTestZeros(test)
    sub_file = test[['id','low','open','high']].fillna(0)
    sub_file['close']= predictions

    # Vectorised replacement for the former row-by-row chained assignment
    # (sub_file['close'][i] = ...), which pandas does not guarantee to write
    # back to the frame.
    clipped = np.clip(sub_file['close'].to_numpy(dtype=float),
                      sub_file['low'].to_numpy(dtype=float),
                      sub_file['high'].to_numpy(dtype=float))
    sub_file['close'] = np.maximum(clipped, 0)

    sub_file.loc[sub_file['id'].isin(settozero),'close']=0
    sub_file['close'] = sub_file['close']*1.05 - sub_file['open']*0.05

    return sub_file[['id','close']].to_csv(f'{dir_path}.csv', index = False)

In [None]:
# Directory on the mounted Drive that Crypto.read_csvs() reads the CSV files from.
DATA_DIR = '/content/drive/MyDrive/ZINDI/Cryptocurrency Closing Price Prediction/'
TikTak7otFSac = Crypto(path=DATA_DIR)

In [None]:
# Load the training set, the test set and the sample submission.
train, test, ss = TikTak7otFSac.read_csvs()

In [None]:
# SplitStrategy needs the number of folds; the recorded runs that follow
# report folds 0-9, i.e. a 10-fold split.
kfolds = TikTak7otFSac.SplitStrategy(10)
# Drop training rows without a close value.
train = TikTak7otFSac.process(train,test)
feat=['high','low','open','market_cap','price_score','volatility','galaxy_score','percent_change_24h_rank','percent_change_24h','market_cap_global','market_cap_rank']


In [None]:
# Bagged linear regression on the price and market-cap columns.
feat = ['high', 'low', 'open', 'market_cap', 'market_cap_global']
X, y = TikTak7otFSac.defineXY(train, feat)
oof_LinReg, preds_LinReg = TikTak7otFSac.RunModel('LinReg', X, y, test, kfolds)

rmse fold 0 57.65288515758524
rmse fold 1 60.42598482796608
rmse fold 2 62.736818986080216
rmse fold 3 66.49773457642691
rmse fold 4 56.1880804353115
rmse fold 5 59.485282653363534
rmse fold 6 68.9718303072926
rmse fold 7 69.75629753471398
rmse fold 8 65.66077768961091
rmse fold 9 116.5704125857321
rmse oof score :  70.38643051673033


In [None]:
# Random forest on the same five columns (author's note on this run: 70,147).
feat = ['high', 'low', 'open', 'market_cap', 'market_cap_global']
X, y = TikTak7otFSac.defineXY(train, feat)
oof_rf, preds_rf = TikTak7otFSac.RunModel('rf', X, y, test, kfolds)

rmse fold 0 63.310243585968315
rmse fold 1 55.776812819513374
rmse fold 2 52.36110199025464
rmse fold 3 98.71998564787212
rmse fold 4 62.54706542738137
rmse fold 5 77.26601642427885
rmse fold 6 49.16904237735241
rmse fold 7 74.5749483413916
rmse fold 8 80.94635161275848
rmse fold 9 66.98141970119596
rmse oof score :  69.64495394882279


In [None]:
# Bagged Ridge on the price columns plus market-cap, galaxy-score and 24h-change features.
feat = [
    'high', 'low', 'open', 'market_cap', 'galaxy_score',
    'percent_change_24h_rank', 'percent_change_24h',
    'market_cap_global', 'market_cap_rank',
]
X, y = TikTak7otFSac.defineXY(train, feat)
oof_ridge, preds_ridge = TikTak7otFSac.RunModel('ridge', X, y, test, kfolds)

rmse fold 0 57.845166554409374
rmse fold 1 60.58491004039953
rmse fold 2 62.483987383385696
rmse fold 3 67.74721506599228
rmse fold 4 55.52772000301272
rmse fold 5 59.49858062967734
rmse fold 6 68.7374727606933
rmse fold 7 70.24919765314792
rmse fold 8 64.66050800670965
rmse fold 9 117.05442245665483
rmse oof score :  70.47500354984693


In [None]:
# Define the inputs explicitly instead of relying on X/y left over from the
# previous (ridge) cell; the feature list is the one that cell used.
feat=['high','low','open','market_cap','galaxy_score','percent_change_24h_rank','percent_change_24h','market_cap_global','market_cap_rank']
X,y = TikTak7otFSac.defineXY(train,feat)
oof_BayReg , preds_BayReg = TikTak7otFSac.RunModel(name='BayReg',
                                             X=X,y=y,test=test,
                                             kfolds=kfolds)

rmse fold 0 57.62425530007833
rmse fold 1 60.53647172763482
rmse fold 2 62.714606931580306
rmse fold 3 66.46323226508808
rmse fold 4 56.10341033709003
rmse fold 5 59.51178900472175
rmse fold 6 69.1568741220433
rmse fold 7 69.76693380062768
rmse fold 8 65.67620053845654
rmse fold 9 116.53822890084535
rmse oof score :  70.39915545678066


In [None]:
# Gradient boosting, evaluated with a 5-fold split (author's note on this run: 69.92).
feat = [
    'high', 'low', 'open', 'market_cap', 'galaxy_score',
    'percent_change_24h_rank', 'percent_change_24h',
    'market_cap_global', 'market_cap_rank',
]
X, y = TikTak7otFSac.defineXY(train, feat)
kfolds = TikTak7otFSac.SplitStrategy(5)
oof_GradBReg, preds_GradBReg = TikTak7otFSac.RunModel('GradBReg', X, y, test, kfolds)

rmse fold 0 61.64530167394278
rmse fold 1 81.06747102021625
rmse fold 2 67.63573136796772
rmse fold 3 64.31692842502778
rmse fold 4 73.22477532316724
rmse oof score :  69.92217087892686


In [None]:
# Display the filtered training frame (rows without a close value were dropped by process()).
train

Unnamed: 0,id,asset_id,open,high,low,volume,market_cap,url_shares,unique_url_shares,reddit_posts,reddit_posts_score,reddit_comments,reddit_comments_score,tweets,tweet_spam,tweet_followers,tweet_quotes,tweet_retweets,tweet_replies,tweet_favorites,tweet_sentiment1,tweet_sentiment2,tweet_sentiment3,tweet_sentiment4,tweet_sentiment5,tweet_sentiment_impact1,tweet_sentiment_impact2,tweet_sentiment_impact3,tweet_sentiment_impact4,tweet_sentiment_impact5,social_score,average_sentiment,news,price_score,social_impact_score,correlation_rank,galaxy_score,volatility,market_cap_rank,percent_change_24h_rank,volume_24h_rank,social_volume_24h_rank,social_score_24h_rank,medium,youtube,social_volume,percent_change_24h,market_cap_global,close
0,ID_322qz6,1,9422.849081,9428.490628,9422.849081,7.131986e+08,1.737635e+11,1689.0,817.0,55.0,105.0,61.0,271.0,3420.0,1671.0,11675867.0,39.0,1343.0,448.0,2237.0,124.0,330.0,331.0,2515.0,120.0,506133.0,1326610.0,1159677.0,8406185.0,281329.0,11681999.0,3.6,69.0,2.7,3.6,3.3,66.0,0.007118,1.0,606.0,2.0,1.0,1.0,2.0,5.0,4422,1.434516,2.818066e+11,9428.279323
1,ID_3239o9,1,7985.359278,7992.059917,7967.567267,4.004755e+08,1.426942e+11,920.0,544.0,20.0,531.0,103.0,533.0,1491.0,242.0,5917814.0,195.0,1070.0,671.0,3888.0,1.0,52.0,315.0,1100.0,23.0,1320.0,381117.0,1706376.0,3754815.0,80010.0,5924770.0,3.7,1.0,2.0,2.0,1.0,43.5,0.009419,1.0,,,,,,,2159,-2.459507,2.126897e+11,7967.567267
2,ID_323J9k,1,49202.033778,49394.593518,49068.057046,3.017729e+09,9.166977e+11,1446.0,975.0,72.0,1152.0,187.0,905.0,9346.0,4013.0,47778746.0,104.0,2014.0,1099.0,11476.0,331.0,923.0,864.0,6786.0,442.0,9848462.0,5178557.0,2145663.0,25510267.0,5110490.0,47796942.0,3.7,22.0,3.1,3.0,3.3,65.5,0.013530,1.0,692.0,3.0,1.0,1.0,,,10602,4.942448,1.530712e+12,49120.738484
4,ID_324kJH,1,10535.737119,10535.737119,10384.798216,1.150053e+09,1.921183e+11,1012.0,638.0,24.0,42.0,50.0,173.0,3262.0,1652.0,14422172.0,21.0,511.0,190.0,2284.0,86.0,280.0,443.0,2284.0,169.0,311017.0,1977833.0,731277.0,10964321.0,440730.0,14426405.0,3.7,22.0,4.7,3.8,4.4,83.0,0.010332,1.0,749.0,2.0,1.0,1.0,,2.0,3996,2.609576,3.386925e+11,10384.798216
6,ID_325m2L,1,9697.250948,9712.247413,9697.250948,1.231231e+09,1.767035e+11,1079.0,661.0,66.0,138.0,53.0,345.0,3890.0,401.0,30469074.0,98.0,1398.0,678.0,4066.0,102.0,181.0,861.0,2490.0,256.0,281173.0,1208072.0,2631596.0,25743321.0,611152.0,30476752.0,3.7,43.0,3.5,3.0,3.1,66.5,0.008594,1.0,656.0,2.0,1.0,1.0,,,4713,1.159070,2.807819e+11,9712.247413
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12626,ID_zwg5VZ,1,19215.293853,19219.559735,19214.640359,8.844918e+08,3.563280e+11,1302.0,841.0,61.0,513.0,101.0,421.0,4792.0,1868.0,23961117.0,100.0,935.0,365.0,4690.0,115.0,439.0,579.0,3462.0,197.0,1920165.0,3655243.0,3190489.0,14779550.0,421760.0,23969443.0,3.7,36.0,2.6,4.3,2.5,65.5,0.006747,1.0,787.0,2.0,1.0,1.0,1.0,2.0,5831,0.266681,5.690401e+11,19219.559735
12628,ID_zydeys,1,7294.506136,7306.701928,7292.729876,3.816704e+08,1.322473e+11,605.0,462.0,16.0,217.0,39.0,156.0,1501.0,241.0,10909376.0,9.0,170.0,44.0,292.0,35.0,51.0,358.0,973.0,84.0,163376.0,304833.0,775658.0,9213680.0,452344.0,10910586.0,3.7,2.0,3.3,3.0,2.6,63.0,0.019912,1.0,1007.0,2.0,1.0,1.0,,,2020,,1.978198e+11,7303.640282
12629,ID_zyfj4W,1,8811.121632,8832.560173,8754.602574,2.412260e+09,1.621298e+11,1653.0,757.0,74.0,249.0,60.0,107.0,4852.0,513.0,14878543.0,92.0,1127.0,454.0,3252.0,144.0,203.0,952.0,3101.0,452.0,617875.0,656816.0,2617007.0,10307241.0,684529.0,14885477.0,3.7,44.0,2.8,3.0,3.9,67.0,0.020715,1.0,918.0,2.0,1.0,1.0,1.0,2.0,5787,,2.493421e+11,8828.440234
12630,ID_zz5eGB,1,9661.514681,9662.196605,9645.133934,9.861517e+08,1.774933e+11,1376.0,657.0,39.0,81.0,43.0,122.0,3915.0,675.0,14439095.0,153.0,871.0,274.0,3343.0,74.0,188.0,671.0,2691.0,291.0,278732.0,360131.0,1733775.0,10881941.0,1189157.0,14445315.0,3.8,25.0,3.0,4.4,4.5,78.5,0.012411,1.0,603.0,2.0,1.0,1.0,,9.0,4679,2.706432,2.757547e+11,9658.204385


In [None]:
# Extra-trees on the widest feature set, back on a 10-fold split.
feat = [
    'high', 'low', 'open', 'market_cap', 'price_score', 'volatility',
    'galaxy_score', 'percent_change_24h_rank', 'percent_change_24h',
    'market_cap_global', 'market_cap_rank',
]
X, y = TikTak7otFSac.defineXY(train, feat)
kfolds = TikTak7otFSac.SplitStrategy(10)
oof_ExTreeReg, preds_ExTreeReg = TikTak7otFSac.RunModel('ExTreeReg', X, y, test, kfolds)

rmse fold 0 60.835176632830375
rmse fold 1 55.640548838914114
rmse fold 2 49.32744999412876
rmse fold 3 84.92265938325471
rmse fold 4 56.19561064243709
rmse fold 5 65.93140917926769
rmse fold 6 51.017514669787666
rmse fold 7 65.5610915766283
rmse fold 8 64.68807478948247
rmse fold 9 68.59516240727186
rmse oof score :  63.0366233943428


In [None]:
feat=['high','low','open','market_cap','volatility','galaxy_score','percent_change_24h_rank','percent_change_24h','market_cap_global','market_cap_rank']
X,y = TikTak7otFSac.defineXY(train,feat)
# The recorded output of this cell reports five folds, whereas the `kfolds`
# left by the preceding cell is a 10-fold splitter. Pin the 5-fold split in a
# cell-local name so a top-to-bottom run reproduces the recorded result
# without changing `kfolds` for the cells that follow.
# NOTE(review): confirm that 5 folds is the intended setting for xgb.
kfolds_xgb = TikTak7otFSac.SplitStrategy(5)
oof_xgb , preds_xgb = TikTak7otFSac.RunModel(name='xgb',
                                             X=X,y=y,test=test,
                                             kfolds=kfolds_xgb)

rmse fold 0 58.78336627329315
rmse fold 1 64.71219474389277
rmse fold 2 67.10911399548861
rmse fold 3 61.107928422330744
rmse fold 4 61.61624415697922
rmse oof score :  62.73339417895361


* **Stacking**

In [None]:
# Stack three base models: extra-trees, xgb and ridge.
# Each model's per-fold test predictions are averaged before stacking.
oofs_pred = [oof_ExTreeReg, oof_xgb, oof_ridge]
test_pred = [np.mean(fold_preds, axis=0)
             for fold_preds in (preds_ExTreeReg, preds_xgb, preds_ridge)]
oof_stack, stack_preds = TikTak7otFSac.StackingRegressor(
    y=y.values,
    KFOLD=kfolds,
    test_predictions=test_pred,
    oof_predictions=oofs_pred,
)

----------------------------------------------------------------------------
STACKING RMSE 58.209655855328485


In [None]:
# Stack five base models; this overwrites oof_stack / stack_preds from the previous cell.
oofs_pred = [oof_GradBReg, oof_ExTreeReg, oof_BayReg, oof_xgb, oof_ridge]
test_pred = [np.mean(fold_preds, axis=0)
             for fold_preds in (preds_GradBReg, preds_ExTreeReg, preds_BayReg,
                                preds_xgb, preds_ridge)]
oof_stack, stack_preds = TikTak7otFSac.StackingRegressor(
    y=y.values,
    KFOLD=kfolds,
    test_predictions=test_pred,
    oof_predictions=oofs_pred,
)

----------------------------------------------------------------------------
STACKING RMSE 58.126830402471704


In [None]:
# Post-process the stacked predictions and write them to tiktak1.csv.
TikTak7otFSac.create_submission(predictions=stack_preds, dir_path='tiktak1')