In [1]:
# ✅ Importing Libraries :
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor

from sklearn.feature_selection import SelectKBest, chi2, RFE

import pickle
import time 

In [2]:
# ✅ 1.Loading Original Dataset : 
dataset=pd.read_csv("Preprocessed_Data_Cryptos - One Percent of Actual Dataset.csv",index_col=None)
print(dataset.shape)
dataset.head()

(5242, 18)


Unnamed: 0,Crypto,Date,Time - 24 Hour Format,Time - 12 Hour Format,volume,open,Close Minus Open,Adj Close Minus Open,Close Minus Adj Close,High Minus Open,High Minus Close,High Minus Adj Close,High Minus Low,close,adj_close,high,low,Trade Impact
0,SPC-USD,03-10-2020,00:00:00,12:00 AM,1954.0,0.005614,0.000301,0.000301,0.0,0.000305,4e-06,4e-06,0.00039,0.005915,0.005915,0.005919,0.005529,Positive Impact
1,CONX28135-USD,21-03-2025,00:00:00,12:00 AM,46719.0,17.543959,0.109655,0.109655,0.0,0.180546,0.07089,0.07089,0.242294,17.653614,17.653614,17.724504,17.48221,Positive Impact
2,SHX-USD,24-11-2020,00:00:00,12:00 AM,8190.0,0.000192,-3e-06,-3e-06,0.0,1.9e-05,2.2e-05,2.2e-05,3.2e-05,0.000189,0.000189,0.000211,0.000179,Negative Impact
3,PALLA-USD,16-12-2022,00:00:00,12:00 AM,84921.0,0.011293,0.000133,0.000133,0.0,0.000147,1.4e-05,1.4e-05,0.000162,0.011426,0.011426,0.01144,0.011278,Positive Impact
4,PORK29220-USD,16-05-2024,00:00:00,12:00 AM,1864946.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,No Impact


In [3]:
#✅ 2.Duplicating the Original Dataset
dataset2 = dataset.drop(['Crypto','Date','Time - 24 Hour Format','Time - 12 Hour Format'],axis=1)

#✅ 3.Classifying the Nominal Columns in Dataset : 
dataset2 = pd.get_dummies(dataset2, drop_first=True)
print(dataset2.shape)
dataset2

(5242, 15)


Unnamed: 0,volume,open,Close Minus Open,Adj Close Minus Open,Close Minus Adj Close,High Minus Open,High Minus Close,High Minus Adj Close,High Minus Low,close,adj_close,high,low,Trade Impact_No Impact,Trade Impact_Positive Impact
0,1954.0,0.005614,0.000301,0.000301,0.0,0.000305,0.000004,0.000004,0.000390,0.005915,0.005915,0.005919,0.005529,False,True
1,46719.0,17.543959,0.109655,0.109655,0.0,0.180546,0.070890,0.070890,0.242294,17.653614,17.653614,17.724504,17.482210,False,True
2,8190.0,0.000192,-0.000003,-0.000003,0.0,0.000019,0.000022,0.000022,0.000032,0.000189,0.000189,0.000211,0.000179,False,False
3,84921.0,0.011293,0.000133,0.000133,0.0,0.000147,0.000014,0.000014,0.000162,0.011426,0.011426,0.011440,0.011278,False,True
4,1864946.0,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5237,211093892.0,375.328049,0.000000,0.000000,0.0,0.000000,0.000000,0.000000,0.000000,375.659451,375.627867,400.833600,365.791182,True,False
5238,3487036.0,0.073854,-0.009214,-0.009214,0.0,0.004684,0.013898,0.013898,0.017769,0.064640,0.064640,0.078538,0.060769,False,False
5239,6895906.0,0.000381,0.000000,0.000000,0.0,0.000018,0.000018,0.000018,0.000022,0.000381,0.000381,0.000399,0.000377,True,False
5240,15719.0,0.006267,0.000142,0.000142,0.0,0.000198,0.000056,0.000056,0.000198,0.006409,0.006409,0.006465,0.006267,False,True


In [4]:
dataset2.columns

Index(['volume', 'open', 'Close Minus Open', 'Adj Close Minus Open',
       'Close Minus Adj Close', 'High Minus Open', 'High Minus Close',
       'High Minus Adj Close', 'High Minus Low', 'close', 'adj_close', 'high',
       'low', 'Trade Impact_No Impact', 'Trade Impact_Positive Impact'],
      dtype='object')

In [5]:
#✅ 4.Assigning Variables (Independent/Dependent) : 

indep_X = dataset2.drop(['Trade Impact_Positive Impact'], axis=1)
print(indep_X.shape)

dep_Y = dataset2[['Trade Impact_Positive Impact']]
print(dep_Y.shape)

(5242, 14)
(5242, 1)


In [6]:
#✅ 5.Creating Function(s) :

def train_test_split_and_StandardScaler(indep_X,dep_Y):
    
    X_train, X_test, Y_train, Y_test = train_test_split(indep_X, dep_Y, test_size = 0.25, random_state = 0)
    
    sc = StandardScaler()
    X_train = sc.fit_transform(X_train)
    X_test = sc.transform(X_test)    
    return X_train, X_test, Y_train, Y_test

def RFE_Features_Regression(indep_X, dep_Y, n):
   
    RFE_List = []

    # Flatten y to a 1D array to avoid DataConversionWarning
    dep_Y = dep_Y.values.ravel()

    linear_Regression = LinearRegression()
    svr_Linear_Regression = SVR(kernel='linear', max_iter=1000)
    decisionTree_Regression = DecisionTreeRegressor(random_state = 0)
    randomForest_Regression = RandomForestRegressor(n_estimators = 10, random_state = 0)
        
    RFE_Model_List = [linear_Regression, svr_Linear_Regression, decisionTree_Regression, randomForest_Regression]

    for model in RFE_Model_List:
        print(f"\nRunning RFE for: {model}")
        start = time.time()
        
        regressor_RFE = RFE(estimator = model, n_features_to_select=n)
        regressor_RFE_Fit = regressor_RFE.fit(indep_X, dep_Y)
        regressor_RFE_Feature = regressor_RFE.transform(indep_X)
        RFE_List.append(regressor_RFE_Feature)
        print(f"Finished in {time.time() - start:.2f} seconds")
    return RFE_List

def R2_Prediction(regressor,X_test,Y_test):
    y_pred = regressor.predict(X_test)
    from sklearn.metrics import r2_score
    R2_Score = r2_score(Y_test,y_pred)
    return R2_Score

def Linear_Regression(X_train,Y_train,X_test):       
    # Fitting K-NN to the Training set
    from sklearn.linear_model import LinearRegression
    regressor = LinearRegression()
    regressor.fit(X_train, Y_train)
        
    # Calling a Created Function - R2_Prediction(regressor,X_test,Y_test) which returns - R2_Score
    R2_LinearRegression = R2_Prediction(regressor,X_test,Y_test)
    return R2_LinearRegression  

def SVM_Linear(X_train,Y_train,X_test):
    
    from sklearn.svm import SVR
    regressor = SVR(kernel = 'linear')
    regressor.fit(X_train, Y_train)

    # Calling a Created Function - R2_Prediction(regressor,X_test,Y_test) which returns - R2_Score
    R2_SVM_Linear = R2_Prediction(regressor,X_test,Y_test)
    return R2_SVM_Linear

def SVM_Non_Linear(X_train,Y_train,X_test):

    from sklearn.svm import SVR
    regressor = SVR(kernel = 'rbf')
    regressor.fit(X_train, Y_train)

    # Calling a Created Function - R2_Prediction(regressor,X_test,Y_test) which returns - R2_Score
    R2_SVM_Non_Linear = R2_Prediction(regressor,X_test,Y_test)
    return R2_SVM_Non_Linear

def DecisionTree(X_train,Y_train,X_test):

    # Fitting K-NN to the Training set
    from sklearn.tree import DecisionTreeRegressor
    regressor = DecisionTreeRegressor(random_state = 0)
    regressor.fit(X_train, Y_train)

    # Calling a Created Function - R2_Prediction(regressor,X_test,Y_test) which returns - R2_Score
    R2_DecisionTree = R2_Prediction(regressor,X_test,Y_test)
    return R2_DecisionTree

def RandomForest(X_train,Y_train,X_test):

    from sklearn.ensemble import RandomForestRegressor
    regressor = RandomForestRegressor(n_estimators = 10, random_state = 0)
    regressor.fit(X_train, Y_train)

    # Calling a Created Function - R2_Prediction(regressor,X_test,Y_test) which returns - R2_Score
    R2_RandomForest = R2_Prediction(regressor,X_test,Y_test)
    return R2_RandomForest

def RFE_Regression(R2_LinearRegression, R2_SVM_Linear, R2_DecisionTree, R2_RandomForest): 

    dataframe=pd.DataFrame(index=['Logistic Regression','SVM Linear','Decision Tree','Random Forest'],
                           columns=['Logistic Regression','SVM Linear','Decision Tree','Random Forest'])
    
    #Function - enumerate() acts as a Counter which Iterates index starting from 0 (by default) and their item(s) from the iterable
    #Use enumerate() when We need both Position in the loop (number) and its value from the iterable (idex)
    
    for indexCount,indexValue in enumerate(dataframe.index):      
        dataframe.loc[indexValue, 'Logistic Regression'] = R2_LinearRegression[indexCount]       
        dataframe.loc[indexValue, 'SVM Linear'] = R2_SVM_Linear[indexCount]
        dataframe.loc[indexValue, 'Decision Tree'] = R2_DecisionTree[indexCount]
        dataframe.loc[indexValue, 'Random Forest'] = R2_RandomForest[indexCount]
    return dataframe

In [7]:
#✅ 6.Calling a Created Function - RFE_Features_Classification(With Below Parameters): which returns - RFE_List
RFE_List = RFE_Features_Regression(indep_X, dep_Y, 7)


Running RFE for: LinearRegression()
Finished in 0.03 seconds

Running RFE for: SVR(kernel='linear', max_iter=1000)




Finished in 0.99 seconds

Running RFE for: DecisionTreeRegressor(random_state=0)
Finished in 0.11 seconds

Running RFE for: RandomForestRegressor(n_estimators=10, random_state=0)
Finished in 0.84 seconds


In [8]:
#✅ 7.Creating Empty Lists
R2_LinearRegression = []
R2_SVM_Linear = []
R2_DecisionTree = []
R2_RandomForest = []

In [9]:
#✅ 8.Appending the Accuracy Score of All the Models in Created Empty List Through a For Loop 
for X in RFE_List:
    X_train, X_test, Y_train, Y_test = train_test_split_and_StandardScaler(X, dep_Y)
    
    R2_Score = Linear_Regression(X_train,Y_train,X_test)
    R2_LinearRegression.append(R2_Score)
    
    R2_Score = SVM_Linear(X_train,Y_train,X_test)  
    R2_SVM_Linear.append(R2_Score)
    
    R2_Score = DecisionTree(X_train,Y_train,X_test)  
    R2_DecisionTree.append(R2_Score)
    
    R2_Score = RandomForest(X_train,Y_train,X_test)  
    R2_RandomForest.append(R2_Score)

  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)
  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


In [10]:
#✅ 9.Calling a Created Function - RFE_Regression(With Below Parameters): which returns - dataframe    
result = RFE_Regression(R2_LinearRegression, R2_SVM_Linear, R2_DecisionTree, R2_RandomForest)

In [11]:
#✅ 10.Calling the Final Results by Selecting Top 6 Features
result

Unnamed: 0,Logistic Regression,SVM Linear,Decision Tree,Random Forest
Logistic Regression,0.010546,-0.324979,1.0,1.0
SVM Linear,0.012239,-0.327366,0.082516,0.254189
Decision Tree,0.034845,-0.323938,1.0,1.0
Random Forest,0.034845,-0.323938,1.0,1.0
