In [1]:
# Importing the Required Libraries
import pandas as pd

# Pre-processing library
from sklearn.preprocessing import StandardScaler

# Train_Test_Split
from sklearn.model_selection import train_test_split

# Building the Linear Regression
from sklearn.linear_model import LinearRegression

# Model Evaluation
from sklearn.metrics import mean_squared_error

In [2]:
# Load the dataset from disk; the first CSV column is used as the row index.
# The preview below shows only the first five rows.

df = pd.read_csv(
    'bitcoin_dataset.csv',
    index_col=0,
)
df.head()

Unnamed: 0_level_0,btc_market_price,btc_total_bitcoins,btc_market_cap,btc_trade_volume,btc_blocks_size,btc_avg_block_size,btc_n_orphaned_blocks,btc_n_transactions_per_block,btc_median_confirmation_time,btc_hash_rate,...,btc_cost_per_transaction_percent,btc_cost_per_transaction,btc_n_unique_addresses,btc_n_transactions,btc_n_transactions_total,btc_n_transactions_excluding_popular,btc_n_transactions_excluding_chains_longer_than_100,btc_output_volume,btc_estimated_transaction_volume,btc_estimated_transaction_volume_usd
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2/17/2010,0.0,2043200.0,0.0,0.0,0.0,0.000235,0,1.0,0.0,2.9e-05,...,31.781022,0.0,241,244,41240,244,244,65173.13,36500.0,0.0
2/18/2010,0.0,2054650.0,0.0,0.0,0.0,0.000241,0,1.0,0.0,2.9e-05,...,154.463801,0.0,234,235,41475,235,235,18911.74,7413.0,0.0
2/19/2010,0.0,2063600.0,0.0,0.0,0.0,0.000228,0,1.0,0.0,2.3e-05,...,1278.516635,0.0,185,183,41658,183,183,9749.98,700.0,0.0
2/20/2010,0.0,2074700.0,0.0,0.0,0.0,0.000218,0,1.0,0.0,2.8e-05,...,22186.68799,0.0,224,224,41882,224,224,11150.03,50.0,0.0
2/21/2010,0.0,2085400.0,0.0,0.0,0.0,0.000234,0,1.0,0.0,2.7e-05,...,689.179876,0.0,218,218,42100,218,218,12266.83,1553.0,0.0


In [3]:
# Dimensions of Data
# `shape` is a (number of rows, number of columns) tuple for the loaded frame.

df.shape

(2906, 23)

In [4]:
# Structure of Data
# Prints the index range plus each column's non-null count and dtype;
# columns whose non-null count is below the row count contain missing values.

df.info()

<class 'pandas.core.frame.DataFrame'>
Index: 2906 entries, 2/17/2010 to 1/31/2018
Data columns (total 23 columns):
 #   Column                                               Non-Null Count  Dtype  
---  ------                                               --------------  -----  
 0   btc_market_price                                     2906 non-null   float64
 1   btc_total_bitcoins                                   2879 non-null   float64
 2   btc_market_cap                                       2906 non-null   float64
 3   btc_trade_volume                                     2885 non-null   float64
 4   btc_blocks_size                                      2877 non-null   float64
 5   btc_avg_block_size                                   2906 non-null   float64
 6   btc_n_orphaned_blocks                                2906 non-null   int64  
 7   btc_n_transactions_per_block                         2906 non-null   float64
 8   btc_median_confirmation_time                         2894 no

In [5]:
# Summary statistics (count, mean, std, min, quartiles, max) per numeric column.
# Transposed so that each column of the data becomes one row of the table.

df.describe().T

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
btc_market_price,2906.0,839.1042,2304.972,0.0,6.653465,235.13,594.1912,19498.68
btc_total_bitcoins,2879.0,11511380.0,4200024.0,2043200.0,8485300.0,12431150.0,15200510.0,16837690.0
btc_market_cap,2906.0,13443520000.0,38668410000.0,0.0,53630810.0,3346869000.0,8075525000.0,327000000000.0
btc_trade_volume,2885.0,73983810.0,292422800.0,0.0,291645.6,10014140.0,28340380.0,5352016000.0
btc_blocks_size,2877.0,35505.5,43618.63,0.0,781.0,15183.0,58293.0,154444.6
btc_avg_block_size,2906.0,0.3503659,0.3531685,0.000216335,0.02417726,0.1960218,0.6760651,1.110327
btc_n_orphaned_blocks,2906.0,0.3640743,0.8422593,0.0,0.0,0.0,0.0,7.0
btc_n_transactions_per_block,2906.0,671.6737,689.5613,1.0,54.0,375.0,1232.995,2722.625
btc_median_confirmation_time,2894.0,7.501113,4.974549,0.0,6.066667,7.916667,10.20833,47.73333
btc_hash_rate,2906.0,1244070.0,2924141.0,2.25e-05,11.6088,21761.89,1035363.0,21609750.0


In [6]:
# Count the missing values in every column, largest count first.
# The result is kept in `a` because the following cells reuse it.

a = (
    df.isna()
    .sum()
    .sort_values(ascending=False)
)
a

btc_blocks_size                                        29
btc_total_bitcoins                                     27
btc_trade_volume                                       21
btc_difficulty                                         16
btc_median_confirmation_time                           12
btc_transaction_fees                                   10
btc_estimated_transaction_volume_usd                    0
btc_market_cap                                          0
btc_avg_block_size                                      0
btc_n_orphaned_blocks                                   0
btc_n_transactions_per_block                            0
btc_hash_rate                                           0
btc_miners_revenue                                      0
btc_estimated_transaction_volume                        0
btc_cost_per_transaction_percent                        0
btc_cost_per_transaction                                0
btc_n_unique_addresses                                  0
btc_n_transact

In [7]:
# Keep only the columns that actually have at least one missing value.

a.loc[a.gt(0)]

btc_blocks_size                 29
btc_total_bitcoins              27
btc_trade_volume                21
btc_difficulty                  16
btc_median_confirmation_time    12
btc_transaction_fees            10
dtype: int64

In [8]:
# Filling the Missing Values with the Mean Value
#
# Every numeric column that contains nulls is imputed with its own mean.
# The filled column is assigned back to `df` instead of calling
# `df[col].fillna(..., inplace=True)`: that form mutates an intermediate
# Series (chained assignment), emits a FutureWarning in pandas 2.x and leaves
# `df` untouched once Copy-on-Write is enabled.
#
# NOTE(review): the mean is computed over the full dataset, i.e. before the
# train/test split done later in the notebook - confirm this is acceptable.

b = a[a > 0].index
# select_dtypes restricts the imputation to numeric columns without relying
# on exact dtype-name string comparisons.
for col in df[b].select_dtypes(include='number').columns:
    df[col] = df[col].fillna(df[col].mean())

# Verify that no missing values remain.
df.isnull().sum()

btc_market_price                                       0
btc_total_bitcoins                                     0
btc_market_cap                                         0
btc_trade_volume                                       0
btc_blocks_size                                        0
btc_avg_block_size                                     0
btc_n_orphaned_blocks                                  0
btc_n_transactions_per_block                           0
btc_median_confirmation_time                           0
btc_hash_rate                                          0
btc_difficulty                                         0
btc_miners_revenue                                     0
btc_transaction_fees                                   0
btc_cost_per_transaction_percent                       0
btc_cost_per_transaction                               0
btc_n_unique_addresses                                 0
btc_n_transactions                                     0
btc_n_transactions_total       

In [9]:
# MicroTask_2
# Market price at positional row 1023 (zero-based): pick the column first,
# then index into it by position.

df['btc_market_price'].iloc[1023]

13.6888

In [10]:
# MicroTask_3
# Correlation of every column with the market price, strongest first.
# The first entry is skipped: it is the price's correlation with itself.

(
    df.corr()
    .loc[:, 'btc_market_price']
    .iloc[1:]
    .sort_values(ascending=False)
)

btc_market_cap                                         0.999783
btc_miners_revenue                                     0.986732
btc_estimated_transaction_volume_usd                   0.968308
btc_hash_rate                                          0.920876
btc_difficulty                                         0.916913
btc_trade_volume                                       0.868608
btc_cost_per_transaction                               0.815507
btc_transaction_fees                                   0.806568
btc_n_transactions_total                               0.680048
btc_blocks_size                                        0.674059
btc_n_unique_addresses                                 0.663917
btc_n_transactions                                     0.569603
btc_n_transactions_excluding_chains_longer_than_100    0.567281
btc_n_transactions_excluding_popular                   0.558416
btc_n_transactions_per_block                           0.544125
btc_avg_block_size                      

In [11]:
# Separate the target (first column) from the predictors (all other columns).
# NOTE(review): the predictors include btc_market_cap, which correlates ~0.9998
# with the target above - presumably derived from the price; confirm whether
# it should be kept as a feature.

values = df.values
X = values[:, 1:]
Y = values[:, 0]

In [12]:
# Standardise every feature column; the fitted scaler stays available as `sc`.
# NOTE(review): the scaler is fitted on all rows before the train/test split,
# so test-set statistics influence the scaling - consider fitting on the
# training rows only.

sc = StandardScaler()
X = sc.fit_transform(X)

In [13]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.

split_parts = train_test_split(
    X,
    Y,
    test_size=0.2,
    random_state=10,
)
X_train, X_test, Y_train, Y_test = split_parts

In [14]:
# Fit an ordinary least-squares model on the training rows, then predict the
# held-out rows. `fit` returns the estimator itself, so it can be chained.

linear_reg = LinearRegression().fit(X_train, Y_train)
Y_pred = linear_reg.predict(X_test)

In [15]:
# MicroTask_5 - Model Evaluation
# Mean squared error on the held-out rows, rounded to the nearest integer
# before it is reported.

raw_mse = mean_squared_error(Y_test, Y_pred)
mse = int(round(raw_mse))
print('MSE Value:', mse, '\n')

MSE Value: 319 

