In [None]:
import pandas as pd
import numpy as np
# Load the raw crypto currency data into a DataFrame.
# NOTE(review): this is an absolute path on one user's machine, so the notebook
# will not run elsewhere without editing it.
crypto_file = pd.read_csv(r'C:\Users\muham\Downloads\crypto currency data.csv')

In [None]:
# Check the data type of every column in the loaded frame
crypto_file.dtypes

In [None]:
# 'time' is read in as a non-numerical object column, which cannot be analysed
# numerically, so parse it into a proper datetime column
from datetime import datetime
date_time_str = crypto_file['time']

# date_time_str keeps pointing at the original, unparsed values
crypto_file['time'] = pd.to_datetime(date_time_str)

In [None]:
# Re-check the column data types now that 'time' has been converted
crypto_file.dtypes

# Before making any analysis we first check if the data is clean with no missing values

In [None]:
# Count the missing values in each column
crypto_file.isna().sum()

# The file is clean: every feature has 0 missing values, so we can proceed with exploratory data analysis


In [None]:
# Let's see all pairwise correlations in the data frame first.
# numeric_only=True restricts the calculation to numeric columns: on
# pandas >= 2.0 the default no longer skips non-numeric columns (such as the
# datetime 'time' column) and raises an error instead.
crypto_file.corr(numeric_only=True)

In [None]:
# Remove the columns that are not used in the analysis below.
# Rebinding the name (instead of inplace=True) together with errors='ignore'
# makes this cell safe to re-run: a second run no longer raises KeyError
# because the columns are already gone.
crypto_file = crypto_file.drop(columns=['noticeActive', 'time'], errors='ignore')

In [None]:
# compare the 'low' price column ...
series1 = crypto_file['low']

# ... with the 'open' price column
series2 = crypto_file['open']

# summarize: mean and standard deviation of each column
low_mean, low_std = np.mean(series1), np.std(series1)
open_mean, open_std = np.mean(series2), np.std(series2)
print('data1: mean=%.2f stdv=%.2f' % (low_mean, low_std))
print('data2: mean=%.2f stdv=%.2f' % (open_mean, open_std))

# plot 'low' (x-axis) against 'open' (y-axis)
import matplotlib.pyplot as plt
plt.scatter(series1, series2, c='red')
plt.show()

# So we can see how perfectly linear the relationship between Open and Low is

In [None]:
import missingno as mn
# Draw missingno's heatmap for the frame as a visual check on missing values
mn.heatmap(crypto_file)

# We can verify here that the data is clean: there are no missing values in any column

In [None]:
# A boxplot lets us check for outliers, which we cannot see through a histogram
price_columns = ['open', 'high', 'close', 'low']
crypto_file[price_columns].plot.box();

In [None]:
from matplotlib import pyplot as plt
# Scatter each of the four price columns against Signal, side by side on a
# shared y-axis, to look at outliers and at how each one varies with Signal
with plt.style.context("default"):
    fig, axes = plt.subplots(
        ncols=4,
        sharey=True,
        gridspec_kw={'width_ratios': [1, 1, 1, 1], 'wspace': 0},
    )
    y = 'Signal'
    xs = ['open', 'high', 'close', 'low']
    # the y-axis is shared, so only the left-most panel carries its label
    axes[0].set_ylabel(y)
    for ax, x in zip(axes, xs):
        ax.set_xlabel(x)
        ax.scatter(crypto_file[x], crypto_file[y])

# So we see that Signal follows a very closely linked trend against the open and closing prices, as well as the high and low

#________________________________#

# Now We Make predictions 

In [None]:
# we import the candidate models; the Open price is predicted below with a RandomForestRegressor
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
from sklearn.linear_model import LogisticRegression

In [None]:
# We make predictions for Open prices
x = crypto_file['open']
y =  crypto_file.drop(['open'], axis=1)

In [None]:
# we split the data into 50% training set and 50% testing set
from sklearn.model_selection import train_test_split, cross_val_score, KFold
tx, tex, ty, tey = train_test_split(x,y,test_size=0.5,random_state=0)

In [None]:
# we convert all data into 2 dimensions
tx2d = tx.values.reshape(1,-1)
ty2d = ty.values.reshape(1,-1)
tey2d = tey.values.reshape(1,-1)
tex2d = tex.values.reshape(1,-1)

In [None]:
# we fit the data in the model here
DCT = RandomForestRegressor()
DCT.fit(tx2d,ty2d)

In [None]:
train = DCT.fit(tx2d,ty2d)

In [None]:
DCT.predict(tex2d)

In [None]:
prediction_of_open = DCT.predict(tex2d)
from sklearn.metrics import accuracy_score, r2_score, classification_report

In [None]:
# evaluate the model
from numpy import std, mean
cv1 = KFold(n_splits=10, random_state=12,shuffle= True)
scores = cross_val_score(DCT, x, y, scoring='accuracy', cv=cv1, n_jobs=-1)

In [None]:
# lets try recall as a scoring 
scores1 = cross_val_score(DCT, x, y, scoring='recall', cv=cv1, n_jobs=-1)
print('Accuracy: %.3f(%.3f)' % (mean(scores1), std(scores1)))

In [None]:
# lets try precision as a scoring
scores2 = cross_val_score(DCT, x, y, scoring='precision', cv=cv1, n_jobs=-1)
print('Accuracy: %.3f(%.3f)' % (mean(scores2), std(scores2)))

In [None]:
plt.hist(prediction_of_open);