Data predictions

 - Linear Regression
 - Decision Tree
 -Random Forest
 - Support Vector Machines
 - Neural Networks
 - Gradient Boosting Machines
 - Time Series Analysis


In [1]:
# imports

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.neural_network import MLPRegressor
from sklearn.ensemble import GradientBoostingRegressor
from statsmodels.tsa.arima.model import ARIMA

In [2]:
# get the data and organize it

url = 'https://raw.githubusercontent.com/CVanchieri/CryptoThings/main/Data/CompiledCrypto/ALLDATA.csv'
df = pd.read_csv(url)
df.sort_values(by='Date', inplace=True)
df.fillna(0, inplace=True)

In [3]:
# show the dataframe details

print(f"Things = {df['Name'].unique()}")
print(f"Dates = {df['Date'].iloc[0]} - {df['Date'].iloc[-1]}")
print(df.shape)
df.head()

Things = ['GOLD' 'ADA' 'SOL' 'ETH' 'BTC' 'GAS' 'OIL' 'XRP']
Dates = 2023-08-16 - 2024-08-16
(2613, 8)


Unnamed: 0,Date,Name,Open,High,Low,Close,Change,%
0,2023-08-16,GOLD,0.0,0.0,0.0,1904.2,0.0,0.0
2245,2023-08-16,ADA,0.2832,0.2832,0.2686,0.271,0.0008,0.0
1878,2023-08-16,SOL,24.04,24.04,22.63,22.7,0.0,0.0
1511,2023-08-16,ETH,1827.76,1829.71,1800.13,1807.39,91.4,5.06
1144,2023-08-16,BTC,29153.0,29221.0,28928.0,29089.0,1232.0,4.24


In [4]:
# Linear Regression

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1)  # convert dates to numbers
    y = name_df['Close'].values # get the close values

    model = LinearRegression() # linear regression
    model.fit(X, y) # fit model

    # Predict the price for 09/01/2024 (date)
    future_date = pd.to_datetime(date).value  # get the number value of the given date
    predicted_price = model.predict([[future_date]]) # predict the model on the given date

    print(f"LR - Predicted {name} price on {date} (Linear Regression): {predicted_price[0]}")

LR - Predicted GOLD price on 09/01/2024 (Linear Regression): 2475.128768515253
LR - Predicted OIL price on 09/01/2024 (Linear Regression): 79.15541466031924
LR - Predicted GAS price on 09/01/2024 (Linear Regression): 1.792110990000367
LR - Predicted BTC price on 09/01/2024 (Linear Regression): 76019.13263496291
LR - Predicted ETH price on 09/01/2024 (Linear Regression): 3819.213124173373
LR - Predicted SOL price on 09/01/2024 (Linear Regression): 199.46017585419395
LR - Predicted ADA price on 09/01/2024 (Linear Regression): 0.5305853821171436
LR - Predicted XRP price on 09/01/2024 (Linear Regression): 0.5432244837599849


In [5]:
# Decision Trees

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1) # convert dates to numbers
    y = name_df['Close'].values # get the close values

    model = DecisionTreeRegressor() # decision tree
    model.fit(X, y) # fit model

    # Predict the price for 09/01/2024 (date)
    future_date = pd.to_datetime(date).value  # get the number value of the given date
    predicted_price = model.predict([[future_date]]) # predict the model on the given date

    print(f"DT - Predicted {name} price on {date} (Decision Tree): {predicted_price[0]}")

DT - Predicted GOLD price on 09/01/2024 (Decision Tree): 2485.8
DT - Predicted OIL price on 09/01/2024 (Decision Tree): 76.75
DT - Predicted GAS price on 09/01/2024 (Decision Tree): 2.12
DT - Predicted BTC price on 09/01/2024 (Decision Tree): 59736.0
DT - Predicted ETH price on 09/01/2024 (Decision Tree): 2612.28
DT - Predicted SOL price on 09/01/2024 (Decision Tree): 140.62
DT - Predicted ADA price on 09/01/2024 (Decision Tree): 0.3324
DT - Predicted XRP price on 09/01/2024 (Decision Tree): 0.5668


In [6]:
# Random Forests

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1) # convert dates to numbers
    y = name_df['Close'].values # get the close values

    model = RandomForestRegressor() # random forest
    model.fit(X, y) # fit model

       # Predict the price for 09/01/2024 (date)
    future_date = pd.to_datetime(date).value  # get the number value of the given date
    predicted_price = model.predict([[future_date]]) # predict the model on the given date

    print(f"RF - Predicted {name} price on {date} (Random Forests): {predicted_price[0]}")

RF - Predicted GOLD price on 09/01/2024 (Random Forests): 2474.427999999998
RF - Predicted OIL price on 09/01/2024 (Random Forests): 77.75599999999994
RF - Predicted GAS price on 09/01/2024 (Random Forests): 2.1197
RF - Predicted BTC price on 09/01/2024 (Random Forests): 59076.43
RF - Predicted ETH price on 09/01/2024 (Random Forests): 2602.281399999997
RF - Predicted SOL price on 09/01/2024 (Random Forests): 141.02540000000016
RF - Predicted ADA price on 09/01/2024 (Random Forests): 0.330926
RF - Predicted XRP price on 09/01/2024 (Random Forests): 0.5651470000000007


In [7]:
# Support Vector Machines (SVMs)

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1) # convert dates to numbers
    y = name_df['Close'].values # get the close values

    svm_model = SVR() # support vector machine
    svm_model.fit(X, y) # fit model
    svm_predicted_price = svm_model.predict([[future_date]]) # predict the model on the given date

    print(f"SVM - Predicted {name} price on {date} (Support Vector Machines): {svm_predicted_price[0]}")

SVM - Predicted GOLD price on 09/01/2024 (Support Vector Machines): 2156.8715917215104
SVM - Predicted OIL price on 09/01/2024 (Support Vector Machines): 78.96933579971105
SVM - Predicted GAS price on 09/01/2024 (Support Vector Machines): 1.6392501435905174
SVM - Predicted BTC price on 09/01/2024 (Support Vector Machines): 51162.84536965434
SVM - Predicted ETH price on 09/01/2024 (Support Vector Machines): 2626.4324838876983
SVM - Predicted SOL price on 09/01/2024 (Support Vector Machines): 126.77804535469063
SVM - Predicted ADA price on 09/01/2024 (Support Vector Machines): 0.4182862983279187
SVM - Predicted XRP price on 09/01/2024 (Support Vector Machines): 0.5725984389031069


In [8]:
# Neural Networks

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1) # convert dates to numbers
    y = name_df['Close'].values # get the close values

    nn_model = MLPRegressor(hidden_layer_sizes=(100,), activation='relu', solver='adam', random_state=42, max_iter=500) # neural network
    nn_model.fit(X, y) # fit model
    nn_predicted_price = nn_model.predict([[future_date]])  # predict the model on the given date

    print(f"Neural Network - Predicted {name} price on {date} (Neural Networks): {nn_predicted_price[0]}")


Neural Network - Predicted GOLD price on 09/01/2024 (Neural Networks): -22103143476388.715
Neural Network - Predicted OIL price on 09/01/2024 (Neural Networks): -22103143478436.715
Neural Network - Predicted GAS price on 09/01/2024 (Neural Networks): 5891935495578.284
Neural Network - Predicted BTC price on 09/01/2024 (Neural Networks): -1687849000988.216
Neural Network - Predicted ETH price on 09/01/2024 (Neural Networks): -1687849049056.716
Neural Network - Predicted SOL price on 09/01/2024 (Neural Networks): -1687849051648.216
Neural Network - Predicted ADA price on 09/01/2024 (Neural Networks): -1687849051773.716
Neural Network - Predicted XRP price on 09/01/2024 (Neural Networks): -1687849051771.216


In [9]:
# Gradient Boosting Machines (GBMs)

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name] # filter the data for the given names
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    X = pd.to_datetime(name_df['Date']).astype(int).values.reshape(-1, 1) # convert dates to numbers
    y = name_df['Close'].values # get the close values

    # Gradient Boosting Machine
    gbm_model = GradientBoostingRegressor() # gradient boost machine
    gbm_model.fit(X, y) # fit model
    gbm_predicted_price = gbm_model.predict([[future_date]])  # predict the model on the given date

    print(f"GBM - Predicted {name} price on {date} (Gradient Boosting Machines): {gbm_predicted_price[0]}")

GBM - Predicted GOLD price on 09/01/2024 (Gradient Boosting Machines): 2480.467732775644
GBM - Predicted OIL price on 09/01/2024 (Gradient Boosting Machines): 77.22482794115567
GBM - Predicted GAS price on 09/01/2024 (Gradient Boosting Machines): 2.100187939761494
GBM - Predicted BTC price on 09/01/2024 (Gradient Boosting Machines): 59247.55773474076
GBM - Predicted ETH price on 09/01/2024 (Gradient Boosting Machines): 2588.056104956306
GBM - Predicted SOL price on 09/01/2024 (Gradient Boosting Machines): 141.66868617121912
GBM - Predicted ADA price on 09/01/2024 (Gradient Boosting Machines): 0.33220052597103844
GBM - Predicted XRP price on 09/01/2024 (Gradient Boosting Machines): 0.5640007850337962


In [10]:
# Time Series Analysis

names = ['GOLD', 'OIL', 'GAS', 'BTC', 'ETH', 'SOL', 'ADA', 'XRP'] # names to predict for
date = '09/01/2024' # pick a date

for name in names:

    name_df = df[df['Name'] == name].copy()  # Create an explicit copy
    if name_df.empty:
        print(f"No data found for {name} price. Check your dataset.")
        continue

    # Prepare data for time series analysis
    name_df['Date'] = pd.to_datetime(name_df['Date'])
    name_df.set_index('Date', inplace=True)
    y = name_df['Close'] # get the close values

    y = y.resample('D').last()  # 'D' for daily, adjust if needed

    # (p, d, q) are the model orders. You might need to tune these.
    model = ARIMA(y, order=(5, 1, 0)) # arima
    model_fit = model.fit() # fit model

    forecast_result = model_fit.forecast(steps=1) # predict the price for the next day after the end of your data
    predicted_price = forecast_result.iloc[0]  # access the first (and only) predicted value
    last_date = y.index[-1] # fet the last date in your data

    date_dt = pd.to_datetime(date) # convert the 'date' string to a datetime object
    prediction_date = date_dt + pd.DateOffset(days=0) # calculate the prediction date (next day)

    print(f"ARIMA - Predicted {name} price on {prediction_date.date()} (Time Series Analysis): {predicted_price}")

ARIMA - Predicted GOLD price on 2024-09-01 (Time Series Analysis): 2482.287822647026
ARIMA - Predicted OIL price on 2024-09-01 (Time Series Analysis): 75.92956940608991
ARIMA - Predicted GAS price on 2024-09-01 (Time Series Analysis): 2.0913340756844994
ARIMA - Predicted BTC price on 2024-09-01 (Time Series Analysis): 59487.56868452981
ARIMA - Predicted ETH price on 2024-09-01 (Time Series Analysis): 2617.6925181443644
ARIMA - Predicted SOL price on 2024-09-01 (Time Series Analysis): 140.44106549754198
ARIMA - Predicted ADA price on 2024-09-01 (Time Series Analysis): 0.33217802057908136
ARIMA - Predicted XRP price on 2024-09-01 (Time Series Analysis): 0.5668503961242893
