In [1]:
import requests
import json

# Define the endpoint for the CryptoCompare API
url = 'https://min-api.cryptocompare.com/data/histoday'

# Define the parameters for the API request
parameters = {
    'fsym': 'ETH',
    'tsym': 'USD',
    'limit': '2000'
}

# Make the API request
response = requests.get(url, params=parameters)

# Parse the response
data = json.loads(response.text)

# Print the historical data
print(data['Data'][:10])  # Only Showing first 10 values just for confirmation

# The data variable contains 2000 records which have been extrected from the CryptoCompare API
# Note: This Free web-API doesn't allow us to extract more than 2000 records


[{'time': 1512518400, 'high': 456.87, 'low': 413.13, 'open': 453.96, 'volumefrom': 1074639.32, 'volumeto': 470223347.34, 'close': 422.48, 'conversionType': 'direct', 'conversionSymbol': ''}, {'time': 1512604800, 'high': 434.85, 'low': 404.16, 'open': 422.39, 'volumefrom': 992479.84, 'volumeto': 416652301.63, 'close': 421.15, 'conversionType': 'direct', 'conversionSymbol': ''}, {'time': 1512691200, 'high': 461.1, 'low': 410.33, 'open': 420.76, 'volumefrom': 1095872.72, 'volumeto': 479837604.63, 'close': 451.74, 'conversionType': 'direct', 'conversionSymbol': ''}, {'time': 1512777600, 'high': 507.36, 'low': 448.09, 'open': 451.78, 'volumefrom': 1093313.72, 'volumeto': 522718280.71, 'close': 472.86, 'conversionType': 'direct', 'conversionSymbol': ''}, {'time': 1512864000, 'high': 472.78, 'low': 428.3, 'open': 472.31, 'volumefrom': 694853.93, 'volumeto': 307733091.95, 'close': 436.49, 'conversionType': 'direct', 'conversionSymbol': ''}, {'time': 1512950400, 'high': 519.35, 'low': 436.01, '

In [2]:
import pandas as pd

# Load the data into a pandas DataFrame
df = pd.DataFrame(data['Data'])

# Convert the DataFrame to a CSV file
df.to_csv('eth_data.csv', index=False)


In [3]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
import pandas as pd

# Load the data into a pandas DataFrame
df = pd.read_csv('eth_data.csv')

# Define the features and target
X = df[['high', 'low', 'open','volumefrom','volumeto', 'close']]
y = df['close']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a Linear Regression model
model = LinearRegression()

# Train the model on the training data
model.fit(X_train, y_train)

# Use the model to make predictions on the test data
y_pred = model.predict(X_test)

print(y_pred[:100])   #Only Showing first 30 predictions just for confirmation

[3000.83        112.66       2556.41        243.66       2233.57
 2706.52        226.59       1557.54        877.88        378.44
 1111.31        352.61       4135.31        268.99         84.3
  215.69        132.31        494.96       4299.27       2547.03
  180.88       2228.67       2772.84        604.98       1296.63
  610.56        568.26        317.5         212.87000001  944.83
  188.43       2196.71        294.14       2066.55       1315.47
  135.56       2940.31       3490.61       1699.89        317.57
  399.02       1763.2         278.31        226.64000001 2742.48
 2966.51        157.09       1866.8         920.11        320.19
 1472.51        332.28        638.17        469.93        242.66
 1552.71        346.94       1697.52       1426.4         116.24
 1379.35        180.72       2760.2         616.14       1822.22
 1189.27        132.83       1213.98        448.78        227.84
  136.64        140.87       3168.91        393.11        129.02
  202.43       3961.96    

In [4]:
# Evaluate the model's performance
score = model.score(X_test, y_test)
print(f'R^2 score: {score}')


R^2 score: 1.0


In [5]:
# Make predictions using the trained model
import sqlite3
y_pred = model.predict(X_test)

# Convert predictions to a DataFrame
predictions_df = pd.DataFrame(y_pred, columns=['predicted_price'])

# Import the SQLite3 library

# Connect to a SQLite database
conn = sqlite3.connect('predictions.db')

# Store the predictions in a SQLite table
predictions_df.to_sql('predictions', conn, if_exists='replace')


401

In [6]:
# Read the 'predictions' table into a DataFrame
predictions_df = pd.read_sql_query('SELECT * FROM predictions', conn)

# Print the first 5 rows of the DataFrame
print(predictions_df.head())


   index  predicted_price
0      0          3000.83
1      1           112.66
2      2          2556.41
3      3           243.66
4      4          2233.57


In [7]:
from sklearn.metrics import mean_absolute_error, mean_squared_error
from math import sqrt
import matplotlib.pyplot as plt

# Calculate MAE
mae = mean_absolute_error(y_test, predictions_df['predicted_price'])

# Calculate MSE
mse = mean_squared_error(y_test, predictions_df['predicted_price'])

# Calculate RMSE
rmse = sqrt(mse)

# Print the results
print(f'MAE: {mae}')
print(f'MSE: {mse}')
print(f'RMSE: {rmse}')


MAE: 1.6375713641849998e-09
MSE: 4.244947673443623e-18
RMSE: 2.0603270792385425e-09


In [8]:
from sklearn.metrics import r2_score

# Calculate R-squared score
r2 = r2_score(y_test, predictions_df['predicted_price'])

# Print the R-squared score
print(f'R-squared score: {r2}')


R-squared score: 1.0
