In [2]:
import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [3]:
# Load the data
data = pd.read_csv("btcusd_1-min_data.csv")
data.dropna(inplace=True)

# Convert 'Timestamp' to datetime and sort by time
data['Timestamp'] = pd.to_datetime(data['Timestamp'])
data = data.sort_values(by='Timestamp', ascending=True)

# Create new features for the next day's High, Low, and Close prices
data['Next_High'] = data['High'].shift(1)
data['Next_Low'] = data['Low'].shift(1)
data['Next_Close'] = data['Close'].shift(1)

# Drop rows with NaN values resulting from the shift
data = data.dropna(subset=['Next_High', 'Next_Low', 'Next_Close'])

# Drop unnecessary columns
data = data.drop(columns=['Timestamp'])

# Define the features and target columns
features = ['Open', 'High', 'Low', 'Close', 'Volume']
target = ['Next_High', 'Next_Low', 'Next_Close']

X = data[features]
y = data[target]

# Ensure alignment of X and y
assert len(X) == len(y), "X and y are not aligned!"

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Define the pipeline
pipeline = Pipeline([
    # StandardScaler for scaling features
    ('scaler', ColumnTransformer([
        ('scale_features', StandardScaler(), features)
    ], remainder='passthrough')),
    
    # Linear Regression Model
    ('model', LinearRegression())
])

# Train the pipeline
pipeline.fit(X_train, y_train)

# Make predictions
y_pred = pipeline.predict(X_test)

# Evaluate the model
print("Linear Regression - Performance:")
print(f"RMSE: {mean_squared_error(y_test, y_pred, squared=False)}")
print(f"MAE: {mean_absolute_error(y_test, y_pred)}")
print(f"R2: {r2_score(y_test, y_pred)}")

Linear Regression - Performance:
RMSE: 16.41133953244782
MAE: 5.815640087222708
R2: 0.9999992966246646


In [4]:
# Function to predict next High, Low, Close from user input
def predict_next_prices(pipeline, input_features):
    input_df = pd.DataFrame([input_features], columns=features)
    prediction = pipeline.predict(input_df)
    return prediction

In [6]:
# Example user input
user_input = {
    'Open': 100491,
    'High': 100616,
    'Low': 97515,
    'Close': 100050,
    'Volume': 74990
}

# Predict next High, Low, and Close
predicted_values = predict_next_prices(pipeline, user_input)
print(f"Predicted Next High, Low, Close: {predicted_values}")

Predicted Next High, Low, Close: [[101755.3330404   99445.14322964 100481.98579081]]
