In [None]:
import ccxt
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import yfinance as yf
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures

# Download BTC-USD price history from Yahoo Finance: one date window to fit
# the models on and a later, disjoint window to evaluate them on.
ticker = "BTC-USD"
start_date = "2018-02-01"
end_date = "2022-01-01"
test_start_date = "2022-02-01"
test_end_date = "2023-01-01"
# NOTE(review): January 2022 falls in neither window -- confirm the gap is intentional.


def _load_prices(symbol, start, end):
    """Download one symbol's price history and return it with flat column names.

    Args:
        symbol: ticker string passed to ``yf.download``.
        start: start date string (YYYY-MM-DD) passed to ``yf.download``.
        end: end date string (YYYY-MM-DD) passed to ``yf.download``.

    Returns:
        The downloaded DataFrame, with single-level column labels.

    Raises:
        RuntimeError: if the download produced no rows, so the failure shows
            up here instead of as an obscure error in a later cell.
    """
    frame = yf.download(symbol, start=start, end=end)
    if frame.empty:
        raise RuntimeError(
            f"No rows downloaded for {symbol} between {start} and {end}"
        )
    # Newer yfinance releases may return (field, ticker) MultiIndex columns even
    # for a single symbol; keep only the field level so frame['Close'] is a Series.
    if isinstance(frame.columns, pd.MultiIndex):
        frame = frame.copy()
        frame.columns = frame.columns.get_level_values(0)
    return frame


data = _load_prices(ticker, start_date, end_date)
data_test = _load_prices(ticker, test_start_date, test_end_date)

# Show only the first few rows for inspection rather than printing the whole frame
data.head()



In [None]:
# Column roles: the model predicts `target_column` from the `X_features` columns.
target_column = 'Close'
X_features = ['Open', "Volume"]

# The two downloads already form the train/test partition, so nothing is
# split here -- each frame is just sliced into inputs and target.
X_train, y_train = data[X_features], data[target_column]
X_test, y_test = data_test[X_features], data_test[target_column]

# Sanity-check the resulting shapes
print("\nShape of X_train:", X_train.shape)
print("Shape of X_test:", X_test.shape)
print("Shape of y_train:", y_train.shape)
print("Shape of y_test:", y_test.shape)

In [None]:
# Scatter each training feature against the closing price, one panel per
# feature, to eyeball how strongly each relates to the target.
fig, axs = plt.subplots(2, figsize=(12, 8))

for ax, feature in zip(axs, ('Open', 'Volume')):
    ax.scatter(X_train[feature], y_train)
    ax.set_title(f'{feature} vs Close')
    # Label both axes so each panel can be read on its own
    ax.set_xlabel(feature)
    ax.set_ylabel('Close')

plt.tight_layout()
plt.show()

In [None]:
# Fit a plain linear regression of the target on the training features.
linear_model = LinearRegression().fit(X_train, y_train)
# fit() returns the estimator itself; leaving it as the cell's last expression
# makes the notebook display the fitted model.
linear_model

In [None]:
# Report the fitted linear model's coefficients (w) and intercept (b).
w, b = linear_model.coef_, linear_model.intercept_
print(f"w = {w}, b = {b}")

In [None]:
# Evaluate the linear model on the held-out frame (data_test) and compare its
# predictions with the actual closing prices.
predictions = linear_model.predict(X_test).tolist()
time_column = data_test.index.tolist()  # x axis: data_test's index values

plt.figure(figsize=(10, 6))
plt.plot(time_column, y_test, label='Actual', marker='o')
plt.plot(time_column, predictions, label='Predicted', marker='x')
plt.title('Linear regression: actual vs predicted Close (test data)')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()  # the label= arguments above are only drawn once a legend is requested

# y_test and predictions both come from data_test, so this is the error on
# held-out data, not the training error. The variable keeps its existing name
# `train_mse`; only the printed label is corrected.
train_mse = mean_squared_error(y_test, predictions)
print(f"Test MSE: {train_mse}")
plt.show()

In [None]:
# Degree-2 polynomial regression. The pipeline expands the raw features into
# polynomial terms itself, so no separate PolynomialFeatures transform of
# X_train is needed.
# NOTE(review): the features are fed in unscaled; powers of large-magnitude
# columns can make the least-squares fit ill-conditioned -- consider adding a
# scaler in front of PolynomialFeatures.
quadratic_model = make_pipeline(PolynomialFeatures(2), LinearRegression())
quadratic_model.fit(X_train, y_train)

predictions = quadratic_model.predict(X_test).tolist()
time_column = data_test.index.tolist()  # x axis: data_test's index values

plt.figure(figsize=(10, 6))
plt.plot(time_column, y_test, label='Actual', marker='o')
plt.plot(time_column, predictions, label='Predicted', marker='x')
plt.title('Degree-2 polynomial regression: actual vs predicted Close (test data)')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()  # the label= arguments above are only drawn once a legend is requested

# Computed on y_test, so this is the held-out (test) error. The variable keeps
# its existing name `train_mse`; only the printed label is corrected.
train_mse = mean_squared_error(y_test, predictions)
print(f"Test MSE for quadratic polynomial: {train_mse}")
plt.show()

In [None]:
# Degree-3 (cubic) polynomial regression. The pipeline expands the raw
# features into polynomial terms itself, so no separate PolynomialFeatures
# transform of X_train is needed.
# The pipeline is still bound to the notebook's existing name
# `quadratic_model`, even though it is a cubic model here.
# NOTE(review): the features are fed in unscaled; powers of large-magnitude
# columns can make the least-squares fit ill-conditioned -- consider adding a
# scaler in front of PolynomialFeatures.
quadratic_model = make_pipeline(PolynomialFeatures(3), LinearRegression())
quadratic_model.fit(X_train, y_train)

predictions = quadratic_model.predict(X_test).tolist()
time_column = data_test.index.tolist()  # x axis: data_test's index values

plt.figure(figsize=(10, 6))
plt.plot(time_column, y_test, label='Actual', marker='o')
plt.plot(time_column, predictions, label='Predicted', marker='x')
plt.title('Degree-3 polynomial regression: actual vs predicted Close (test data)')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()  # the label= arguments above are only drawn once a legend is requested

# Computed on y_test, so this is the held-out (test) error. The variable keeps
# its existing name `train_mse`; only the printed label is corrected.
train_mse = mean_squared_error(y_test, predictions)
print(f"Test MSE for cubic polynomial: {train_mse}")
plt.show()

In [None]:
# Degree-4 polynomial regression. The pipeline expands the raw features into
# polynomial terms itself, so no separate PolynomialFeatures transform of
# X_train is needed.
# The pipeline is still bound to the notebook's existing name
# `quadratic_model`, even though it is a degree-4 model here.
# NOTE(review): the features are fed in unscaled; powers of large-magnitude
# columns can make the least-squares fit ill-conditioned -- consider adding a
# scaler in front of PolynomialFeatures.
quadratic_model = make_pipeline(PolynomialFeatures(4), LinearRegression())
quadratic_model.fit(X_train, y_train)

predictions = quadratic_model.predict(X_test).tolist()
time_column = data_test.index.tolist()  # x axis: data_test's index values

plt.figure(figsize=(10, 6))
plt.plot(time_column, y_test, label='Actual', marker='o')
plt.plot(time_column, predictions, label='Predicted', marker='x')
plt.title('Degree-4 polynomial regression: actual vs predicted Close (test data)')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()  # the label= arguments above are only drawn once a legend is requested

# Computed on y_test, so this is the held-out (test) error. The variable keeps
# its existing name `train_mse`; only the printed label is corrected.
train_mse = mean_squared_error(y_test, predictions)
print(f"Test MSE for degree 4 polynomial: {train_mse}")
plt.show()

In [None]:
# Overlay the close-price predictions stored in open_predicted_values.csv on
# the actual closing prices from data_test.
# (pandas and matplotlib are already imported in the notebook's first cell.)
data1 = pd.read_csv('../datasets/open_predicted_values.csv')
time_column = data_test.index.tolist()  # x axis: data_test's index values

# The CSV rows are plotted positionally against data_test's index, so the two
# must have the same number of rows; fail with a clear message if they do not.
if len(data1) != len(time_column):
    raise ValueError(
        f"open_predicted_values.csv has {len(data1)} rows but data_test has "
        f"{len(time_column)}; the two cannot be plotted against each other"
    )

plt.figure(figsize=(10, 6))
plt.plot(time_column, data_test['Close'], label='Actual', marker='o')
plt.plot(time_column, data1['close'], label='Predicted', marker='x')
plt.title('Saved predictions: actual vs predicted Close (test data)')
plt.xlabel('Date')
plt.ylabel('Close')
plt.legend()  # the label= arguments above are only drawn once a legend is requested
plt.show()