# In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

# Synthetic stand-in data; swap this section out for a real data source.
# The global NumPy seed is fixed so every run draws the same numbers.
np.random.seed(42)
num_samples = 1000

# Draw order matters for reproducibility: the normal increments are sampled
# first, the integer volumes second.
# Prices are the running sum of N(150, 10) increments.
historical_prices = np.cumsum(np.random.normal(loc=150, scale=10, size=num_samples))
# Volumes are integers in [1000, 5000) (upper bound exclusive).
trading_volume = np.random.randint(low=1000, high=5000, size=num_samples)

# One timestamp per sample, starting 2022-01-01.
date_range = pd.date_range(start='2022-01-01', periods=num_samples)

# Assemble the three series into a single frame, one row per timestamp.
data = pd.DataFrame(
    {
        'Date': date_range,
        'Historical_Price': historical_prices,
        'Volume': trading_volume,
    }
)

# Target: the price 5 rows ahead. shift(-5) moves each later price back onto
# the current row, so the final 5 rows have no target and hold NaN.
data['Future_Price'] = data['Historical_Price'].shift(periods=-5)

# Features: simple moving averages, given as (new column, source column, window).
# A rolling mean is NaN until its window is full, so the first window-1 rows
# of each new column are NaN.
for _name, _column, _window in (
    ('SMA_5', 'Historical_Price', 5),
    ('SMA_20', 'Historical_Price', 20),
    ('Volume_SMA_5', 'Volume', 5),
):
    data[_name] = data[_column].rolling(window=_window).mean()

# Drop, in place, every row containing a NaN in any column: the leading rows
# whose rolling windows are incomplete and the trailing rows with no target.
data.dropna(how='any', inplace=True)

# Model inputs (current price/volume plus their moving averages) and the
# column the model is asked to predict.
features = ['Historical_Price', 'Volume', 'SMA_5', 'SMA_20', 'Volume_SMA_5']
target = 'Future_Price'

# Chronological train/test split: the first 80% of rows train the model and
# the final 20% evaluate it. The rows form a time series whose features are
# rolling-window averages and whose target is a price 5 rows ahead, so a
# shuffled split would scatter training rows throughout the test period and
# let the model learn from prices inside the very window it is scored on
# (look-ahead leakage), understating the reported error.
# shuffle=False preserves row order; no random_state is needed without
# shuffling.
X_train, X_test, y_train, y_test = train_test_split(
    data[features],
    data[target],
    test_size=0.2,
    shuffle=False,
)

# Fit a linear regression on the training split. fit() returns the estimator
# itself, so construction and fitting are chained.
model = LinearRegression().fit(X_train, y_train)

# Predict the held-out rows and report the mean squared error against the
# true targets.
predictions = model.predict(X_test)
mse = mean_squared_error(y_true=y_test, y_pred=predictions)
print(f"Mean Squared Error: {mse}")

# Scatter of true vs. predicted targets for the test split; points close to
# the diagonal are predictions close to the actual value.
fig, ax = plt.subplots(figsize=(10, 6))
ax.scatter(y_test, predictions)
ax.set_xlabel('Actual Future Price')
ax.set_ylabel('Predicted Future Price')
ax.set_title('Actual vs. Predicted Future Stock Prices')
plt.show()
