In [None]:
# Import necessary libraries
import pandas as pd
import numpy as np
import yfinance as yf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Step 1: Download Historical Data
def download_data(stock_symbol, start_date, end_date):
    """Download historical price data for a ticker via yfinance.

    Args:
        stock_symbol: Ticker symbol forwarded to ``yf.download``
            (for example ``'AAPL'``).
        start_date: Start of the requested range, forwarded as ``start``.
        end_date: End of the requested range, forwarded as ``end``.

    Returns:
        The object returned by ``yf.download``. If its columns are a
        two-level ``MultiIndex`` whose second level holds a single value
        (some yfinance releases return a (field, ticker) layout even for
        one ticker), the columns are flattened to the first level so that
        ``frame['Close']`` is a Series rather than a one-column DataFrame.
    """
    stock_data = yf.download(stock_symbol, start=start_date, end=end_date)

    # getattr keeps a non-DataFrame result (e.g. None) passing through
    # untouched, exactly as the plain wrapper would have returned it.
    columns = getattr(stock_data, 'columns', None)
    if (
        isinstance(columns, pd.MultiIndex)
        and columns.nlevels == 2
        and columns.get_level_values(1).nunique() == 1
    ):
        # Only one ticker present: the second level carries no information.
        stock_data.columns = columns.get_level_values(0)

    return stock_data

# Set parameters
stock_symbol = 'AAPL'  # Ticker to model (Apple Inc.)
start_date, end_date = '2015-01-01', '2024-01-01'

# Download stock data and show the first rows as a quick sanity check
data = download_data(stock_symbol, start_date, end_date)
print(data.head())

# Step 2: Data Preprocessing
# Turn the index of the downloaded frame into an ordinary column
data = data.reset_index()

# Step 3: Feature Engineering
# Label every row with the close of the row that follows it
data['Target'] = data['Close'].shift(-1)

# Remove rows with missing values; the final row is always among them
# because shift(-1) leaves its 'Target' empty.
data = data.dropna()

# Features and Labels
X = data.loc[:, ['Open', 'High', 'Low', 'Close', 'Volume']]
y = data.loc[:, 'Target']

# Step 4: Train-Test Split
# Split without shuffling so the last 20% of rows form the test set.
# A shuffled split would put later rows into the training set (look-ahead
# leakage) and return the test rows in random order.
# NOTE(review): assumes the rows of X/y are in date order - confirm.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Step 5: Model Training
# Ordinary least squares on the five price/volume features.
model = LinearRegression()
model.fit(X_train, y_train)

# Step 6: Predictions
# Predict the target for every held-out row.
y_pred = model.predict(X_test)

# Step 7: Model Evaluation
# Compare held-out targets with the predictions.
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f'Mean Squared Error: {mse:.2f}')
print(f'R^2 Score: {r2:.2f}')

# Plotting Actual vs Predicted Prices
# Both series are drawn against their position within the test set.
fig, ax = plt.subplots(figsize=(14, 7))
ax.plot(y_test.values, label='Actual Prices', color='blue')
ax.plot(y_pred, label='Predicted Prices', color='orange')
ax.set(
    title=f'Stock Price Prediction for {stock_symbol}',
    xlabel='Days',
    ylabel='Price',
)
ax.legend()
plt.show()

# Example Function to Predict Future Prices
def predict_future_price(data, model, n_days, random_state=None):
    """Roll the model forward to produce ``n_days`` successive predictions.

    The simulation starts from the last row of ``data``. After every step
    the predicted price becomes the next step's 'Open' and 'Close', while
    'High' and 'Low' are set to the prediction plus / minus an offset drawn
    from ``uniform(0.5, 2.0)``. 'Volume' is carried over unchanged.

    Args:
        data: DataFrame with 'Open', 'High', 'Low', 'Close' and 'Volume'
            columns. Only its last row is read; the frame is not modified.
        model: Fitted estimator exposing ``predict``. If it has a
            ``feature_names_in_`` attribute (set by scikit-learn when a
            model is fitted on a DataFrame), the input is passed as a
            DataFrame with named columns; otherwise as a ``(1, 5)`` array.
        n_days: Number of steps to simulate. Zero or a negative value
            yields an empty list.
        random_state: Optional seed (or NumPy ``Generator``) for the
            High/Low offsets. ``None`` keeps the previous behaviour of
            drawing from NumPy's global random state.

    Returns:
        A list with one predicted price (``float``) per simulated step.
    """
    feature_names = ['Open', 'High', 'Low', 'Close', 'Volume']
    rng = np.random if random_state is None else np.random.default_rng(random_state)
    # Decide once whether the model expects named columns.
    wants_named_input = hasattr(model, 'feature_names_in_')

    # Work on a private copy so the caller's frame is never touched.
    current = data.iloc[-1].copy()
    future_prices = []

    for _ in range(n_days):
        values = [current[name] for name in feature_names]
        if wants_named_input:
            model_input = pd.DataFrame([values], columns=feature_names)
        else:
            model_input = np.array(values).reshape(1, -1)

        # ravel() copes with both (1,) and (1, 1) shaped predictions.
        next_price = float(np.ravel(model.predict(model_input))[0])
        future_prices.append(next_price)

        # Feed the prediction back in as the next step's starting point.
        current['Close'] = next_price
        current['Open'] = next_price  # Assume next open equals this close
        current['High'] = next_price + rng.uniform(0.5, 2.0)
        current['Low'] = next_price - rng.uniform(0.5, 2.0)
        # 'Volume' is intentionally left as-is for simplicity.

    return future_prices

# Example usage: simulate five steps ahead and print the header line
# followed by the list of predictions on the next line.
future_prices = predict_future_price(data, model, n_days=5)
print("Predicted Future Prices for the Next 5 Days:", future_prices, sep="\n")