<a href="https://colab.research.google.com/github/Chetankamineni/stock-prediction/blob/main/test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import yfinance as yf
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go

# --- Step 1: Data Acquisition ---
# Define the stock ticker and the date range
ticker = 'AAPL'
start_date = '2020-01-01'
end_date = '2025-08-01' # Use a recent date

# Download the data
print(f"Downloading data for {ticker}...")
# The FutureWarning is normal and can be ignored
data = yf.download(ticker, start=start_date, end=end_date)
print("Download complete.")


# --- Step 2: Data Cleaning and Visualization ---
# Clean up the column names (e.g., from ('Close', 'AAPL') to 'Close')
data.columns = data.columns.droplevel(1)

# --- Plotting with Matplotlib (Static Chart) ---
print("\nGenerating static plot with Matplotlib...")
plt.figure(figsize=(15, 7))
plt.plot(data.index, data['Close'], label='AAPL Close Price')
plt.title('Apple (AAPL) Closing Price History')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()

# --- Plotting with Plotly (Interactive Chart) ---
print("Generating interactive candlestick chart with Plotly...")
fig = go.Figure(data=[go.Candlestick(x=data.index,
                open=data['Open'],
                high=data['High'],
                low=data['Low'],
                close=data['Close'])])

fig.update_layout(
    title='Apple (AAPL) Interactive Candlestick Chart',
    yaxis_title='Stock Price (USD)',
    xaxis_title='Date',
    xaxis_rangeslider_visible=True # Adds a slider at the bottom
)
fig.show()

from statsmodels.tsa.arima.model import ARIMA
from sklearn.model_selection import train_test_split

# --- Step 4: Modeling with ARIMA ---

# We will use the 'Close' price for our time series analysis
close_prices = data['Close']

# Split data into training and testing sets (80% train, 20% test)
train_size = int(len(close_prices) * 0.8)
train_data, test_data = close_prices[0:train_size], close_prices[train_size:]

print(f"\nTraining set size: {len(train_data)}")
print(f"Test set size: {len(test_data)}")

# Build the ARIMA model
# The order (p,d,q) is a key parameter. (5,1,0) is a common starting point.
# p: The number of lag observations (autoregressive part).
# d: The number of times the raw observations are differenced (integrated part).
# q: The size of the moving average window (moving average part).
model = ARIMA(train_data, order=(5, 1, 0))
model_fit = model.fit()
print("\nARIMA Model Summary:")
print(model_fit.summary())

# Make predictions
start_index = len(train_data)
end_index = len(train_data) + len(test_data) - 1
predictions = model_fit.predict(start=start_index, end=end_index, typ='levels')

# Visualize the predictions
plt.figure(figsize=(15, 7))
plt.plot(train_data.index, train_data, label='Training Data')
plt.plot(test_data.index, test_data, label='Actual Price (Test Data)', color='green')
plt.plot(predictions.index, predictions, label='ARIMA Predictions', color='red')
plt.title('AAPL Stock Price Prediction using ARIMA')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# --- Step 4 (Part 2): Modeling with LSTM ---

# 1. Data Preparation
print("\nPreparing data for LSTM model...")
scaler = MinMaxScaler(feature_range=(0, 1))
# Use the entire 'Close' price dataset for scaling
scaled_data = scaler.fit_transform(data['Close'].values.reshape(-1, 1))

# We'll use the same train/test split size as before
train_data_scaled = scaled_data[0:train_size, :]

# Create the training data sequences
X_train = []
y_train = []
time_step = 60 # Use 60 previous days to predict the next one

for i in range(time_step, len(train_data_scaled)):
    X_train.append(train_data_scaled[i-time_step:i, 0])
    y_train.append(train_data_scaled[i, 0])

# Convert to numpy arrays
X_train, y_train = np.array(X_train), np.array(y_train)

# Reshape the data to be 3D for the LSTM model [samples, time steps, features]
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# 2. Build and Train the LSTM Model
print("Building and training LSTM model...")
lstm_model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(X_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1)
])

lstm_model.compile(optimizer='adam', loss='mean_squared_error')
lstm_model.fit(X_train, y_train, batch_size=1, epochs=1) # Using 1 epoch for speed, can be increased

# 3. Make Predictions
print("Making predictions with LSTM model...")
# We need the last 'time_step' days from the training data to predict the first test value
inputs = data['Close'][len(data) - len(test_data) - time_step:].values
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

X_test = []
for i in range(time_step, len(inputs)):
    X_test.append(inputs[i-time_step:i, 0])

X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

lstm_predictions_scaled = lstm_model.predict(X_test)
# Inverse transform the predictions to get the actual price values
lstm_predictions = scaler.inverse_transform(lstm_predictions_scaled)

# Create a pandas Series for plotting
lstm_predictions = pd.Series(lstm_predictions.flatten(), index=test_data.index)


# 4. Visualize the Predictions
plt.figure(figsize=(15, 7))
plt.plot(train_data.index, train_data, label='Training Data')
plt.plot(test_data.index, test_data, label='Actual Price (Test Data)', color='green')
plt.plot(lstm_predictions.index, lstm_predictions, label='LSTM Predictions', color='orange')
plt.title('AAPL Stock Price Prediction using LSTM')
plt.xlabel('Date')
plt.ylabel('Price (USD)')
plt.legend()
plt.grid(True)
plt.show()

from sklearn.metrics import mean_squared_error, mean_absolute_error
import numpy as np

# --- Step 5: Model Evaluation ---
# Note: Ensure 'predictions' (from ARIMA) and 'lstm_predictions' are available from previous steps

# ARIMA Model Evaluation
arima_rmse = np.sqrt(mean_squared_error(test_data, predictions))
arima_mae = mean_absolute_error(test_data, predictions)
print(f"\n--- ARIMA Model Evaluation ---")
print(f"RMSE: ${arima_rmse:.2f}")
print(f"MAE:  ${arima_mae:.2f}")


# LSTM Model Evaluation
lstm_rmse = np.sqrt(mean_squared_error(test_data, lstm_predictions))
lstm_mae = mean_absolute_error(test_data, lstm_predictions)
print(f"\n--- LSTM Model Evaluation ---")
print(f"RMSE: ${lstm_rmse:.2f}")
print(f"MAE:  ${lstm_mae:.2f}")