In [None]:
# Install Required Libraries
!pip install yfinance pandas numpy matplotlib seaborn scikit-learn tensorflow ta plotly mplfinance pandas_datareader statsmodels



In [None]:
# Import Libraries and Set Display Options
import numpy as np
import pandas as pd
import yfinance as yf
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import mplfinance as mpf
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
import datetime
import ta
from scipy import stats
import warnings
from statsmodels.tsa.seasonal import seasonal_decompose

# Notebook-wide display settings:
#   * show every column when a DataFrame is printed
#   * render floats with three decimals
#   * silence all warnings for the rest of the session
pd.options.display.max_columns = None
pd.options.display.float_format = lambda value: '%.3f' % value
warnings.filterwarnings('ignore')

In [None]:
# Select the 'seaborn-v0_8-deep' matplotlib style sheet for the matplotlib
# figures drawn later in the notebook.
plt.style.use('seaborn-v0_8-deep')

In [None]:
# Data Collection Function
def fetch_stock_data(ticker, start_date, end_date):
    """Download price history for one ticker via ``yf.download``.

    Parameters
    ----------
    ticker : str
        Symbol to download.
    start_date, end_date : str
        Forwarded to ``yf.download`` as ``start`` / ``end``.

    Returns
    -------
    pandas.DataFrame or None
        The downloaded frame with flat (single-level) column names, or
        ``None`` when the download raised or produced no rows. In both
        failure cases a message is printed instead of raising.
    """
    try:
        df = yf.download(ticker, start=start_date, end=end_date)
    except Exception as e:
        print(f"Error downloading data: {e}")
        return None

    # A failed lookup can come back as an empty frame rather than an exception
    # (NOTE(review): confirm against the installed yfinance version). Treat it
    # as a failure instead of reporting "NaT to NaT" as a successful download.
    if df is None or df.empty:
        print(f"Error downloading data: no rows returned for {ticker} "
              f"({start_date} to {end_date})")
        return None

    if isinstance(df.columns, pd.MultiIndex):
        # Keep only the first level of each column tuple so columns can be
        # addressed as df['Close'], df['Open'], ...
        df.columns = [col[0] for col in df.columns]

    print(f"Successfully downloaded {ticker} data:")
    print(f"Date range: {df.index.min()} to {df.index.max()}")
    print(f"Total trading days: {len(df)}")
    return df

In [None]:
# Data Collection and Initial Processing
ticker = "AAPL"
start_date = "2019-01-01"
end_date = "2024-11-08"
df = fetch_stock_data(ticker, start_date, end_date)

# fetch_stock_data reports failure by returning None; stop here with a clear
# message instead of failing later with an unrelated AttributeError/KeyError.
if df is None or df.empty:
    raise RuntimeError(f"No price data available for {ticker}; cannot continue.")

# Display data info
print("\nDataset Information:")
# DataFrame.info() writes its report itself and returns None, so wrapping it
# in print() only appends a stray "None" line to the output.
df.info()
print("\nFirst 5 rows:")
print(df.head())
print("\nBasic Statistics:")
print(df.describe())

[*********************100%***********************]  1 of 1 completed

Successfully downloaded AAPL data:
Date range: 2019-01-02 00:00:00+00:00 to 2024-11-07 00:00:00+00:00
Total trading days: 1474

Dataset Information:
<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 1474 entries, 2019-01-02 00:00:00+00:00 to 2024-11-07 00:00:00+00:00
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Adj Close  1474 non-null   float64
 1   Close      1474 non-null   float64
 2   High       1474 non-null   float64
 3   Low        1474 non-null   float64
 4   Open       1474 non-null   float64
 5   Volume     1474 non-null   int64  
dtypes: float64(5), int64(1)
memory usage: 80.6 KB
None

First 5 rows:
                           Adj Close  Close   High    Low   Open     Volume
Date                                                                       
2019-01-02 00:00:00+00:00     37.750 39.480 39.713 38.557 38.722  148158800
2019-01-03 00:00:00+00:00     33.990 35.548 36.430 35.500 35.995  365248800
20




In [None]:
# Basic Price Visualization (Interactive)
# Close line plus a shaded High/Low envelope. Trace order matters: the Low
# trace uses fill='tonexty', which shades up to the trace added just before
# it (High).
price_trace_specs = (
    # (column, legend name, line color, fill mode)
    ('Close', 'Close Price', 'blue', None),
    ('High', 'High Price', 'lightgray', None),
    ('Low', 'Low Price', 'lightgray', 'tonexty'),
)

fig = go.Figure()
for column, trace_name, trace_color, fill_mode in price_trace_specs:
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
            fill=fill_mode,
        )
    )

fig.update_layout(
    template='plotly_white',
    hovermode='x unified',
    xaxis_title='Date',
    yaxis_title='Price ($)',
    title=f'{ticker} Stock Price History',
)

fig.show()

In [None]:
# Candlestick Chart
# One candle per row of df, built from the Open/High/Low/Close columns.
candles = go.Candlestick(
    x=df.index,
    open=df['Open'],
    high=df['High'],
    low=df['Low'],
    close=df['Close'],
)
fig = go.Figure(data=[candles])

candlestick_layout = dict(
    title=f'{ticker} Candlestick Chart',
    yaxis_title='Price ($)',
    xaxis_title='Date',
    template='plotly_white',
)
fig.update_layout(**candlestick_layout)

fig.show()

In [None]:
# Volume Analysis
# Two stacked panels sharing the date axis: closing price on top, traded
# volume as bars underneath.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=('Price', 'Volume'),
)

close_line = go.Scatter(
    x=df.index,
    y=df['Close'],
    mode='lines',
    name='Close Price',
    line=dict(color='blue'),
)
volume_bars = go.Bar(
    x=df.index,
    y=df['Volume'],
    name='Volume',
    marker_color='rgba(0,0,255,0.3)',
)

fig.add_trace(close_line, row=1, col=1)
fig.add_trace(volume_bars, row=2, col=1)

fig.update_layout(
    template='plotly_white',
    showlegend=True,
    height=800,
    title=f'{ticker} Price and Volume Analysis',
)

fig.show()

In [None]:
# Calculate Technical Indicators
def add_technical_indicators(df):
    """Add trend, momentum, volatility and volume indicator columns to ``df``.

    The columns are written onto the frame that is passed in (it is modified
    in place) and the same frame is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'Close', 'High', 'Low' and 'Volume' columns.

    Returns
    -------
    pandas.DataFrame
        ``df`` with these columns added: SMA_20, SMA_50, SMA_200, EMA_20,
        RSI, MACD, MACD_Signal, MACD_Hist, Bollinger_High, Bollinger_Low,
        ATR, OBV. Window lengths not given explicitly use the ``ta``
        library defaults.
    """
    close = df['Close']

    # Trend Indicators
    for window in (20, 50, 200):
        df[f'SMA_{window}'] = ta.trend.sma_indicator(close, window=window)
    df['EMA_20'] = ta.trend.ema_indicator(close, window=20)

    # Momentum Indicators
    df['RSI'] = ta.momentum.rsi(close, window=14)
    # 'MACD' must hold the MACD line itself. macd_diff is the histogram
    # (MACD line minus signal line); storing it under 'MACD' made any
    # MACD-vs-signal comparison meaningless. The histogram is kept separately.
    df['MACD'] = ta.trend.macd(close)
    df['MACD_Signal'] = ta.trend.macd_signal(close)
    df['MACD_Hist'] = ta.trend.macd_diff(close)

    # Volatility Indicators
    bollinger = ta.volatility.BollingerBands(close)
    df['Bollinger_High'] = bollinger.bollinger_hband()
    df['Bollinger_Low'] = bollinger.bollinger_lband()
    df['ATR'] = ta.volatility.average_true_range(df['High'], df['Low'], close)

    # Volume Indicators
    df['OBV'] = ta.volume.on_balance_volume(close, df['Volume'])

    return df

# Add the indicator columns (SMA/EMA, RSI, MACD, Bollinger bands, ATR, OBV)
# that the plotting cells below read from df.
df = add_technical_indicators(df)

In [None]:
# Moving Averages Visualization
# Closing price overlaid with the 20-, 50- and 200-day simple moving averages.
moving_average_specs = (
    # (column, legend name, line color)
    ('Close', 'Close Price', 'blue'),
    ('SMA_20', '20-day SMA', 'orange'),
    ('SMA_50', '50-day SMA', 'red'),
    ('SMA_200', '200-day SMA', 'green'),
)

fig = go.Figure()
for column, trace_name, trace_color in moving_average_specs:
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        )
    )

fig.update_layout(
    template='plotly_white',
    xaxis_title='Date',
    yaxis_title='Price ($)',
    title=f'{ticker} Moving Averages',
)

fig.show()

In [None]:
# Bollinger Bands Visualization
# The lower band is added right after the upper band so that its
# fill='tonexty' shades the area between the two bands.
close_trace = go.Scatter(
    x=df.index,
    y=df['Close'],
    mode='lines',
    name='Close Price',
    line=dict(color='blue'),
)
upper_band_trace = go.Scatter(
    x=df.index,
    y=df['Bollinger_High'],
    mode='lines',
    name='Bollinger High',
    line=dict(color='gray', dash='dash'),
)
lower_band_trace = go.Scatter(
    x=df.index,
    y=df['Bollinger_Low'],
    mode='lines',
    name='Bollinger Low',
    fill='tonexty',
    line=dict(color='gray', dash='dash'),
)

fig = go.Figure(data=[close_trace, upper_band_trace, lower_band_trace])

fig.update_layout(
    template='plotly_white',
    xaxis_title='Date',
    yaxis_title='Price ($)',
    title=f'{ticker} Bollinger Bands',
)

fig.show()

In [None]:
# RSI and MACD Analysis
# Top panel: RSI with dashed reference lines at 70 and 30.
# Bottom panel: the 'MACD' and 'MACD_Signal' columns.
fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=('RSI', 'MACD'),
)

rsi_trace = go.Scatter(
    x=df.index,
    y=df['RSI'],
    mode='lines',
    name='RSI',
    line=dict(color='purple'),
)
fig.add_trace(rsi_trace, row=1, col=1)

# Reference lines go on after the RSI trace so the panel already has content.
for rsi_level, level_color in ((70, "red"), (30, "green")):
    fig.add_hline(y=rsi_level, line_dash="dash", line_color=level_color, row=1, col=1)

for column, trace_name, trace_color in (
    ('MACD', 'MACD', 'blue'),
    ('MACD_Signal', 'Signal Line', 'orange'),
):
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        ),
        row=2,
        col=1,
    )

fig.update_layout(
    template='plotly_white',
    title=f'{ticker} RSI and MACD Analysis',
    height=800,
)

fig.show()

In [None]:
# Returns Analysis
# Daily_Return is the day-over-day percentage change of Close;
# Cumulative_Return is the running product of (1 + daily return).
daily_returns = df['Close'].pct_change()
df['Daily_Return'] = daily_returns
df['Cumulative_Return'] = daily_returns.add(1).cumprod()

return_panels = (
    # (column, legend name, line color) - one subplot row each, top to bottom
    ('Daily_Return', 'Daily Returns', 'blue'),
    ('Cumulative_Return', 'Cumulative Returns', 'green'),
)

fig = make_subplots(
    rows=2,
    cols=1,
    shared_xaxes=True,
    vertical_spacing=0.03,
    subplot_titles=('Daily Returns', 'Cumulative Returns'),
)

for panel_row, (column, trace_name, trace_color) in enumerate(return_panels, start=1):
    fig.add_trace(
        go.Scatter(
            x=df.index,
            y=df[column],
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        ),
        row=panel_row,
        col=1,
    )

fig.update_layout(
    template='plotly_white',
    title=f'{ticker} Returns Analysis',
    height=800,
)

fig.show()

In [None]:
# Statistical Analysis
# Descriptive statistics and a Shapiro-Wilk normality test on the daily returns.
returns_sample = df['Daily_Return'].dropna()  # the test cannot take missing values

print("Statistical Analysis of Daily Returns:")
print("\nDescriptive Statistics:")
print(df['Daily_Return'].describe())

print("\nNormality Test (Shapiro-Wilk):")
statistic, p_value = stats.shapiro(returns_sample)
print(f"Statistic: {statistic:.4f}")
# Scientific notation: with fixed-point formatting a very small p-value is
# rendered as "0.0000", which hides its magnitude.
print(f"P-value: {p_value:.4e}")


Statistical Analysis of Daily Returns:

Descriptive Statistics:
count   1473.000
mean       0.001
std        0.020
min       -0.129
25%       -0.008
50%        0.001
75%        0.012
max        0.120
Name: Daily_Return, dtype: float64

Normality Test (Shapiro-Wilk):
Statistic: 0.9410
P-value: 0.0000


In [None]:
# Returns Distribution
# Histogram of observed daily returns overlaid with a normal sample that has
# the same mean, standard deviation and number of observations.
observed_returns = df['Daily_Return'].dropna()

# Seeded generator so the reference sample (and therefore the figure) is the
# same on every run. The sample size is taken from the NaN-free series so
# both histograms contain the same number of observations.
reference_rng = np.random.default_rng(42)
normal_reference = reference_rng.normal(
    observed_returns.mean(),
    observed_returns.std(),
    len(observed_returns),
)

fig = go.Figure()

fig.add_trace(go.Histogram(x=observed_returns,
                          nbinsx=50,
                          name='Daily Returns',
                          showlegend=True))

fig.add_trace(go.Histogram(x=normal_reference,
                          nbinsx=50,
                          name='Normal Distribution',
                          opacity=0.7,
                          showlegend=True))

fig.update_layout(
    title='Distribution of Daily Returns vs Normal Distribution',
    xaxis_title='Return',
    yaxis_title='Frequency',
    barmode='overlay',
    template='plotly_white'
)

fig.show()

In [None]:
# Volatility Analysis
# Rolling 21-row standard deviation of daily returns, scaled by sqrt(252)
# to express it on an annual basis.
rolling_return_std = df['Daily_Return'].rolling(window=21).std()
df['Volatility'] = rolling_return_std * np.sqrt(252)

volatility_trace = go.Scatter(
    x=df.index,
    y=df['Volatility'],
    mode='lines',
    name='21-day Rolling Volatility',
    line=dict(color='red'),
)
fig = go.Figure()
fig.add_trace(volatility_trace)

fig.update_layout(
    template='plotly_white',
    xaxis_title='Date',
    yaxis_title='Volatility',
    title=f'{ticker} Annualized Volatility (21-day Rolling Window)',
)

fig.show()

In [None]:
# Prepare Data for LSTM Model
def prepare_data(df, look_back=60, train_fraction=None):
    """Scale the 'Close' column and cut it into sliding windows for an LSTM.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    look_back : int, default 60
        Number of consecutive past values in each input window.
    train_fraction : float or None, default None
        Fraction of the windows (taken from the start of the series) that
        will be used for training. When given, the scaler is fitted only on
        the rows those training windows and their targets cover, so the
        min/max of the later (test) period do not leak into the scaling.
        ``None`` fits the scaler on the whole series (previous behaviour).

    Returns
    -------
    x : numpy.ndarray, shape (n_windows, look_back, 1)
        Scaled input windows.
    y : numpy.ndarray, shape (n_windows,)
        Scaled value that follows each window.
    scaler : MinMaxScaler
        The fitted scaler, needed to map predictions back to prices.

    Raises
    ------
    ValueError
        If the series is not longer than ``look_back`` or ``train_fraction``
        is outside (0, 1].
    """
    close = df['Close'].values.reshape(-1, 1)
    n_windows = len(close) - look_back
    if n_windows <= 0:
        raise ValueError(
            f"Need more than look_back={look_back} rows, got {len(close)}."
        )

    if train_fraction is None:
        fit_end = len(close)
    else:
        if not 0 < train_fraction <= 1:
            raise ValueError("train_fraction must be in the interval (0, 1].")
        # Training window k uses rows k .. k+look_back (inputs plus target),
        # so the training part ends at look_back + number_of_training_windows.
        fit_end = look_back + int(n_windows * train_fraction)

    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(close[:fit_end])
    # Rows after fit_end may scale outside [0, 1]; that is expected when the
    # scaler has only seen the training period.
    scaled_data = scaler.transform(close)

    x = np.array([scaled_data[i - look_back:i, 0]
                  for i in range(look_back, len(scaled_data))])
    y = scaled_data[look_back:, 0].copy()
    x = x.reshape(x.shape[0], x.shape[1], 1)

    return x, y, scaler

look_back = 60
train_fraction = 0.8
x, y, scaler = prepare_data(df, look_back, train_fraction=train_fraction)

# Split data chronologically (no shuffling): the first train_fraction of the
# windows is used for training, the remainder for testing.
train_size = int(len(x) * train_fraction)
x_train, x_test = x[:train_size], x[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

In [None]:
# Build and Train LSTM Model
# Three stacked 50-unit LSTM layers, each followed by 20% dropout, feeding a
# single-unit dense output layer.
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(look_back, 1)))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(50))
model.add(Dropout(0.2))
model.add(Dense(1))

optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='mse')

# 10% of the training data is held out for validation during fitting.
history = model.fit(
    x_train,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.1,
    verbose=1,
)

Epoch 1/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 198ms/step - loss: 0.0488 - val_loss: 0.0191
Epoch 2/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 245ms/step - loss: 0.0045 - val_loss: 0.0038
Epoch 3/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 247ms/step - loss: 0.0035 - val_loss: 0.0025
Epoch 4/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 133ms/step - loss: 0.0028 - val_loss: 8.7984e-04
Epoch 5/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 232ms/step - loss: 0.0031 - val_loss: 0.0028
Epoch 6/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 172ms/step - loss: 0.0027 - val_loss: 0.0052
Epoch 7/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 115ms/step - loss: 0.0032 - val_loss: 0.0016
Epoch 8/50
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 184ms/step - loss: 0.0025 - val_loss: 0.0015
Epoch 9/50
[1m32/32[0m [32m━━━

In [None]:
# Model Performance Visualization
# Training and validation loss per epoch, read from the Keras History object.
loss_curve_specs = (
    # (history key, legend name, line color)
    ('loss', 'Training Loss', 'blue'),
    ('val_loss', 'Validation Loss', 'red'),
)

fig = go.Figure()
for history_key, trace_name, trace_color in loss_curve_specs:
    loss_values = history.history[history_key]
    fig.add_trace(
        go.Scatter(
            x=list(range(len(loss_values))),
            y=loss_values,
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        )
    )

fig.update_layout(
    template='plotly_white',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    title='Model Training History',
)

fig.show()

In [None]:
# Make Predictions and Evaluate
# Predict on both splits first, then map predictions and targets from the
# scaled space back to prices before computing the error metrics.
train_predict = model.predict(x_train)
test_predict = model.predict(x_test)

# Training split, in price units
train_predict = scaler.inverse_transform(train_predict)
y_train_inv = scaler.inverse_transform(y_train.reshape(-1, 1))

# Test split, in price units
test_predict = scaler.inverse_transform(test_predict)
y_test_inv = scaler.inverse_transform(y_test.reshape(-1, 1))

# Root-mean-squared error
train_mse = mean_squared_error(y_train_inv, train_predict)
test_mse = mean_squared_error(y_test_inv, test_predict)
train_rmse = np.sqrt(train_mse)
test_rmse = np.sqrt(test_mse)

# Mean absolute error
train_mae = mean_absolute_error(y_train_inv, train_predict)
test_mae = mean_absolute_error(y_test_inv, test_predict)

print("Model Evaluation Metrics:")
print(f'Train RMSE: ${train_rmse:.2f}')
print(f'Test RMSE: ${test_rmse:.2f}')
print(f'Train MAE: ${train_mae:.2f}')
print(f'Test MAE: ${test_mae:.2f}')

[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 50ms/step
[1m9/9[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Model Evaluation Metrics:
Train RMSE: $5.18
Test RMSE: $5.91
Train MAE: $3.82
Test MAE: $4.65


In [None]:
# Visualize Predictions
# The first prediction belongs to row `look_back` of df, so the training
# predictions cover rows look_back .. look_back+train_size and the test
# predictions cover everything after that.
train_end = train_size + look_back
train_dates = df.index[look_back:train_end]
test_dates = df.index[train_end:]

prediction_trace_specs = (
    # (x values, y values, legend name, line color)
    (df.index, df['Close'], 'Actual Close', 'blue'),
    (train_dates, train_predict.flatten(), 'Train Predictions', 'green'),
    (test_dates, test_predict.flatten(), 'Test Predictions', 'red'),
)

fig = go.Figure()
for x_values, y_values, trace_name, trace_color in prediction_trace_specs:
    fig.add_trace(
        go.Scatter(
            x=x_values,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
        )
    )

fig.update_layout(
    template='plotly_white',
    yaxis_title='Price ($)',
    xaxis_title='Date',
    title=f'{ticker} Stock Price Prediction',
)

fig.show()

In [None]:
# Future Predictions
# Build one input window from the most recent closes. The window length must
# match what the model was trained with, so it is taken from look_back
# instead of a hard-coded 60.
recent_closes = df['Close'].values[-look_back:]
recent_closes_scaled = scaler.transform(recent_closes.reshape(-1, 1))

# Model input shape: (batch of 1, look_back time steps, 1 feature)
X_future = recent_closes_scaled.reshape(1, look_back, 1)

future_pred = model.predict(X_future)
future_pred = scaler.inverse_transform(future_pred)  # back to price units

print(f"Predicted price for next day: ${future_pred[0][0]:.2f}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 29ms/step
Predicted price for next day: $218.51


In [None]:
# Prediction Confidence Interval
def prediction_interval(y_true, y_pred, conf=0.95):
    """Approximate interval around each prediction from past residuals.

    The half-width is ``t * sqrt(mse * (1 + 1/n))`` with ``mse`` the mean
    squared residual and ``t`` the Student-t quantile for ``n - 1`` degrees
    of freedom. This is the spread expected for an individual new
    observation. Using ``sqrt(mse / n)`` instead (the standard error of the
    mean residual) shrinks towards zero as ``n`` grows and badly understates
    the uncertainty of a single prediction. The formula assumes roughly
    unbiased, constant-variance, normally distributed residuals.

    Parameters
    ----------
    y_true, y_pred : array-like
        Observed and predicted values with the same number of elements.
    conf : float, default 0.95
        Coverage level of the interval.

    Returns
    -------
    (lower, upper) : tuple of numpy.ndarray
        Interval bounds, each shaped like ``y_pred``.

    Raises
    ------
    ValueError
        If the inputs differ in size or contain fewer than two values.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    if y_true.size != y_pred.size:
        raise ValueError("y_true and y_pred must have the same number of elements.")

    # Flatten before subtracting so (n,) vs (n, 1) inputs cannot silently
    # broadcast into an (n, n) residual matrix.
    residuals = y_true.ravel() - y_pred.ravel()
    n = residuals.size
    if n < 2:
        raise ValueError("At least two observations are required.")

    mse = np.sum(residuals**2) / n
    scale = np.sqrt(mse * (1 + 1 / n))
    return stats.t.interval(conf, n-1, loc=y_pred, scale=scale)

# Interval bounds for the test-period predictions
lower, upper = prediction_interval(y_test_inv, test_predict)

# The lower bound is added right after the upper bound so that its
# fill='tonexty' shades the band between the two.
interval_trace_specs = (
    # (y values, legend name, line color, fill mode)
    (y_test_inv.flatten(), 'Actual', 'blue', None),
    (test_predict.flatten(), 'Predicted', 'red', None),
    (upper.flatten(), 'Upper CI', 'gray', None),
    (lower.flatten(), 'Lower CI', 'gray', 'tonexty'),
)

fig = go.Figure()
for y_values, trace_name, trace_color, fill_mode in interval_trace_specs:
    fig.add_trace(
        go.Scatter(
            x=test_dates,
            y=y_values,
            mode='lines',
            name=trace_name,
            line=dict(color=trace_color),
            fill=fill_mode,
        )
    )

fig.update_layout(
    template='plotly_white',
    yaxis_title='Price ($)',
    xaxis_title='Date',
    title=f'{ticker} Prediction with Confidence Interval',
)

fig.show()

In [None]:
# Time Series Decomposition
# Additive decomposition of Close with a period of 252 rows; each component
# is drawn in its own panel on a shared date axis.
decomposition = seasonal_decompose(df['Close'], model='additive', period=252)

component_titles = ('Observed', 'Trend', 'Seasonal', 'Residual')
component_series = (
    decomposition.observed,
    decomposition.trend,
    decomposition.seasonal,
    decomposition.resid,
)

fig = make_subplots(
    rows=4,
    cols=1,
    shared_xaxes=True,
    subplot_titles=component_titles,
)

for panel_row, (component_title, series) in enumerate(
    zip(component_titles, component_series), start=1
):
    fig.add_trace(
        go.Scatter(x=series.index, y=series, mode='lines', name=component_title),
        row=panel_row,
        col=1,
    )

fig.update_layout(
    template='plotly_white',
    title_text=f"{ticker} Time Series Decomposition",
    height=900,
)

fig.show()

In [None]:
# Risk-Return Analysis
# Scale the daily mean and standard deviation of returns to a 252-day year.
# The ratio below divides return by volatility without subtracting a
# risk-free rate.
mean_daily_return = df['Daily_Return'].mean()
std_daily_return = df['Daily_Return'].std()

annual_return = mean_daily_return * 252
annual_volatility = std_daily_return * np.sqrt(252)
sharpe_ratio = annual_return / annual_volatility

print(f"Annual Return: {annual_return:.2%}")
print(f"Annual Volatility: {annual_volatility:.2%}")
print(f"Sharpe Ratio: {sharpe_ratio:.2f}")

Annual Return: 34.82%
Annual Volatility: 31.13%
Sharpe Ratio: 1.12


In [None]:
# Monte Carlo Simulation
def monte_carlo_simulation(start_price, days, iterations, volatility, return_rate,
                           trading_days_per_year=252, seed=None):
    """Simulate price paths with geometric Brownian motion.

    Each step multiplies the previous price by
    ``exp((return_rate - 0.5 * volatility**2) * dt + volatility * sqrt(dt) * Z)``
    with ``Z`` standard normal and ``dt = 1 / trading_days_per_year``.

    Parameters
    ----------
    start_price : float
        Price in the first row of every path.
    days : int
        Number of rows (time points) per path, including the starting row.
    iterations : int
        Number of independent paths.
    volatility : float
        Annualized volatility.
    return_rate : float
        Annualized drift.
    trading_days_per_year : int, default 252
        Length of one year in steps. The step size is derived from this, not
        from ``days``: ``return_rate`` and ``volatility`` are annual figures,
        so ``dt = 1 / days`` is only correct when exactly one year is
        simulated.
    seed : int or None, default None
        When given, a dedicated seeded generator is used so the result is
        reproducible. ``None`` draws from NumPy's global random state.

    Returns
    -------
    numpy.ndarray, shape (days, iterations)
        Simulated prices; row 0 equals ``start_price``.

    Raises
    ------
    ValueError
        If ``days`` is smaller than 1.
    """
    if days < 1:
        raise ValueError("days must be at least 1.")

    dt = 1 / trading_days_per_year
    rng = np.random if seed is None else np.random.default_rng(seed)

    drift = (return_rate - 0.5 * volatility**2) * dt
    shocks = volatility * rng.normal(0, np.sqrt(dt), (days - 1, iterations))

    # Cumulative log-returns; the leading row of zeros keeps row 0 at start_price.
    log_growth = np.vstack([
        np.zeros((1, iterations)),
        np.cumsum(drift + shocks, axis=0),
    ])
    prices = start_price * np.exp(log_growth)

    return prices

days = 252  # One trading year
iterations = 1000
last_price = df['Close'].iloc[-1]

simulated_prices = monte_carlo_simulation(last_price, days, iterations, annual_volatility, annual_return)

day_axis = list(range(days))

# Drawing every path as its own trace with its own legend entry makes the
# figure very heavy and buries the summary lines under hundreds of legend
# rows. Only a subset of paths is drawn, without legend or hover entries;
# the percentile and median lines are still computed from all paths.
max_paths_shown = 100

fig = go.Figure()

for i in range(min(iterations, max_paths_shown)):
    fig.add_trace(go.Scatter(x=day_axis, y=simulated_prices[:, i],
                             mode='lines', opacity=0.1, line=dict(color='blue'),
                             name=f'Simulation {i+1}',
                             showlegend=False, hoverinfo='skip'))

fig.add_trace(go.Scatter(x=day_axis, y=np.percentile(simulated_prices, 95, axis=1),
                         mode='lines', line=dict(color='red', width=2, dash='dash'),
                         name='95th Percentile'))

fig.add_trace(go.Scatter(x=day_axis, y=np.percentile(simulated_prices, 5, axis=1),
                         mode='lines', line=dict(color='green', width=2, dash='dash'),
                         name='5th Percentile'))

fig.add_trace(go.Scatter(x=day_axis, y=np.median(simulated_prices, axis=1),
                         mode='lines', line=dict(color='black', width=2),
                         name='Median'))

fig.update_layout(
    title=f'{ticker} Monte Carlo Simulation (1 Year Forecast)',
    xaxis_title='Trading Days',
    yaxis_title='Simulated Price ($)',
    showlegend=True,
    template='plotly_white'
)

fig.show()