In [1]:
!pip install yfinance pandas numpy matplotlib seaborn scikit-learn plotly prophet




In [2]:
import yfinance as yf

# Example: Download data for 3 companies (Apple, Google, Microsoft)
tickers = ['AAPL', 'GOOGL', 'MSFT']
data = {}

for ticker in tickers:
    stock = yf.download(ticker, start="2020-01-01", end="2024-01-01")
    data[ticker] = stock


YF.download() has changed argument auto_adjust default to True


[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed
[*********************100%***********************]  1 of 1 completed


In [3]:
import pandas as pd

for ticker in tickers:
    df = data[ticker]
    df = df[['Close']].fillna(method='ffill')  # forward fill missing values
    data[ticker] = df


  df = df[['Close']].fillna(method='ffill')  # forward fill missing values
  df = df[['Close']].fillna(method='ffill')  # forward fill missing values
  df = df[['Close']].fillna(method='ffill')  # forward fill missing values


In [4]:
def add_indicators(df):
    df['SMA_20'] = df['Close'].rolling(window=20).mean()
    df['EMA_20'] = df['Close'].ewm(span=20, adjust=False).mean()

    # Bollinger Bands
    df['STD'] = df['Close'].rolling(window=20).std()
    df['Upper_Band'] = df['SMA_20'] + (2 * df['STD'])
    df['Lower_Band'] = df['SMA_20'] - (2 * df['STD'])

    # RSI
    delta = df['Close'].diff()
    gain = (delta.where(delta > 0, 0)).rolling(window=14).mean()
    loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean()
    rs = gain / loss
    df['RSI'] = 100 - (100 / (1 + rs))

    return df

for ticker in tickers:
    data[ticker] = add_indicators(data[ticker])


In [5]:
from sklearn.ensemble import IsolationForest

def detect_anomalies(df):
    model = IsolationForest(contamination=0.01)
    df['anomaly'] = model.fit_predict(df[['Close']])
    df['anomaly'] = df['anomaly'].map({1: 0, -1: 1})  # 1 = anomaly
    return df

for ticker in tickers:
    data[ticker] = detect_anomalies(data[ticker])


In [17]:
from prophet import Prophet

def forecast_prophet(df):
    df = df.copy()

    # Ensure datetime index
    df.index = pd.to_datetime(df.index)

    # Reset index for Prophet
    df_reset = df.reset_index()

    # Rename columns for Prophet
    # Automatically detect the index name (usually "Date" or "index")
    if 'index' in df_reset.columns:
        df_reset.rename(columns={'index': 'ds', 'Close': 'y'}, inplace=True)
    elif 'Date' in df_reset.columns:
        df_reset.rename(columns={'Date': 'ds', 'Close': 'y'}, inplace=True)
    else:
        raise ValueError("Expected a datetime index or a 'Date' column.")

    # Keep only the required columns
    prophet_df = df_reset[['ds', 'y']]

    # Drop any missing values that might cause issues
    prophet_df.dropna(inplace=True)

    # Prophet modeling
    model = Prophet()
    model.fit(prophet_df)

    # Future prediction for 30 days
    future = model.make_future_dataframe(periods=30)
    forecast = model.predict(future)

    return model, forecast


In [20]:
print(data['AAPL'].reset_index().head())
print(data['AAPL'].columns)
print(data['AAPL'].index)



Price        Date      Close SMA_20     EMA_20 STD Upper_Band Lower_Band RSI  \
Ticker                  AAPL                                                   
0      2020-01-02  72.716064    NaN  72.716064 NaN        NaN        NaN NaN   
1      2020-01-03  72.009132    NaN  72.648738 NaN        NaN        NaN NaN   
2      2020-01-06  72.582909    NaN  72.642468 NaN        NaN        NaN NaN   
3      2020-01-07  72.241547    NaN  72.604285 NaN        NaN        NaN NaN   
4      2020-01-08  73.403641    NaN  72.680414 NaN        NaN        NaN NaN   

Price  anomaly  
Ticker          
0            0  
1            0  
2            0  
3            0  
4            0  
MultiIndex([(     'Close', 'AAPL'),
            (    'SMA_20',     ''),
            (    'EMA_20',     ''),
            (       'STD',     ''),
            ('Upper_Band',     ''),
            ('Lower_Band',     ''),
            (       'RSI',     ''),
            (   'anomaly',     '')],
           names=['Price', 'Tic

In [21]:
import plotly.graph_objects as go

def plot_anomalies(df, title):
    fig = go.Figure()
    fig.add_trace(go.Scatter(x=df.index, y=df['Close'], name='Close Price'))
    fig.add_trace(go.Scatter(x=df[df['anomaly'] == 1].index,
                             y=df[df['anomaly'] == 1]['Close'],
                             mode='markers',
                             name='Anomaly',
                             marker=dict(color='red', size=6)))
    fig.update_layout(title=title, xaxis_title='Date', yaxis_title='Price')
    fig.show()

plot_anomalies(data['AAPL'], "Apple Stock Price with Anomalies")