# In [None]:
import pandas as pd
import numpy as np
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.preprocessing import StandardScaler
import plotly.graph_objects as go
import streamlit as st

# Function to load and preprocess the data
def preprocess_crypto_data(file_path, crypto_symbol):
    """Load one symbol's price history and add rolling/lag features.

    The CSV is expected to contain the columns ``Date``, ``Symbol``, ``Adj``,
    ``Open``, ``High``, ``Low`` and ``Volume``; a missing column raises
    ``KeyError``.

    Args:
        file_path: Path or file-like object accepted by ``pandas.read_csv``.
        crypto_symbol: Value of the ``Symbol`` column to keep.

    Returns:
        A DataFrame indexed by calendar day containing the five price/volume
        columns plus ``Moving_Avg_7``, ``Moving_Avg_14``, ``Moving_Avg_30``,
        ``Volatility_10``, ``Lag_1`` and ``Lag_2``.
    """
    crypto_data = pd.read_csv(file_path)
    crypto_data['Date'] = pd.to_datetime(crypto_data['Date'])
    crypto_data = crypto_data.set_index('Date')

    # Keep only the requested cryptocurrency and the columns used downstream.
    features = ['Adj', 'Open', 'High', 'Low', 'Volume']
    crypto_series = crypto_data.loc[crypto_data['Symbol'] == crypto_symbol, features]

    # asfreq() reindexes onto a daily grid and raises on duplicate timestamps,
    # so collapse repeated dates first, keeping the row that appears last.
    crypto_series = crypto_series[~crypto_series.index.duplicated(keep='last')]

    # Put the data on a gap-free daily grid; missing days repeat the previous row.
    crypto_series = crypto_series.asfreq('D').ffill()

    # Feature engineering: rolling means, rolling volatility and lagged prices.
    adj = crypto_series['Adj']
    for window in (7, 14, 30):
        crypto_series[f'Moving_Avg_{window}'] = adj.rolling(window=window).mean()
    crypto_series['Volatility_10'] = adj.rolling(window=10).std()
    crypto_series['Lag_1'] = adj.shift(1)
    crypto_series['Lag_2'] = adj.shift(2)

    # The rolling/shift features are NaN for their warm-up rows; back-fill them
    # with the first available value. NOTE: this copies later values into
    # earlier rows, so the first rows of each feature contain look-ahead data.
    crypto_series = crypto_series.bfill()

    return crypto_series

# Function to train and forecast using SARIMAX
def _mean_absolute_percentage_error(actual, predicted):
    """Return the MAPE (in percent) of ``predicted`` against ``actual``, compared by position."""
    actual = np.asarray(actual, dtype=float)
    predicted = np.asarray(predicted, dtype=float)
    # A zero actual value makes the percentage error undefined/infinite, and
    # non-finite entries carry no information, so leave both out of the mean.
    usable = np.isfinite(actual) & np.isfinite(predicted) & (actual != 0)
    if not usable.any():
        return float('nan')
    relative_error = (actual[usable] - predicted[usable]) / actual[usable]
    return np.mean(np.abs(relative_error)) * 100


def _moving_average_signals(crypto_series):
    """Return ``(buy, sell)`` row subsets where both the 7- and 14-day MAs are above/below the 30-day MA."""
    ma_7 = crypto_series['Moving_Avg_7']
    ma_14 = crypto_series['Moving_Avg_14']
    ma_30 = crypto_series['Moving_Avg_30']
    buy_signals = crypto_series[(ma_7 > ma_30) & (ma_14 > ma_30)]
    sell_signals = crypto_series[(ma_7 < ma_30) & (ma_14 < ma_30)]
    return buy_signals, sell_signals


def _build_forecast_figure(crypto_series, forecast_index, sarimax_forecast,
                           buy_signals, sell_signals):
    """Build the plotly figure: candlesticks, forecast line, moving averages and signal markers."""
    fig = go.Figure()

    fig.add_trace(go.Candlestick(
        x=crypto_series.index,
        open=crypto_series['Open'],
        high=crypto_series['High'],
        low=crypto_series['Low'],
        close=crypto_series['Adj'],
        name="OHLC"
    ))

    # Forecast over the held-out period.
    fig.add_trace(go.Scatter(
        x=forecast_index,
        y=sarimax_forecast,
        mode='lines',
        name='Forecasted Data',
        line=dict(color='orange', dash='dot')
    ))

    # Moving averages, one line each.
    moving_averages = (
        ('Moving_Avg_7', '7-Day MA', 'blue'),
        ('Moving_Avg_14', '14-Day MA', 'purple'),
        ('Moving_Avg_30', '30-Day MA', 'green'),
    )
    for column, label, color in moving_averages:
        fig.add_trace(go.Scatter(
            x=crypto_series.index,
            y=crypto_series[column],
            mode='lines',
            name=label,
            line=dict(color=color)
        ))

    # Buy / sell markers placed at the adjusted price of each signal day.
    signal_markers = (
        (buy_signals, 'Buy Signals', 'green', 'triangle-up'),
        (sell_signals, 'Sell Signals', 'red', 'triangle-down'),
    )
    for signals, label, color, symbol in signal_markers:
        fig.add_trace(go.Scatter(
            x=signals.index,
            y=signals['Adj'],
            mode='markers',
            name=label,
            marker=dict(color=color, size=10, symbol=symbol)
        ))

    fig.update_layout(
        title="SARIMAX Forecast with Buy/Sell Signals and Moving Averages",
        xaxis_title="Date",
        yaxis_title="Price (USD)",
        xaxis_rangeslider_visible=True,
        template="plotly_white",
        legend=dict(orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1)
    )
    return fig


def train_sarimax(crypto_series, crypto_symbol):
    """Fit a SARIMAX model on the first 80% of the data and forecast the rest.

    Args:
        crypto_series: DataFrame with the columns ``Adj``, ``Open``, ``High``,
            ``Low``, ``Moving_Avg_7``, ``Moving_Avg_14``, ``Moving_Avg_30``,
            ``Volatility_10`` and ``Lag_1``.
        crypto_symbol: Accepted for interface compatibility; not used by the
            current implementation.

    Returns:
        A tuple ``(accuracy, fig, sarimax_forecast)`` where ``accuracy`` is
        ``100 - MAPE`` over the held-out period, ``fig`` is a plotly figure
        and ``sarimax_forecast`` is the forecast returned by the fitted model.

    Raises:
        ValueError: If there are too few rows to form a non-empty training set.
    """
    # Target variable and exogenous regressors.
    target_series = crypto_series['Adj']
    exog_features = ['Moving_Avg_7', 'Moving_Avg_14', 'Moving_Avg_30', 'Volatility_10', 'Lag_1']
    exog_data = crypto_series[exog_features].values

    # Chronological 80/20 train-test split.
    train_size = int(len(target_series) * 0.8)
    if train_size == 0:
        raise ValueError(
            "Need at least 2 observations to split into train and test sets; "
            f"got {len(target_series)}."
        )
    train = target_series[:train_size]
    test = target_series[train_size:]
    exog_train = exog_data[:train_size, :]
    exog_test = exog_data[train_size:, :]

    # Normalize the exogenous variables; the scaler is fitted on the training
    # rows only so test-period statistics do not leak into the scaling.
    # NOTE(review): the test-period exogenous values themselves come straight
    # from crypto_series; if they are derived from the realized price, the
    # reported accuracy benefits from information unavailable at forecast time.
    scaler = StandardScaler()
    exog_train = scaler.fit_transform(exog_train)
    exog_test = scaler.transform(exog_test)

    # SARIMAX parameters: (p, d, q) and (P, D, Q, s) with a 30-step season.
    order = (1, 1, 1)
    seasonal_order = (1, 1, 0, 30)

    sarimax_model = SARIMAX(train, order=order, seasonal_order=seasonal_order, exog=exog_train)
    sarimax_result = sarimax_model.fit(disp=False, maxiter=1000)

    # Forecast for the test period.
    sarimax_forecast = sarimax_result.forecast(steps=len(test), exog=exog_test)

    # Compare by position rather than by index label: the forecast has exactly
    # len(test) steps, but its index need not match the test index.
    mape = _mean_absolute_percentage_error(test, sarimax_forecast)
    accuracy = 100 - mape

    buy_signals, sell_signals = _moving_average_signals(crypto_series)
    fig = _build_forecast_figure(
        crypto_series, test.index, sarimax_forecast, buy_signals, sell_signals
    )

    return accuracy, fig, sarimax_forecast