# In [None]:
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import joblib
import json
import yfinance as yf
import numpy as np

# App configuration

# Page-level settings; kept as the first Streamlit call in this script.
page_settings = {
    "page_title": "ESG & Market Volatility Dashboard",
    "page_icon": "📊",
    "layout": "wide",
}
st.set_page_config(**page_settings)

st.title("📊 ESG & Market Volatility Dashboard")

# Introductory blurb shown under the title. The two spaces before each "\n"
# are intentional (Markdown hard line breaks), so they are spelled out here.
intro_markdown = (
    "\n"
    "Professional analytics platform for **investors** and **corporate leaders**.  \n"
    "It connects **sustainability performance (ESG)** with **market risk (volatility)**,  \n"
    "delivering actionable insights for investment decisions.\n"
)
st.markdown(intro_markdown)

# Load Model & Data

@st.cache_resource
def load_model():
    """Deserialize and return the model stored in ``lgbm_mix_model.pkl``.

    The path is relative to the process working directory. The function is
    decorated with ``st.cache_resource``.

    Returns:
        The object produced by ``joblib.load`` for the model file.
    """
    # NOTE(review): joblib files are pickle-based; only load trusted artifacts.
    model_path = "lgbm_mix_model.pkl"
    loaded_model = joblib.load(model_path)
    return loaded_model

@st.cache_data
def load_metrics():
    """Read the model evaluation metrics from ``lgbm_mix_model_metrics.json``.

    The path is relative to the process working directory. The function is
    decorated with ``st.cache_data``.

    Returns:
        The parsed JSON content (the rest of the script reads it with
        ``.get("r2")``, ``.get("mse")`` and ``.get("mae")``).

    Raises:
        FileNotFoundError: If the metrics file does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open("lgbm_mix_model_metrics.json", "r", encoding="utf-8") as f:
        return json.load(f)

@st.cache_data
def load_data():
    """Load the dashboard dataset from ``src/dataset_final.csv``.

    The path is relative to the process working directory. The function is
    decorated with ``st.cache_data``.

    Returns:
        pandas.DataFrame: The CSV content as parsed by ``pd.read_csv`` with
        default options.
    """
    dataset_path = "src/dataset_final.csv"
    frame = pd.read_csv(dataset_path)
    return frame

# Shared artifacts used by every tab below; loaders run left to right
# (model, then metrics, then dataset).
model, metrics, data = load_model(), load_metrics(), load_data()

# Prediction functions

def predict_with_dataset(ticker: str):
    """Run the model on every dataset row belonging to ``ticker``.

    Args:
        ticker: Value matched exactly against the ``Ticker`` column of the
            module-level ``data`` frame.

    Returns:
        A ``(rows, predictions)`` pair: the matching rows of ``data`` and the
        result of ``model.predict`` on those rows with the ``Volatility``,
        ``Date`` and ``Ticker`` columns removed.
    """
    is_requested_ticker = data["Ticker"] == ticker
    ticker_rows = data[is_requested_ticker]

    # Everything except the target and the identifier columns is a feature.
    non_feature_columns = ["Volatility", "Date", "Ticker"]
    features = ticker_rows.drop(columns=non_feature_columns)

    return ticker_rows, model.predict(features)

def predict_with_yfinance(ticker: str):
    """Estimate volatility for a ticker using Yahoo Finance price data.

    Downloads one year of price history, derives a 30-row rolling standard
    deviation of returns scaled by ``sqrt(252)``, and runs the model on the
    most recent Open/High/Low/Close/Volume row.

    Args:
        ticker: Symbol passed to ``yf.download``.

    Returns:
        A ``(prices, prediction)`` pair: the downloaded frame with added
        ``Return`` and ``Volatility`` columns, and the ``model.predict``
        output for the last row. Returns ``(None, None)`` when the download
        yields no rows.
    """
    yf_data = yf.download(ticker, period="1y")
    if yf_data is None or yf_data.empty:
        return None, None

    # Recent yfinance releases return MultiIndex columns (field, ticker) even
    # for a single symbol; keep only the field level so that column lookups
    # below yield Series rather than one-column DataFrames.
    if isinstance(yf_data.columns, pd.MultiIndex):
        yf_data.columns = yf_data.columns.get_level_values(0)

    # "Adj Close" is absent when yfinance auto-adjusts prices (the default in
    # newer releases); in that case "Close" is the series to use.
    price_column = "Adj Close" if "Adj Close" in yf_data.columns else "Close"

    yf_data["Return"] = yf_data[price_column].pct_change()
    yf_data["Volatility"] = yf_data["Return"].rolling(window=30).std() * np.sqrt(252)

    # NOTE(review): predict_with_dataset feeds the model every dataset column
    # except Volatility/Date/Ticker, while this path feeds only OHLCV.
    # Confirm the model accepts this feature set.
    X_new = yf_data[["Open", "High", "Low", "Close", "Volume"]].fillna(0).tail(1)
    y_pred = model.predict(X_new)

    return yf_data, y_pred

# Tabs layout

# One label per dashboard section, in display order.
tab_labels = [
    "📈 Overview",
    "🌍 ESG vs Volatility",
    "🔮 Prediction",
    "💼 Portfolio simulation",
    "⚙️ Model performance",
]
tab1, tab2, tab3, tab4, tab5 = st.tabs(tab_labels)

# Tab 1 - Overview

with tab1:
    st.header("Market & ESG overview")

    # Distribution histograms: ESG scores first, then volatility.
    histogram_specs = (
        ("ESG Score", "Distribution of ESG Scores"),
        ("Volatility", "Distribution of Volatility"),
    )
    for column_name, chart_title in histogram_specs:
        histogram = px.histogram(data, x=column_name, nbins=30, title=chart_title)
        st.plotly_chart(histogram, use_container_width=True)

    # Correlation heatmap restricted to the numeric columns of the dataset.
    numeric_columns = data.select_dtypes(include=[np.number])
    correlation_matrix = numeric_columns.corr()
    heatmap = px.imshow(
        correlation_matrix,
        text_auto=True,
        title="Correlation Matrix (Financials & ESG)",
    )
    st.plotly_chart(heatmap, use_container_width=True)

# Tab 2 - ESG vs Volatility

with tab2:
    st.header("ESG vs Market Volatility")

    # Optional encodings: marker size uses MarketCap only when that column
    # exists; colour falls back to the ESG score when there is no
    # Governance Score column.
    if "MarketCap" in data.columns:
        marker_size = "MarketCap"
    else:
        marker_size = None

    if "Governance Score" in data.columns:
        marker_color = "Governance Score"
    else:
        marker_color = "ESG Score"

    scatter = px.scatter(
        data,
        x="ESG Score",
        y="Volatility",
        size=marker_size,
        color=marker_color,
        hover_data=["Ticker"],
        title="ESG Score vs Volatility",
    )
    st.plotly_chart(scatter, use_container_width=True)

    # Explanatory note; the text (indentation and trailing spaces included)
    # is passed to st.markdown unchanged.
    insight_text = (
        "\n"
        "    **Insight:** This chart explores whether higher ESG scores are associated  \n"
        "    with lower stock volatility — a signal of potentially more resilient firms.\n"
        "    "
    )
    st.markdown(insight_text)

# Tab 3 - Prediction

with tab3:
    st.header("Predict Volatility by Ticker")

    # Normalise the user input: surrounding whitespace would otherwise make a
    # valid symbol miss the dataset lookup and fall through to Yahoo Finance.
    ticker_input = st.text_input("Enter ticker symbol:", "AAPL").strip().upper()

    if not ticker_input:
        # Nothing to look up; avoid a pointless download for an empty symbol.
        st.info("Enter a ticker symbol to see a prediction.")

    elif ticker_input in data["Ticker"].unique():
        # Ticker is covered by the ESG dataset: predict from its dataset rows.
        st.success(f"Data for {ticker_input} retrieved from ESG dataset.")
        df, preds = predict_with_dataset(ticker_input)

        fig5 = px.line(df, x="Date", y="Volatility", title=f"Historical Volatility - {ticker_input}")
        st.plotly_chart(fig5, use_container_width=True)

        # The last prediction corresponds to the last dataset row for the ticker.
        st.metric("Predicted Volatility (latest)", f"{preds[-1]:.4f}")

    else:
        # Unknown ticker: fall back to price data from Yahoo Finance.
        st.warning(f"{ticker_input} not in ESG dataset. Using Yahoo Finance data.")
        df, preds = predict_with_yfinance(ticker_input)

        if df is not None:
            fig6 = px.line(df, x=df.index, y="Volatility", title=f"Estimated Volatility - {ticker_input}")
            st.plotly_chart(fig6, use_container_width=True)

            st.metric("Predicted Volatility", f"{preds[0]:.4f}")
            st.caption("Note: ESG scores unavailable. Prediction based on financial features only.")
        else:
            st.error("Unable to retrieve data for this ticker.")

# Tab 4 - Portfolio Simulation

with tab4:
    st.header("Portfolio Simulation")

    # Compute the ticker universe once; it feeds both the options and the
    # default selection (first three tickers).
    ticker_options = data["Ticker"].unique()

    selected_tickers = st.multiselect(
        "Select up to 5 tickers for portfolio analysis:",
        options=ticker_options,
        default=ticker_options[:3],
        # Enforce the limit promised by the label.
        max_selections=5,
    )

    if selected_tickers:
        port_data = data[data["Ticker"].isin(selected_tickers)]

        # Volatility over time, one line per selected ticker.
        fig7 = px.line(port_data, x="Date", y="Volatility", color="Ticker",
                       title="Volatility Evolution of Selected Portfolio")
        st.plotly_chart(fig7, use_container_width=True)

        # Mean volatility per selected ticker.
        avg_vol = port_data.groupby("Ticker")["Volatility"].mean()
        fig8 = px.bar(avg_vol, title="Average Volatility per Ticker")
        st.plotly_chart(fig8, use_container_width=True)

# Tab 5 - Model Performance
with tab5:
    st.header("Model Performance")

    # One column per metric; a key missing from the metrics file is shown as 0.
    metric_specs = (
        ("R² Score", "r2"),
        ("MSE", "mse"),
        ("MAE", "mae"),
    )
    for column, (label, key) in zip(st.columns(3), metric_specs):
        column.metric(label, f"{metrics.get(key, 0):.3f}")

    # Reading guide for the metrics; the text (indentation and trailing
    # spaces included) is passed to st.markdown unchanged.
    interpretation_text = (
        "\n"
        "    **Interpretation:**  \n"
        "    - R² closer to 1 indicates stronger explanatory power.  \n"
        "    - Lower MSE and MAE indicate higher predictive accuracy.  \n"
        "    "
    )
    st.markdown(interpretation_text)

    st.caption("These metrics are based on the trained LightGBM model using ESG + financial data.")