# In [4]:
# Cell index 0: load CSV and produce interactive Plotly plots with moving average and Kalman filter
import os
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

# Optional: adjust renderer if running outside a Jupyter environment
# pio.renderers.default = 'notebook_connected'  # try 'iframe_connected' or 'png' if needed

csv_path = "no_load_data_3.csv"

# Stop early with a message naming the working directory when the data file is absent.
csv_is_present = os.path.exists(csv_path)
if not csv_is_present:
    raise FileNotFoundError(f"{csv_path} not found in the current working directory: {os.getcwd()}")

df = pd.read_csv(csv_path)

# Number of samples averaged by the rolling mean; must be a positive count.
window = 5
if not window >= 1:
    raise ValueError("window must be >= 1")

# simple 1D Kalman filter implementation for a time series
def kalman_filter(series, process_var=1e-5, meas_var=1e-2):
    """Smooth a 1-D series with a scalar Kalman filter.

    Each step predicts that the state equals the previous estimate, grows
    the estimate variance by ``process_var``, and then blends in the new
    measurement weighted by the Kalman gain.

    Args:
        series: pandas Series or any 1-D array-like (list, tuple, ndarray)
            of numeric measurements.
        process_var: Variance added to the estimate at every predict step.
        meas_var: Variance assumed for every measurement.

    Returns:
        A float ``pandas.Series`` of filtered estimates with the same length
        as the input. It carries the input's index when ``series`` is a
        pandas Series and a default index otherwise. Empty input yields an
        empty Series. NaN measurements are treated as missing: the previous
        estimate is carried forward (predict only). Entries before the first
        non-NaN measurement are NaN.
    """
    z = np.asarray(series, dtype=float)
    # Only a real pandas Series has a usable ``.index``; lists and tuples
    # expose an unrelated ``index`` *method*, so hasattr() is not a safe test.
    index = series.index if isinstance(series, pd.Series) else None

    xhat = np.full(len(z), np.nan)
    estimate = np.nan
    # Kept as a NumPy scalar so degenerate variances (e.g. both set to 0)
    # produce NaN with a warning rather than raising ZeroDivisionError.
    variance = np.float64(1.0)
    seeded = False

    for k, measurement in enumerate(z):
        missing = np.isnan(measurement)
        if not seeded:
            if missing:
                # Nothing to initialise from yet; leave this entry as NaN.
                continue
            # The first valid measurement seeds the state with unit variance.
            estimate = measurement
            variance = np.float64(1.0)
            seeded = True
        else:
            # time update (predict)
            variance = variance + process_var
            if not missing:
                # measurement update (correct)
                gain = variance / (variance + meas_var)
                estimate = estimate + gain * (measurement - estimate)
                variance = (1 - gain) * variance
        xhat[k] = estimate

    return pd.Series(xhat, index=index)

# Gather the numeric columns; only these are candidates for plotting.
numeric_cols = df.select_dtypes(include="number").columns.tolist()

# Guard clause: without any numeric column there is nothing to draw.
if not numeric_cols:
    raise ValueError("No numeric columns found to plot.")

if len(numeric_cols) > 1:
    # Two or more numeric columns: the first is used as x and the second as y.
    # Raw points are drawn as markers, with both filters of y overlaid as lines.
    x_col, y_col = numeric_cols[:2]
    ma = df[y_col].rolling(window=window, min_periods=1).mean()
    # Kalman filter (tweak process_var and meas_var as needed)
    kf = kalman_filter(df[y_col], process_var=1e-5, meas_var=1e-2)

    fig = go.Figure()
    for trace in (
        go.Scatter(x=df[x_col], y=df[y_col], mode='markers', name='Actual', marker={'opacity': 0.7, 'size': 6}),
        go.Scatter(x=df[x_col], y=ma, mode='lines', name=f'MA (w={window})', line={'width': 2}),
        go.Scatter(x=df[x_col], y=kf, mode='lines', name='Kalman', line={'dash': 'dash', 'width': 2}),
    ):
        fig.add_trace(trace)
    fig.update_xaxes(title_text=x_col)
    fig.update_yaxes(title_text=y_col)
    fig.update_layout(height=500, width=1000, title_text=f"{y_col}: actual, moving average (w={window}), and Kalman")
    fig.show()
else:
    # Exactly one numeric column: plot it against the row index, which
    # reset_index() exposes as a regular column renamed to 'Index'.
    col = numeric_cols[0]
    df_plot = df.reset_index().rename(columns={'index': 'Index'})
    ma = df_plot[col].rolling(window=window, min_periods=1).mean()
    # Kalman filter (tweak process_var and meas_var as needed)
    kf = kalman_filter(df_plot[col], process_var=1e-5, meas_var=1e-2)

    fig = go.Figure()
    for trace in (
        go.Scatter(x=df_plot['Index'], y=df_plot[col], mode='lines+markers', name='Actual'),
        go.Scatter(x=df_plot['Index'], y=ma, mode='lines', name=f'MA (w={window})', line={'width': 3}),
        go.Scatter(x=df_plot['Index'], y=kf, mode='lines', name='Kalman', line={'dash': 'dash', 'width': 2}),
    ):
        fig.add_trace(trace)
    fig.update_xaxes(title_text='Index')
    fig.update_yaxes(title_text=col)
    fig.update_layout(height=450, width=900, title_text=f"{col}: actual, moving average (w={window}), and Kalman")
    fig.show()