# Analysis Prototype (Phase 1)

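This notebook validates the core analysis flow (input validation, OLS regression of Ele.Flow on F.S.Flux, prediction-interval intersections with the Ele.Flow limits, and the Plotly chart) before the Streamlit implementation.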

In [1]:
import numpy as np
import pandas as pd
import plotly.graph_objects as go
import statsmodels.api as sm

In [None]:
# Input file read by the data-loading cell.
CSV_PATH = 'sample_data.csv'

# Lower and upper Ele.Flow limits. They are intersected with the fitted
# prediction-interval bounds and drawn as horizontal lines on the chart.
Min_Ele_Flow = 8_800.0
Max_Ele_Flow = 13_200.0

In [3]:
def validate_dataframe(df: pd.DataFrame) -> None:
    """Check that ``df`` can be used for the F.S.Flux / Ele.Flow regression.

    On success the required columns are converted to numeric dtype in place,
    so the caller's frame is modified. If any check fails, nothing is written
    back to ``df``.

    Args:
        df: Frame that must contain the 'F.S.Flux' and 'Ele.Flow' columns.

    Raises:
        ValueError: If a required column is missing, there are fewer than 3
            rows, a required column contains missing values, a value cannot
            be parsed as a number, or a parsed value is not finite
            (NaN or infinity).
    """
    required_columns = ['F.S.Flux', 'Ele.Flow']
    missing_columns = [c for c in required_columns if c not in df.columns]
    if missing_columns:
        raise ValueError(f'Missing required columns: {missing_columns}')

    if len(df) < 3:
        raise ValueError('At least 3 rows are required for analysis.')

    if df[required_columns].isnull().any().any():
        raise ValueError('Missing values found in required columns.')

    # Convert every column before touching ``df`` so that a failure in a later
    # column does not leave the frame half-converted.
    converted = {}
    for col in required_columns:
        try:
            numeric = pd.to_numeric(df[col], errors='raise')
        except ValueError as exc:
            raise ValueError(f"Column '{col}' contains non-numeric values: {exc}") from exc

        # isnull() above does not flag +/-inf, and text such as 'nan' or 'inf'
        # only becomes non-finite after parsing, so check again here.
        if not np.isfinite(numeric.to_numpy(dtype=float)).all():
            raise ValueError(f"Column '{col}' contains non-finite values (NaN or infinity).")
        converted[col] = numeric

    for col, numeric in converted.items():
        df[col] = numeric

In [4]:
# Read the input CSV, then validate it. validate_dataframe() raises ValueError
# on bad input and converts the required columns to numeric dtype in place.
# NOTE(review): the stored output shows an 'Unnamed: 0' column, which suggests
# the CSV was written with its index -- confirm whether it should be dropped.
df = pd.read_csv(filepath_or_buffer=CSV_PATH, encoding='utf-8')
validate_dataframe(df)

# Preview of the first rows as the cell output.
df.head(n=5)

Unnamed: 0,F.S.Flux,Ele.Flow
0,1.1,9900
1,1.2,10600
2,1.11,9850
3,1.3,13000
4,1.23,11000


In [5]:
# Ordinary least squares fit: Ele.Flow = intercept + slope * F.S.Flux.
x = df['F.S.Flux'].astype(float)
y = df['Ele.Flow'].astype(float)

# With its default has_constant='skip', sm.add_constant() adds no intercept
# column when the input is already a non-zero constant. The fitted model would
# then have no 'const' parameter and the lookup below would fail with a
# KeyError. A constant regressor cannot define a slope anyway, so fail early.
if x.min() == x.max():
    raise ValueError('F.S.Flux must contain at least two distinct values to fit a regression line.')

x_with_const = sm.add_constant(x)
model = sm.OLS(y, x_with_const).fit()

# alpha=0.05 requests 95% intervals. The obs_ci_* columns used below are the
# bounds for an individual observation (the prediction interval).
pred_summary = model.get_prediction(x_with_const).summary_frame(alpha=0.05)

slope = float(model.params['F.S.Flux'])
intercept = float(model.params['const'])
r_squared = float(model.rsquared)

# Each prediction-interval bound is approximated by a straight line fitted
# through its values at the observed x. polyfit returns [slope, intercept].
lower_fit = np.polyfit(x.to_numpy(), pred_summary['obs_ci_lower'].to_numpy(), deg=1)
upper_fit = np.polyfit(x.to_numpy(), pred_summary['obs_ci_upper'].to_numpy(), deg=1)

a_lower, b_lower = float(lower_fit[0]), float(lower_fit[1])
a_upper, b_upper = float(upper_fit[0]), float(upper_fit[1])

# The intersections divide by the fitted slopes, so reject (near-)flat bounds.
if abs(a_lower) < 1e-12:
    raise ZeroDivisionError('Lower CI fitted slope is too close to zero.')
if abs(a_upper) < 1e-12:
    raise ZeroDivisionError('Upper CI fitted slope is too close to zero.')

# F.S.Flux where the lower bound line meets Min_Ele_Flow and where the upper
# bound line meets Max_Ele_Flow.
# NOTE(review): nothing enforces min_intersection <= max_intersection; the
# stored output for the sample data has min_intersection > max_intersection.
# Confirm how that case should be reported downstream.
min_intersection = (Min_Ele_Flow - b_lower) / a_lower
max_intersection = (Max_Ele_Flow - b_upper) / a_upper

results = {
    'slope': slope,
    'intercept': intercept,
    'r_squared': r_squared,
    'min_intersection': float(min_intersection),
    'max_intersection': float(max_intersection),
}
results

{'slope': 14345.827439886829,
 'intercept': -6172.842998585565,
 'r_squared': 0.8807660702727143,
 'min_intersection': 1.262967411751163,
 'max_intersection': 1.201924724136081}

In [6]:
fig = go.Figure()

# The axis extents must cover the observations, both Ele.Flow limits and both
# intersection points so that everything drawn below is inside the view.
x_candidates = np.array([x.min(), x.max(), min_intersection, max_intersection], dtype=float)
y_candidates = np.array([y.min(), y.max(), Min_Ele_Flow, Max_Ele_Flow], dtype=float)

x_min = float(np.min(x_candidates))
x_max = float(np.max(x_candidates))
y_min = float(np.min(y_candidates))
y_max = float(np.max(y_candidates))

# 10% margin on each side, with a non-zero fallback when the extent is zero.
x_pad = (x_max - x_min) * 0.1
y_pad = (y_max - y_min) * 0.1
if x_pad == 0.0:
    x_pad = max(abs(x_min) * 0.1, 1.0)
if y_pad == 0.0:
    y_pad = max(abs(y_min) * 0.1, 1.0)

x_plot_min = x_min - x_pad
x_plot_max = x_max + x_pad
y_plot_min = y_min - y_pad
y_plot_max = y_max + y_pad

# Evaluate the regression line and the two fitted bound lines across the
# whole visible x range.
x_line = np.linspace(x_plot_min, x_plot_max, 200)
reg_line = slope * x_line + intercept
ci_lower_line = lower_fit[0] * x_line + lower_fit[1]
ci_upper_line = upper_fit[0] * x_line + upper_fit[1]

fig.add_trace(go.Scatter(
    x=x, y=y, mode='markers', name='Observed',
    marker=dict(size=8)
))

fig.add_trace(go.Scatter(
    x=x_line, y=reg_line, mode='lines', name='Regression line'
))

# Shaded band: the invisible upper line must be added immediately before the
# lower line, because fill='tonexty' fills towards the previous trace.
fig.add_trace(go.Scatter(
    x=x_line, y=ci_upper_line, mode='lines',
    line=dict(width=0), showlegend=False, hoverinfo='skip'
))
fig.add_trace(go.Scatter(
    x=x_line, y=ci_lower_line, mode='lines',
    line=dict(width=0), fill='tonexty',
    fillcolor='rgba(31, 119, 180, 0.2)', name='95% prediction interval'
))

# Dashed vertical markers at the two intersection x values.
for x_cross in (min_intersection, max_intersection):
    fig.add_vline(x=x_cross, line_dash='dash', line_color='red')

# Horizontal lines at the Ele.Flow limits. Both share one legend entry; the
# common legendgroup makes that entry toggle both lines together.
for limit_value, show_in_legend in ((Min_Ele_Flow, True), (Max_Ele_Flow, False)):
    fig.add_trace(go.Scatter(
        x=[x_plot_min, x_plot_max],
        y=[limit_value, limit_value],
        mode='lines',
        name='Ele.Flow limits',
        legendgroup='ele_flow_limits',
        showlegend=show_in_legend,
        line=dict(color='blue', width=2)
    ))

# Highlight where each fitted bound line meets its limit.
fig.add_trace(go.Scatter(
    x=[min_intersection, max_intersection],
    y=[Min_Ele_Flow, Max_Ele_Flow],
    mode='markers',
    name='Limit intersections',
    showlegend=False,
    marker=dict(size=14, color='red', line=dict(color='black', width=1))
))

fig.update_layout(
    title='F.S.Flux vs Ele.Flow',
    xaxis_title='F.S.Flux',
    yaxis_title='Ele.Flow',
    template='plotly_white'
)

fig.update_xaxes(range=[x_plot_min, x_plot_max])
fig.update_yaxes(
    range=[y_plot_min, y_plot_max],
    tickformat=',.0f',
    separatethousands=True,
    exponentformat='none'
)

fig.show()


In [7]:
# Plain-text summary of the fit and the two intersection points.
# The intercept's sign is printed as its own token so a negative intercept
# reads "x - 6172.8430" rather than "x + -6172.8430".
intercept_sign = '-' if intercept < 0 else '+'
print(f"Regression equation: y = {slope:.4f}x {intercept_sign} {abs(intercept):.4f}")
print(f"R^2: {r_squared:.6f}")
print(f"min_intersection: {min_intersection:.6f}")
print(f"max_intersection: {max_intersection:.6f}")

Regression equation: y = 14345.8274x + -6172.8430
R^2: 0.880766
min_intersection: 1.262967
max_intersection: 1.201925
