In [15]:
import streamlit as st
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import zipfile
import io

# Title
st.title("üìä Scorecard Vintage Analysis (M1, M2, ... buckets)")

# Sidebar: Upload & Settings
st.sidebar.header("‚öôÔ∏è Settings")
uploaded_file = st.sidebar.file_uploader("Upload Excel or CSV file", type=["xlsx", "csv"])

threshold = st.sidebar.number_input(
    "Threshold for 'Bad' (‚â• this value ‚Üí 1)", 
    min_value=0, max_value=1000, 
    value=60, step=1
)

if uploaded_file is None:
    st.info("üëà Please upload a file in the sidebar.")
    st.stop()

# Load data
try:
    if uploaded_file.name.endswith('.xlsx'):
        df = pd.read_excel(uploaded_file)
    elif uploaded_file.name.endswith('.csv'):
        df = pd.read_csv(uploaded_file)
    else:
        st.error("Unsupported file type.")
        st.stop()
except Exception as e:
    st.error(f"Error reading file: {e}")
    st.stop()

st.write("### üìÅ Raw Data Preview")
st.dataframe(df.head(10))

# Validate at least 3 columns
if df.shape[1] < 3:
    st.error("Input file must have at least 3 columns.")
    st.stop()

# Rename columns: first 3 kept, rest ‚Üí M1, M2, ...
cols = df.columns.tolist()
num_rest = len(cols) - 3
new_col_names = cols[:3] + [f"M{i+1}" for i in range(num_rest)]
df_aligned = df.copy()
df_aligned.columns = new_col_names

st.write("### üî§ Renamed Columns (First 3 preserved)")
st.write(f"‚Üí {df_aligned.columns.tolist()}")

# Binarize: ‚â• threshold ‚áí 1; < threshold ‚áí 0; NaN stays NaN
cols_to_update = df_aligned.columns[3:]

# Use vectorized operation (faster & cleaner than apply+np.where)
df_binarized = df_aligned.copy()
df_binarized[cols_to_update] = df_aligned[cols_to_update].where(
    df_aligned[cols_to_update].isna(),
    (df_aligned[cols_to_update] >= threshold).astype(int)
)

# Propagation function (row-wise)
def propagate_ones(row):
    row = row.copy()
    activated = False
    for i in range(len(row)):
        if not pd.isna(row.iloc[i]):
            if row.iloc[i] == 1:
                activated = True
            if activated:
                row.iloc[i] = 1
    return row

# Apply propagation
df_propagated = df_binarized.copy()
df_propagated[cols_to_update] = df_binarized[cols_to_update].apply(propagate_ones, axis=1)

st.write("### üîÅ After Propagation (1s spread rightwards)")
st.dataframe(df_propagated.head(10))

# Compute Sum & Count (count excludes NaN)
sum_vals = df_propagated[cols_to_update].sum()
count_vals = df_propagated[cols_to_update].count()  # count() skips NaN

summary_df = pd.DataFrame({
    'Sum': sum_vals,
    'Count': count_vals
})

# Show summary table
st.write("### üìä Column-wise Summary")
st.dataframe(summary_df)

# Plot
summary_plot = summary_df.sort_values(by='Count', ascending=False)

fig, ax1 = plt.subplots(figsize=(25, 10))

# Bars: Count
bars = ax1.bar(summary_plot.index, summary_plot['Count'], color='skyblue', label='Observations (Count)')
ax1.set_ylabel('Observations (Count)', color='steelblue')
ax1.tick_params(axis='y', labelcolor='steelblue')
ax1.set_xlabel('Vintage Bucket (M1, M2, ...)')

# Line: Sum (Bad)
ax2 = ax1.twinx()
line = ax2.plot(summary_plot.index, summary_plot['Sum'], 
                color='crimson', marker='o', linestyle='-', linewidth=2, label='Bad (Sum)')
ax2.set_ylabel('Bad (Sum)', color='crimson')
ax2.tick_params(axis='y', labelcolor='crimson')

# Title & legend
plt.title(f'Vintage Buckets: Observations vs Bad (Threshold = {threshold})', fontsize=14)
fig.tight_layout()

# Combined legend
handles1, labels1 = ax1.get_legend_handles_labels()
handles2, labels2 = ax2.get_legend_handles_labels()
ax1.legend(handles1 + handles2, labels1 + labels2, loc='upper right')

# Rotate x-ticks
plt.xticks(rotation=45, ha='right')

st.pyplot(fig)



# Prepare all data in memory
csv_processed = df_propagated.to_csv(index=False).encode('utf-8')
csv_summary = summary_df.to_csv().encode('utf-8')

# Save plot to bytes
img_buf = io.BytesIO()
fig.savefig(img_buf, format='png', dpi=300, bbox_inches='tight')
img_buf.seek(0)

# Create ZIP
zip_buffer = io.BytesIO()
with zipfile.ZipFile(zip_buffer, "w", zipfile.ZIP_DEFLATED) as zf:
    zf.writestr("processed_vintage_data.csv", csv_processed)
    zf.writestr("vintage_summary.csv", csv_summary)
    zf.writestr("vintage_analysis_plot.png", img_buf.getvalue())

zip_buffer.seek(0)

st.download_button(
    label="üì¶ Download All (ZIP)",
    data=zip_buffer,
    file_name="vintage_analysis_output.zip",
    mime="application/zip"
)



AttributeError: 'NoneType' object has no attribute 'head'