In [5]:
# Load the occupations table and the embedding matrix stored alongside it.
# Both paths are relative to the kernel's current working directory.
df = pd.read_csv(filepath_or_buffer="Occupations_with_summaries_and_exposure.csv")
embeddings = np.load(file="SBERT_embeddings_summaries.npy")


In [7]:
# Source of the Streamlit app, held in a raw string so it can be written to
# disk verbatim. Nothing inside the string runs in the notebook kernel.
app_code = r'''
import streamlit as st
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

# Page configuration is documented as having to be the first Streamlit command
# of the script, so it is issued before the cached loaders below are invoked.
st.set_page_config(page_title="AI Exposure Job Recommender", layout="wide")

# ---------- LOAD DATA (cached) ----------

@st.cache_data
def load_df():
    """Read the occupations table (codes, titles, summaries, exposure scores)."""
    return pd.read_csv("Occupations_with_summaries_and_exposure.csv")

@st.cache_resource
def load_embeddings():
    """Load the embedding matrix; row i is used as the vector for table row i."""
    return np.load("SBERT_embeddings_summaries.npy")

df = load_df()
embeddings = load_embeddings()

# The recommender indexes the embedding matrix by table row position, so both
# files must contain the same number of rows.
if len(df) != len(embeddings):
    st.error(
        f"Data mismatch: {len(df)} occupation rows but {len(embeddings)} embedding rows."
    )
    st.stop()

# Use only occupations that have exposure
df_valid = df[~df["Exposure_Score"].isna()].copy()


# ---------- RECOMMENDER FUNCTION ----------

def recommend_transitions(
    job_code: str,
    top_k: int = 10,
    min_similarity: float = 0.4,
    require_lower_exposure: bool = True
) -> pd.DataFrame:
    """Rank occupations by embedding similarity to the given occupation.

    Args:
        job_code: Value of the "O*NET-SOC Code" column identifying the base job.
        top_k: Maximum number of rows to return.
        min_similarity: Candidates with a cosine similarity below this are dropped.
        require_lower_exposure: If True, keep only candidates whose
            "Exposure_Score" is strictly lower than the base job's score.

    Returns:
        A DataFrame with the columns "O*NET-SOC Code", "Element Name",
        "similarity", "Exposure_Score", "exposure_diff" and "Summary", sorted by
        similarity and then by exposure_diff (both descending). exposure_diff is
        the base score minus the candidate score.

    Raises:
        ValueError: If job_code is not in the table or has no exposure score.
    """
    matches = df.index[df["O*NET-SOC Code"] == job_code].tolist()
    if not matches:
        raise ValueError(f"Job code {job_code} not found.")
    idx = matches[0]

    base_row = df.loc[idx]
    base_title = base_row["Element Name"]
    base_exp = base_row["Exposure_Score"]

    if pd.isna(base_exp):
        raise ValueError(f"Exposure score missing for {job_code} ({base_title}).")

    query_vec = embeddings[idx].reshape(1, -1)
    sims = cosine_similarity(query_vec, embeddings)[0]

    # assign() returns a new frame, so the derived columns are never written
    # into a filtered slice (which is what triggers SettingWithCopyWarning).
    result = df.assign(
        similarity=sims,
        exposure_diff=base_exp - df["Exposure_Score"],
    )

    # One combined mask: has a score, is similar enough, is not the base job.
    keep = (
        result["Exposure_Score"].notna()
        & (result["similarity"] >= min_similarity)
        & (result.index != idx)
    )
    if require_lower_exposure:
        keep &= result["Exposure_Score"] < base_exp

    result = result[keep].sort_values(
        by=["similarity", "exposure_diff"],
        ascending=[False, False]
    ).head(top_k)

    return result[[
        "O*NET-SOC Code",
        "Element Name",
        "similarity",
        "Exposure_Score",
        "exposure_diff",
        "Summary"
    ]]


# ---------- STREAMLIT UI ----------

st.title("AI Exposure Job Transition Recommender")

st.markdown(
    """
Select your current occupation to see:
1. How exposed it is to generative AI  
2. Skill-similar occupations with **lower** exposure  
"""
)

# ----- SIDEBAR -----
st.sidebar.header("Choose your occupation")

# Without any scored occupation the selectbox has no options and returns None.
if df_valid.empty:
    st.error("No occupations with an exposure score are available.")
    st.stop()

options = df_valid["O*NET-SOC Code"] + " – " + df_valid["Element Name"]
selected = st.sidebar.selectbox("Occupation", options)

top_k = st.sidebar.slider("Number of recommendations", 5, 20, 10)
min_sim = st.sidebar.slider("Minimum similarity", 0.0, 1.0, 0.4, 0.05)

# Each option is "<code> – <title>"; the code is the part before the separator.
selected_code = selected.split(" – ")[0]

base_row = df_valid.loc[df_valid["O*NET-SOC Code"] == selected_code].iloc[0]
base_title = base_row["Element Name"]
base_exp = base_row["Exposure_Score"]


# ----- MAIN CONTENT -----

st.subheader("Your current occupation")

col1, col2 = st.columns(2)

with col1:
    st.markdown(f"**Occupation:** {selected_code} – {base_title}")
    st.markdown(f"**AI exposure score:** `{base_exp:.3f}`")

    # Bucket the score into a coarse label using fixed cut-offs.
    if base_exp < 0.2:
        risk_label = "Low"
    elif base_exp < 0.35:
        risk_label = "Medium"
    else:
        risk_label = "High"

    st.markdown(f"**Exposure level:** {risk_label}")

with col2:
    st.markdown("**Summary (LLM description):**")
    # Series.get only falls back when the key is absent; an empty cell comes
    # back as NaN and would otherwise be rendered as "nan".
    summary = base_row.get("Summary")
    if summary is None or pd.isna(summary):
        summary = "No summary available."
    st.write(summary)

st.markdown("---")

st.subheader("Recommended lower-exposure occupations")

try:
    recs = recommend_transitions(
        selected_code,
        top_k=top_k,
        min_similarity=min_sim,
        require_lower_exposure=True
    )

    if recs.empty:
        st.info("No lower-exposure jobs found. Try lowering the similarity threshold.")
    else:
        st.dataframe(
            recs[[
                "O*NET-SOC Code",
                "Element Name",
                "similarity",
                "Exposure_Score",
                "exposure_diff"
            ]],
            use_container_width=True
        )

        st.markdown("### Similarity scores")
        st.bar_chart(recs.set_index("Element Name")["similarity"])

        st.markdown("### Exposure levels")
        st.bar_chart(recs.set_index("Element Name")["Exposure_Score"])

except ValueError as e:
    st.error(str(e))
'''

# Save the generated app source as app.py in the current working directory.
# The encoding is set explicitly because the text contains non-ASCII characters.
with open("app.py", mode="w", encoding="utf-8") as app_file:
    app_file.write(app_code)

print("app.py created successfully!")


app.py created successfully!
