In [None]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics.pairwise import cosine_similarity


# Load the raw job dataset.
df = pd.read_csv("My_Data.csv")

# "AI Impact" is parsed as text: any "%" character is stripped and the
# remainder is converted to float.
impact_text = df["AI Impact"].astype(str)
df["AI_Impact_num"] = impact_text.str.replace("%", "", regex=False).astype(float)

# Treat +/-inf workload ratios as missing, then impute every missing value
# with the median of the remaining ones.
workload_ratio = df["AI_Workload_Ratio"].replace([np.inf, -np.inf], np.nan)
df["AI_Workload_Ratio"] = workload_ratio
df["AI_Workload_Ratio"] = df["AI_Workload_Ratio"].fillna(workload_ratio.median())


# Sentence-embedding model used to turn each job title into a dense vector.
model = SentenceTransformer("all-MiniLM-L6-v2")

# The column is referred to as "Job titiles" (sic) everywhere in this script;
# presumably that matches the CSV header, so the spelling is kept.
title_column = df["Job titiles"]
job_titles = title_column.to_list()

# One embedding per title, returned as a NumPy array.
job_embeddings = model.encode(job_titles, convert_to_numpy=True)


# One-hot encode the job domain into a dense (non-sparse) array.
domain_frame = df[["Domain"]]
encoder = OneHotEncoder(sparse_output=False)
encoder.fit(domain_frame)
domain_encoded = encoder.transform(domain_frame)


# Standardise the numeric descriptors so each column has zero mean and unit
# variance before similarities are computed on them.
numeric = df[["Tasks", "AI models", "AI_Workload_Ratio"]]
scaler = StandardScaler()
scaler.fit(numeric)
numeric_scaled = scaler.transform(numeric)


# Pairwise cosine similarity between all rows, computed separately for each
# feature family (title embeddings, one-hot domain, scaled numeric features).
job_sim, domain_sim, feat_sim = (
    cosine_similarity(features)
    for features in (job_embeddings, domain_encoded, numeric_scaled)
)


# Blend the three similarity matrices with fixed weights.  The terms are added
# left to right, out of place, so the result dtype follows normal NumPy
# promotion rules.
weighted_job = 0.45 * job_sim
weighted_domain = 0.35 * domain_sim
weighted_feat = 0.20 * feat_sim
hybrid_sim = weighted_job + weighted_domain + weighted_feat


# Bucket the numeric AI-impact value into three risk tiers:
#   [0, 50] -> "Low", (50, 70] -> "Medium", (70, 100] -> "High".
# include_lowest=True closes the first interval on the left; without it an
# impact of exactly 0 falls outside every bin and is labelled NaN instead of
# "Low".  Values outside 0..100 (and missing values) still become NaN.
bins = [0, 50, 70, 100]
labels = ["Low", "Medium", "High"]
df["Risk_Category"] = pd.cut(
    df["AI_Impact_num"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)


# For every job labelled "High" risk, recommend the three most similar jobs
# (by hybrid_sim) that are not themselves "High" risk, then save all pairs to
# a CSV file.
top_k = 3
recommendation_columns = [
    "High_Risk_Job",
    "High_Risk_Domain",
    "Alternative_Job",
    "Alternative_Domain",
    "Similarity",
]

# Pull the needed columns into NumPy arrays once.  Looking rows up with
# df.iloc inside the ranking loop costs one slow pandas call per (job,
# candidate) pair.
is_high_risk = (df["Risk_Category"] == "High").to_numpy()
titles = df["Job titiles"].to_numpy()
domains = df["Domain"].to_numpy()

# Row positions eligible as alternatives: everything not labelled "High"
# (rows with a missing category compare unequal to "High" and stay eligible).
# A high-risk job can never be its own alternative because it is excluded here.
candidate_positions = np.flatnonzero(~is_high_risk)

recommendations = []

for i in np.flatnonzero(is_high_risk):
    candidate_scores = hybrid_sim[i, candidate_positions]

    # Stable sort on the negated scores gives descending similarity with ties
    # broken by the lower row position.
    best = np.argsort(-candidate_scores, kind="stable")[:top_k]

    for idx, score in zip(candidate_positions[best], candidate_scores[best]):
        recommendations.append({
            "High_Risk_Job": titles[i],
            "High_Risk_Domain": domains[i],
            "Alternative_Job": titles[idx],
            "Alternative_Domain": domains[idx],
            "Similarity": round(score, 4)
        })


# Passing the column list explicitly keeps the CSV header present even when
# no recommendation was produced.
rec_df = pd.DataFrame(recommendations, columns=recommendation_columns)
rec_df.to_csv("hybrid_recommendations_offline.csv", index=False)





hybrid_recommendations_offline.csv oluşturuldu!
