In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestClassifier


# Load the input dataset from the working directory.
df = pd.read_csv("My_Data.csv")

# Parse "AI Impact" into a float column: take the string form of each value,
# remove every "%" character, then convert to float.
impact_text = df["AI Impact"].astype(str)
df["AI_Impact_num"] = impact_text.str.replace("%", "", regex=False).astype(float)

# Treat +/-inf ratios as missing, then fill every gap with the median of the
# remaining values.
workload_ratio = df["AI_Workload_Ratio"].replace([np.inf, -np.inf], np.nan)
df["AI_Workload_Ratio"] = workload_ratio.fillna(workload_ratio.median())

# Risk bands over the numeric impact:
#   [0, 50] -> "Low", (50, 70] -> "Medium", (70, 100] -> "High".
# include_lowest=True makes the first band contain 0 itself; values outside
# the bin edges (or NaN) get no category.
bins = [0, 50, 70, 100]
labels = ["Low", "Medium", "High"]
df["Risk_Category"] = pd.cut(
    df["AI_Impact_num"],
    bins=bins,
    labels=labels,
    include_lowest=True,
)


# Column groups used by the preprocessing step.
# NOTE: "Job titiles" is spelled exactly as the column is referenced in this
# file (presumably matching the CSV header) and must not be corrected here.
num_cols = ["Tasks", "AI models", "AI_Workload_Ratio"]
cat_cols = ["Job titiles", "Domain"]

# Feature matrix (numeric columns first, then categorical) and target labels.
X = df[num_cols + cat_cols]
y = df["Risk_Category"]

# Numeric columns are standardised; categorical columns are one-hot encoded.
# handle_unknown='ignore' lets the encoder accept categories at transform
# time that were not present when it was fitted.
preprocessor = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown='ignore'), cat_cols),
    ]
)

# Preprocessing and the random forest are chained into a single estimator so
# that fit/predict always apply the same transformations.
model = Pipeline(
    steps=[
        ("preprocessor", preprocessor),
        (
            "classifier",
            RandomForestClassifier(n_estimators=300, random_state=42),
        ),
    ]
)


# Hold out 20% of the rows for testing, stratified on the risk label so both
# splits keep the same class proportions.
#
# pd.cut leaves Risk_Category as NaN for any row whose AI_Impact_num is
# missing or falls outside [0, 100]. NaN labels cannot be used for
# stratification or training, so only labelled rows take part in the split.
# When every row is labelled this mask selects the whole dataset and the
# split is the same as without it.
has_label = y.notna()
X_train, X_test, y_train, y_test = train_test_split(
    X.loc[has_label],
    y.loc[has_label],
    test_size=0.2,
    random_state=42,
    stratify=y.loc[has_label],
)

# Fit the preprocessing steps and the classifier on the training split only.
model.fit(X_train, y_train)

# Predictions for the held-out rows (not used further in this section).
y_pred = model.predict(X_test)

# Predictions for every row of df, including rows without a label.
# NOTE(review): this also covers the rows the model was trained on, so
# agreement between Risk_Category and Model_Prediction on those rows is not a
# measure of how well the model generalises.
df["Model_Prediction"] = model.predict(X)


# Columns written to the report, in output order.
export_columns = [
    "Job titiles",
    "Domain",
    "AI Impact",
    "AI_Impact_num",
    "Risk_Category",      # actual risk label
    "Model_Prediction",   # label predicted by the model
]
out = df[export_columns]

# Write the report without the DataFrame index column.
out.to_csv("model_risk_predictions.csv", index=False)




model_risk_predictions.csv oluşturuldu!
