In [1]:
import pandas as pd
from pydantic import BaseModel, Field


In [2]:
class EmployeeData(BaseModel):
    """Schema for a single employee record.

    All nine fields are required: none of them declares a default value.
    Field names keep their raw spelling and casing (for example
    ``average_montly_hours`` and ``Work_accident``); the ``preprocessing``
    function in this notebook renames them afterwards.
    """

    # Numeric attributes.
    satisfaction_level: float
    last_evaluation: float
    number_project: int
    average_montly_hours: int
    time_spend_company: int
    Work_accident: int
    promotion_last_5years: int

    # Categorical attributes.
    # NOTE(review): these are annotated as ``object``, so the annotation itself
    # does not restrict them to strings -- confirm whether ``str`` is intended.
    Department: object
    salary: object

In [3]:
# Hand-written sample record used to try out the schema and the preprocessing
# step. Note that the float-annotated satisfaction_level is given an int here.
payload = EmployeeData(
    satisfaction_level=1,
    last_evaluation=0.43,
    number_project=4,
    average_montly_hours=456,
    time_spend_company=5,
    Work_accident=0,
    promotion_last_5years=0,
    Department="sales",
    salary="medium",
)

In [4]:
# Build a one-row frame (row label 0) from the validated record's field dict.
pay_df = pd.DataFrame(data=payload.model_dump(), index=[0])
pay_df

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_montly_hours,time_spend_company,Work_accident,promotion_last_5years,Department,salary
0,1.0,0.43,4,456,5,0,0,sales,medium


In [5]:
def preprocessing(df: pd.DataFrame) -> pd.DataFrame:
    """Convert raw employee records into the fixed feature layout.

    The input frame is not modified; a new frame is returned.

    Steps:
      1. Rename four raw columns to their snake_case names
         (``Work_accident``, ``average_montly_hours``,
         ``promotion_last_5years``, ``Department``).
      2. One-hot encode the categorical columns as 0/1 integers.
      3. Align the result to ``expected_cols``: columns not in the list are
         discarded, columns in the list that are absent are added and filled
         with 0, and the column order follows the list.

    Because of step 3, a category with no matching entry in ``expected_cols``
    (for instance a salary level other than ``low``/``medium``) ends up with
    all of its indicator columns set to 0.
    NOTE(review): presumably those are the baseline categories the model was
    trained with -- confirm against the training pipeline.

    Parameters
    ----------
    df : pd.DataFrame
        Raw records using the original column names.

    Returns
    -------
    pd.DataFrame
        Frame whose columns are exactly ``expected_cols``, in that order.
    """
    # rename columns
    df = df.rename(columns={
        'Work_accident': 'work_accident',
        'average_montly_hours': 'average_monthly_hours',
        'promotion_last_5years': 'promotion_last_five_years',
        'Department': 'department',
    })

    # dummy encode categorical columns; drop_first stays False so that a
    # single-row input still produces an indicator for its one category
    df = pd.get_dummies(df, drop_first=False, dtype=int)

    # model input
    expected_cols = [
        "satisfaction_level", "last_evaluation", "number_project", "average_monthly_hours",
        "time_spend_company", "work_accident", "promotion_last_five_years", "department_RandD",
        "department_accounting", "department_hr", "department_management", "department_marketing",
        "department_product_mng", "department_sales", "department_support", "department_technical",
        "salary_low", "salary_medium",
    ]

    # reindex both discards unexpected columns and adds the missing ones
    # (filled with 0), so no separate per-column drop pass is needed.
    return df.reindex(columns=expected_cols, fill_value=0)


# Replace the raw one-row frame with its model-ready feature layout.
pay_df = preprocessing(df=pay_df)
pay_df

Unnamed: 0,satisfaction_level,last_evaluation,number_project,average_monthly_hours,time_spend_company,work_accident,promotion_last_five_years,department_RandD,department_accounting,department_hr,department_management,department_marketing,department_product_mng,department_sales,department_support,department_technical,salary_low,salary_medium
0,1.0,0.43,4,456,5,0,0,0,0,0,0,0,0,1,0,0,0,1


In [None]:
import httpx

# Prediction endpoint on the local machine (port 8000).
url = "http://127.0.0.1:8000/turnover/predict"

# JSON body of the request, using the raw field names of EmployeeData.
# It is deliberately NOT called `payload`: that name already holds the
# EmployeeData instance created earlier in this notebook, and rebinding it to
# a dict would make re-running the `payload.model_dump()` cell fail.
request_payload = {
    "satisfaction_level": 0.98,
    "last_evaluation": 0.44,
    "number_project": 4,
    "average_montly_hours": 154,
    "time_spend_company": 6,
    "Work_accident": 1,
    "promotion_last_5years": 0,
    "Department": "sales",
    "salary": "medium",
}

# The client is closed when the block exits; a 4xx/5xx response raises here,
# before anything is printed.
with httpx.Client(timeout=10) as client:
    response = client.post(url, json=request_payload)
    response.raise_for_status()

print(response.status_code)
print(response.headers)
print(response.text)
