In [31]:
# Model/train_model.py

import os

import joblib
import pandas as pd
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import train_test_split
from xgboost import XGBRegressor

# 1. Load dataset and drop exact duplicate rows
df = pd.read_csv('../Data/ds_salaries.csv')
df = df.drop_duplicates()

# 2. Label encoding for specific columns
experience_map = {'EN': 0, 'MI': 1, 'SE': 2, 'EX': 3}
employment_map = {'PT': 0, 'FT': 1, 'CT': 2, 'FL': 3}
size_map       = {'S': 0, 'M': 1, 'L': 2}


def _encode_ordinal(frame, column, mapping):
    """Return ``frame[column]`` translated through ``mapping``.

    Values that are already missing in the input stay missing. A value that
    is present but has no entry in ``mapping`` raises instead of being
    converted to NaN.

    Raises:
        ValueError: if ``frame[column]`` holds a non-null value that is not
            a key of ``mapping``.
    """
    original = frame[column]
    encoded = original.map(mapping)
    # Series.map yields NaN for keys absent from the dict, and XGBoost treats
    # NaN as a missing value, so an unexpected category code would otherwise
    # be trained on without any error.
    unmapped = original[encoded.isna() & original.notna()].unique()
    if len(unmapped) > 0:
        raise ValueError(
            f"Unmapped values in column '{column}': "
            f"{sorted(str(value) for value in unmapped)}"
        )
    return encoded


df['experience_level'] = _encode_ordinal(df, 'experience_level', experience_map)
df['employment_type']  = _encode_ordinal(df, 'employment_type', employment_map)
df['company_size']     = _encode_ordinal(df, 'company_size', size_map)

# 3. Remove unnecessary columns
# errors='ignore' tolerates a CSV that lacks some of these columns. The
# local-currency 'salary' is dropped first so the USD column can take its name.
df = df.drop(columns=['salary_currency', 'salary', 'work_year'], errors='ignore')
df = df.rename(columns={'salary_in_usd': 'salary'})

# 4. Extra feature: 1 when the employee lives in the company's country, else 0
# (must be computed before the two location columns are one-hot encoded below)
df['same_country'] = (df['company_location'] == df['employee_residence']).astype(int)

# 5. One-hot encode remaining categoricals
df = pd.get_dummies(
    df,
    columns=['job_title', 'company_location', 'employee_residence'],
    drop_first=True,
)

# 6. Store final columns for app compatibility
# The column names and their order are saved next to the model in step 11.
feature_columns = df.drop(columns='salary').columns.tolist()

# 7. Define X and y
X = df[feature_columns]
y = df['salary']

# 8. Train-test split (80/20, fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 9. Train the model
model = XGBRegressor(
    n_estimators=300,
    learning_rate=0.08,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42
)
model.fit(X_train, y_train)

# 10. Evaluation on the held-out 20%
y_pred = model.predict(X_test)
print("MAE:", mean_absolute_error(y_test, y_pred))
print("R² Score:", r2_score(y_test, y_pred))

# 11. Save the model and features
# Create the output directory first so a missing folder cannot discard the
# freshly trained model at the very last step.
os.makedirs('../Model', exist_ok=True)
joblib.dump(model, '../Model/model.pkl')
joblib.dump(feature_columns, '../Model/feature_columns.pkl')
print("✅ Model and feature columns saved to ../Model/")


MAE: 39707.20703125
R² Score: 0.35452592372894287
✅ Model and feature columns saved to ../Model/
