In [1]:
# 导入必要的库
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


In [2]:
# Load the data
data = pd.read_csv('new_output.csv')

# Preprocessing
# Drop the columns that are not used for prediction
data = data.drop(columns=['jobSalaryMax', 'jobSalaryMin', 'term', 'salary_max_range'])

# Integer-encode the categorical features; each fitted encoder is kept in
# `label_encoders`, keyed by column name
label_encoders = {}
categorical_features = ['jobType', 'workAreaCode', 'degree', 'industryType1', 'companyType', 'companySizeCode']
for feature in categorical_features:
    label_encoders[feature] = LabelEncoder()
    data[feature] = label_encoders[feature].fit_transform(data[feature])

# Target variables
y_max = data['jobSalaryMaxLevel']
y_min = data['jobSalaryMinLevel']

# Feature matrix
X = data.drop(columns=['jobSalaryMaxLevel', 'jobSalaryMinLevel'])

# Standardize the features.
# The scaler is fitted on the training rows only, so the mean/variance of the
# held-out rows never leaks into preprocessing. The row selection uses the same
# test_size and random_state as the train/test split in the training cell;
# for an equal number of samples train_test_split then produces the same
# partition. Keep these two values in sync with that split.
train_rows = train_test_split(np.arange(len(X)), test_size=0.2, random_state=42)[0]
scaler = StandardScaler()
scaler.fit(X.iloc[train_rows])
# Transform every row with the train-fitted statistics; X becomes a NumPy array
X = scaler.transform(X)

In [3]:
# Hold out 20% of the rows for evaluation; both targets are split together so
# they stay aligned with the same feature rows
X_train, X_test, y_train_max, y_test_max, y_train_min, y_test_min = train_test_split(
    X,
    y_max,
    y_min,
    test_size=0.2,
    random_state=42,
)

# Random forest for the maximum-salary level
rf_max = RandomForestClassifier(n_estimators=100, random_state=42)
rf_max.fit(X_train, y_train_max)

# Random forest for the minimum-salary level
rf_min = RandomForestClassifier(n_estimators=100, random_state=42)
rf_min.fit(X_train, y_train_min)

In [4]:
# Evaluate the maximum-salary-level model on the held-out rows
y_pred_max = rf_max.predict(X_test)
print(
    "Classification Report for jobSalaryMaxLevel:\n",
    classification_report(y_test_max, y_pred_max),
)
print(
    "Accuracy for jobSalaryMaxLevel:",
    accuracy_score(y_test_max, y_pred_max),
)

# Evaluate the minimum-salary-level model on the held-out rows
y_pred_min = rf_min.predict(X_test)
print(
    "Classification Report for jobSalaryMinLevel:\n",
    classification_report(y_test_min, y_pred_min),
)
print(
    "Accuracy for jobSalaryMinLevel:",
    accuracy_score(y_test_min, y_pred_min),
)

Classification Report for jobSalaryMaxLevel:
               precision    recall  f1-score   support

           1       0.78      0.79      0.78       384
           2       0.61      0.66      0.63       305
           3       0.87      0.82      0.84       469

    accuracy                           0.77      1158
   macro avg       0.75      0.75      0.75      1158
weighted avg       0.77      0.77      0.77      1158

Accuracy for jobSalaryMaxLevel: 0.7659758203799655
Classification Report for jobSalaryMinLevel:
               precision    recall  f1-score   support

           1       0.84      0.86      0.85       543
           2       0.63      0.67      0.65       299
           3       0.85      0.76      0.80       316

    accuracy                           0.79      1158
   macro avg       0.77      0.77      0.77      1158
weighted avg       0.79      0.79      0.79      1158

Accuracy for jobSalaryMinLevel: 0.7858376511226253
