In [20]:
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.svm import SVR
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.neural_network import MLPRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import ExtraTreeRegressor
from xgboost import XGBRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import AdaBoostRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.ensemble import BaggingRegressor
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

data = pd.read_csv(r"D:\DL_Homework\Kaggle2_Titanic\统计建模\数据\data-3.csv", encoding='gb18030')
data['date'] = pd.to_datetime(data['date'])
 # 添加年、月、日列
data['year'] = data['date'].dt.year
data['month'] = data['date'].dt.month
data['day'] = data['date'].dt.day
# print(data.head())
cols_to_convert = ['AQI指数', 'PM2.5', 'PM10', 'O3', 'no2', 'so2', 'co', 'T', 'Po', 'U', 'Ff', 'VV', 'RRR', 'year', 'month', 'day']
data[cols_to_convert] = data[cols_to_convert].astype(float)
y = data['AQI指数']
x = data.drop(['AQI指数','date'], axis=1)
 # 进行BOX-COX变换
y = stats.boxcox(y)[0]
 # 数据标准化
scaler = StandardScaler()
x = scaler.fit_transform(x)

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 33)

models = [LinearRegression(), KNeighborsRegressor(), SVR(), Ridge(), Lasso(), MLPRegressor(alpha=20),
          DecisionTreeRegressor(), ExtraTreeRegressor(), XGBRegressor(), RandomForestRegressor(), AdaBoostRegressor(),
          GradientBoostingRegressor(), BaggingRegressor()]
models_str = ['LinearRegression', 'KNNRegressor', 'SVR', 'Ridge', 'Lasso', 'MLPRegressor', 'DecisionTree', 'ExtraTree',
              'XGBoost', 'RandomForest', 'AdaBoost', 'GradientBoost', 'Bagging']

In [21]:
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import r2_score
from sklearn.preprocessing import MinMaxScaler
import numpy as np
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true))
 # 创建一个空的DataFrame
df_model_eval = pd.DataFrame(columns=['Model', 'MSE', 'RMSE', 'MAE', 'R2', 'MAPE'])
 # 循环遍历所有模型，并记录评估结果
for name, model in zip(models_str, models):
    model = model
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    MSE = mean_squared_error(y_test,y_pred)
    RMSE = np.sqrt(mean_squared_error(y_test,y_pred))
    MAE = mean_absolute_error(y_test,y_pred)
    R2 = r2_score(y_test,y_pred)
    MAPE = mean_absolute_percentage_error(y_test,y_pred)
    df_model_eval = df_model_eval.append({'Model': name,
                                          'MSE': MSE,
                                          'RMSE': RMSE,
                                          'MAE': MAE,
                                          'R2': R2,
                                          'MAPE': MAPE},
                                         ignore_index=True)
 # 输出表格
print(df_model_eval)



               Model       MSE      RMSE       MAE        R2      MAPE
0   LinearRegression  0.125706  0.354551  0.271412  0.537156  0.055091
1       KNNRegressor  0.127142  0.356570  0.266951  0.531870  0.054286
2                SVR  0.113566  0.336996  0.247206  0.581855  0.050276
3              Ridge  0.125705  0.354549  0.271415  0.537160  0.055092
4              Lasso  0.271835  0.521378  0.426310 -0.000885  0.087252
5       MLPRegressor  0.126547  0.355735  0.272921  0.534058  0.055364
6       DecisionTree  0.215845  0.464591  0.343543  0.205269  0.069930
7          ExtraTree  0.229406  0.478963  0.368738  0.155338  0.074955
8            XGBoost  0.109909  0.331525  0.250050  0.595322  0.050896
9       RandomForest  0.105770  0.325223  0.241481  0.610560  0.048987
10          AdaBoost  0.134114  0.366215  0.286649  0.506200  0.058835
11     GradientBoost  0.108576  0.329509  0.248368  0.600228  0.050592
12           Bagging  0.118854  0.344753  0.258764  0.562384  0.052467


In [22]:
df_model_eval.to_csv('model_eval-3.csv')