In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score, explained_variance_score, max_error
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.svm import SVR
import lightgbm as lgb
import xgboost as xgb


# Load the modelling table (the path literal is a single space in this copy
# of the notebook and has to be filled in before running).
data = pd.read_csv(r" ")

# 'label' is the regression target; every other column is used as a feature.
y = data['label']
x = data.drop(columns=['label'])

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2022,
)


def evaluate_model_performance(y_test, y_predict):
    """Print regression metrics comparing predictions with the true targets.

    Metrics are printed in this order: RMSE, MAE, R^2, explained variance
    score and maximum error. Each line is a fixed caption followed by the value.

    Args:
        y_test: Ground-truth target values.
        y_predict: Predicted values to compare against ``y_test``.

    Returns:
        None; the metrics are only written to stdout.
    """
    def _rmse(actual, predicted):
        # RMSE is the square root of the mean squared error.
        return np.sqrt(mean_squared_error(actual, predicted))

    # (caption, metric function) pairs, evaluated lazily so each metric is
    # computed immediately before its own line is printed.
    metric_table = (
        ('RMSE值为:', _rmse),
        ('MAE值为:', mean_absolute_error),
        ('R方值为:', r2_score),
        ('解释方差分数:', explained_variance_score),
        ('最大误差:', max_error),
    )
    for caption, metric in metric_table:
        print(caption, metric(y_test, y_predict))

def try_different_method(model):
    """Fit ``model`` on the training split and print its test-split metrics.

    The data is not passed in: ``x_train``, ``y_train``, ``x_test`` and
    ``y_test`` are read from the notebook's global scope.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
    """
    model.fit(x_train, y_train)
    evaluate_model_performance(y_test, model.predict(x_test))


# Baseline regressors with hand-picked settings; the estimators that accept a
# seed all use random_state=2022.
model_KNN = KNeighborsRegressor(n_neighbors=5)
model_RF = RandomForestRegressor(n_estimators=50, max_depth=2, random_state=2022)
model_XGB = xgb.XGBRegressor(n_estimators=50, max_depth=4, learning_rate=0.3, random_state=2022)
model_SVM = SVR(kernel='rbf')
model_LGB = lgb.LGBMRegressor(n_estimators=100, max_depth=-1, learning_rate=0.1, random_state=2022)

# Fit and score each baseline in turn, announcing it with its own heading.
baseline_runs = (
    ('KNN模型评分如下：', model_KNN),
    ('随机森林模型评分如下：', model_RF),
    ('XGBoost模型评分如下：', model_XGB),
    ('SVM模型评分如下：', model_SVM),
    ('LightGBM模型评分如下：', model_LGB),
)
for heading, baseline in baseline_runs:
    print(heading)
    try_different_method(baseline)



In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsRegressor
from sklearn.ensemble import RandomForestRegressor
import xgboost as xgb



# Candidate hyper-parameter values searched for each estimator.
param_grid_KNN = {'n_neighbors': [3, 5, 7, 9]}

param_grid_RF = {
    'n_estimators': [10, 50, 100],
    'max_depth': [None, 2, 4, 6],
}

param_grid_XGB = {
    'max_depth': [3, 4, 5],
    'learning_rate': [0.1, 0.3, 0.5],
    'n_estimators': [50, 100, 150],
}

# (display name, unfitted estimator, parameter grid) for every search to run.
models = [
    ('KNN', KNeighborsRegressor(), param_grid_KNN),
    ('RF', RandomForestRegressor(random_state=2022), param_grid_RF),
    ('XGB', xgb.XGBRegressor(random_state=2022), param_grid_XGB),
]

# Settings shared by every search: 10-fold CV scored by negated MSE, run on
# all available cores.
search_options = dict(cv=10, scoring='neg_mean_squared_error', n_jobs=-1)

for name, model, param_grid in models:
    grid_search = GridSearchCV(model, param_grid, **search_options)
    grid_search.fit(x_train, y_train)
    # The scorer returns negative MSE, so flip the sign to report plain MSE.
    best_mse = -grid_search.best_score_
    print(f'{name}模型的最佳参数: {grid_search.best_params_}')
    print(f'{name}模型的最佳得分: {best_mse}')


In [None]:
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
import lightgbm as lgb



# Candidate hyper-parameter values for the two remaining estimators.
param_grid_SVM = {
    'C': [1, 10],
    'kernel': ['rbf'],
}

param_grid_LGB = {
    'max_depth': [2, 4, 6],
    'learning_rate': [0.1, 0.2],
    'n_estimators': [30, 50],
}

# (display name, unfitted estimator, parameter grid) for every search to run.
models = [
    ('SVM', SVR(), param_grid_SVM),
    ('LGB', lgb.LGBMRegressor(random_state=2022), param_grid_LGB),
]

# Same 10-fold, negated-MSE search as for the other models, but run in a
# single process (n_jobs=1).
search_options = dict(cv=10, scoring='neg_mean_squared_error', n_jobs=1)

for name, model, param_grid in models:
    print(f"开始网格搜索：{name}")
    grid_search = GridSearchCV(model, param_grid, **search_options)
    grid_search.fit(x_train, y_train)
    # The scorer returns negative MSE, so flip the sign to report plain MSE.
    best_mse = -grid_search.best_score_
    print(f'{name}模型的最佳参数: {grid_search.best_params_}')
    print(f'{name}模型的最佳得分: {best_mse}')


In [None]:
# Hyper-parameters used for the final models. Every value lies inside the grids
# searched above; presumably they were copied by hand from the grid-search
# output, so re-check them whenever the search is re-run.
best_params_KNN = dict(n_neighbors=5)
best_params_RF = dict(max_depth=None, n_estimators=100)
best_params_XGB = dict(learning_rate=0.1, max_depth=3, n_estimators=50)
best_params_SVM = dict(C=1, kernel='rbf')
best_params_LGB = dict(learning_rate=0.1, max_depth=4, n_estimators=50)

# Rebuild each estimator with its chosen settings; the seeded ones keep
# random_state=2022.
model_KNN = KNeighborsRegressor(**best_params_KNN)
model_RF = RandomForestRegressor(random_state=2022, **best_params_RF)
model_XGB = xgb.XGBRegressor(random_state=2022, **best_params_XGB)
model_SVM = SVR(**best_params_SVM)
model_LGB = lgb.LGBMRegressor(random_state=2022, **best_params_LGB)

# (display name, estimator) pairs in reporting order.
models = [
    ('KNN', model_KNN),
    ('RF', model_RF),
    ('XGB', model_XGB),
    ('SVM', model_SVM),
    ('LGB', model_LGB),
]

# Fit on the training split, predict the test split, then print the metrics
# followed by a separator line.
for name, model in models:
    model.fit(x_train, y_train)
    y_predict = model.predict(x_test)
    print(f'{name}模型评分如下：')
    evaluate_model_performance(y_test, y_predict)
    print('----------------------------------------')

In [None]:
import matplotlib.pyplot as plt


# Replace the test targets' original row labels with 0..n-1 so that they are
# plotted against the same positions as the prediction array.
y_test_reset = y_test.reset_index(drop=True)

# (plot title, model) pairs, one figure each.
models_with_names = [
    ('KNN', model_KNN),
    ('Random Forest', model_RF),
    ('XGBoost', model_XGB),
    ('SVM', model_SVM),
    ('LightGBM', model_LGB),
]

# Text size and line style shared by every figure.
FONT_SIZE = 20
line_style = dict(marker='o', linestyle='-', linewidth=2, markersize=5)

for name, model in models_with_names:
    y_predict = model.predict(x_test)

    # Predicted (blue) and actual (red) values overlaid, sample by sample.
    plt.figure(figsize=(12, 8))
    plt.plot(y_predict, color='blue', label='Predicted', **line_style)
    plt.plot(y_test_reset, color='red', label='Actual', **line_style)
    plt.title(name, fontsize=FONT_SIZE)
    plt.xlabel('Sample', fontsize=FONT_SIZE)
    plt.ylabel('Predictive Variable', fontsize=FONT_SIZE)
    plt.xticks(fontsize=FONT_SIZE)
    plt.yticks(fontsize=FONT_SIZE)
    plt.legend(fontsize=FONT_SIZE)
    plt.show()


In [None]:
import shap


# One TreeExplainer per tree-based model (random forest and LightGBM).
explainer_RF = shap.TreeExplainer(model_RF)
explainer_LGB = shap.TreeExplainer(model_LGB)

# SHAP values are computed on the training features.
shap_values_RF = explainer_RF.shap_values(x_train)
shap_values_LGB = explainer_LGB.shap_values(x_train)

# Dot-style summary plot for each model, random forest first. show=False
# leaves the rendering to the explicit plt.show() call.
for shap_matrix in (shap_values_RF, shap_values_LGB):
    shap.summary_plot(shap_matrix, x_train, plot_type="dot", show=False)
    plt.show()


In [None]:
from SALib.sample import morris as morris_sample
from SALib.analyze import morris as morris_analyze
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Define the problem dictionary for Morris Sensitivity Analysis.
# Each feature is sampled between its 0.1th and 99.9th percentile in the
# training data rather than between its raw min and max.
# NOTE(review): a constant feature would give equal lower/upper bounds, which
# SALib is expected to reject -- confirm x_train has no such column.
feature_names = x_train.columns.tolist()
problem = {
    'num_vars': len(feature_names),
    'names': feature_names,
    'bounds': [
        [np.percentile(x_train[col], 0.1), np.percentile(x_train[col], 99.9)]
        for col in feature_names
    ],
}

# Generate samples. The seed (same value as the random_state used elsewhere in
# this notebook) makes the trajectories, and therefore the ranking below,
# reproducible from run to run.
param_values = morris_sample.sample(problem, N=100, num_levels=4, seed=2022)

# Score every sampled point in ONE batched predict call. The frame carries the
# training feature names so the model sees the columns it was fitted on, and
# ravel() guarantees the 1-D output vector that SALib's analyze expects.
params_df = pd.DataFrame(param_values, columns=problem['names'])
predictions = np.asarray(model_RF.predict(params_df)).ravel()

# Perform the Morris analysis
Si = morris_analyze.analyze(problem, param_values, predictions, print_to_console=False)

# mu_star (mean absolute elementary effect) is used as the sensitivity score
scores = Si['mu_star']

# Sort the scores and corresponding names
sorted_indices = np.argsort(-scores)  # note the minus sign for descending order
sorted_scores = scores[sorted_indices]
sorted_names = [problem['names'][i] for i in sorted_indices]

# Plot the top 20 most sensitive features (fewer if there are fewer features)
plt.figure(figsize=(10, 8))
plt.barh(sorted_names[:20], sorted_scores[:20], align='center')
plt.xlabel('Morris Sensitivity Index')
plt.title('Top 20 Most Sensitive Features according to Morris Method')
plt.gca().invert_yaxis()  # highest values at the top
plt.show()


In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor


# Training table and the new observations to be scored (both path literals are
# a single space in this copy of the notebook and must be filled in).
data = pd.read_csv(r" ")
data2 = pd.read_csv(r" ")

# 'label' is the regression target; every other column is used as a feature.
x = data.drop(['label'], axis=1)
y = data['label']

# Data to be scored may come without a target column, so 'label' is dropped
# only when present instead of raising KeyError on unlabeled input.
x_new = data2.drop(columns=['label'], errors='ignore')

# Same 80/20 split and seed as the first cell of the notebook.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=2022)

def try_different_method(model, x_train, y_train, x_new):
    """Fit ``model`` on the given training data and predict for ``x_new``.

    Note: this four-argument definition replaces the one-argument
    ``try_different_method(model)`` defined earlier in the notebook once this
    cell has been run.

    Args:
        model: Estimator exposing ``fit(X, y)`` and ``predict(X)``.
        x_train: Training features passed to ``fit``.
        y_train: Training targets passed to ``fit``.
        x_new: Features to predict for.

    Returns:
        Whatever ``model.predict(x_new)`` returns.
    """
    model.fit(x_train, y_train)
    return model.predict(x_new)


# Random forest used to score the new data.
# NOTE(review): these are the baseline settings (50 trees, depth 2), not the
# values stored in best_params_RF earlier in the notebook -- confirm that this
# is intended.
model_RF = RandomForestRegressor(n_estimators=50, max_depth=2, random_state=2022)

# Fit on the training split and predict the new observations.
y_predict = try_different_method(model_RF, x_train, y_train, x_new)

print(y_predict)

# Write the predictions as a single 'Predicted_Label' column without the row
# index (the output path literal is a single space in this copy).
predictions_df = pd.DataFrame({'Predicted_Label': y_predict})
predictions_df.to_csv(r" ", index=False)
print(" ")
print(" ")
