In [None]:
def create_submission(test_df, sample_submission, submission_dir, submission_file_name):
    """Fill the sample submission's ``score`` column with predictions and save it.

    Args:
        test_df: DataFrame holding a ``case_id`` column and a ``pred_prob`` column.
        sample_submission: Location of the sample-submission CSV (anything
            ``pd.read_csv`` accepts); it must contain a ``case_id`` column.
        submission_dir: Directory the submission file is written into.
        submission_file_name: Name of the CSV file to write.

    Rows are matched on ``case_id``. A ``case_id`` present in the sample file
    but absent from ``test_df`` ends up with a missing ``score``.
    """
    submission = pd.read_csv(sample_submission).set_index("case_id")

    # Index-aligned assignment: every prediction lands on the row that has
    # the same case_id, regardless of the row order in test_df.
    predictions = test_df.set_index("case_id")["pred_prob"]
    submission["score"] = predictions

    # case_id is the index, so to_csv writes it out as the first column.
    output_path = os.path.join(submission_dir, submission_file_name)
    submission.to_csv(output_path)


# Predicting() is defined outside this cell. Its result must carry the
# case_id and pred_prob columns, because create_submission reads both.
test_df = Predicting(df_test, features)
create_submission(
    test_df=test_df,
    sample_submission=SAMPLE_SUB,
    submission_dir=CFG.SUB_DATA_PATH,
    submission_file_name="submission.csv",
)

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
# Load one trained model from disk (fold 4 is hard-coded in the file name).
model_path = CFG.MODEL_DATA_PATH / f'lightgbm_fold4_seed{CFG.seed}_ver{CFG.VER}.pkl'
# NOTE: pickle.load can execute arbitrary code embedded in the file; only
# load model files that were produced by this project.
# The context manager closes the file handle as soon as the model is loaded.
with open(model_path, 'rb') as model_file:
    model = pickle.load(model_file)

# Get the gain-based feature importances and the matching feature names
feature_importances = model.feature_importance(importance_type='gain')
feature_names = model.feature_name()

# Store the features and their importances in a DataFrame
importance_df = pd.DataFrame({
    'Feature': feature_names,
    'Importance': feature_importances
})

# Sort by importance, highest first
importance_df = importance_df.sort_values(by='Importance', ascending=False)

# Build the plot: one horizontal bar per feature
plt.figure(figsize=(10, 15))
sns.barplot(x='Importance', y='Feature', data=importance_df)
plt.title('Feature Importances')
plt.xlabel('Importance')
plt.ylabel('Feature')
plt.show()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
from sklearn.metrics import roc_auc_score

def permutation_importance(df, model, features, model_type, random_state=None):
    """Measure how much the score drops when each feature is shuffled.

    For every column in ``features`` the column is randomly permuted, the
    model is re-scored with ``roc_auc_score`` against ``df[CFG.target_col]``,
    and the drop relative to the unshuffled baseline is recorded.

    The caller's ``df`` is never modified: all shuffling happens on a private
    copy of the feature columns, so an exception raised while predicting or
    scoring cannot leave ``df`` with a permuted column.

    Args:
        df: DataFrame containing the ``features`` columns and the
            ``CFG.target_col`` column.
        model: Trained model, forwarded to ``predict``.
        features: Iterable of feature column names, in the order given to the model.
        model_type: Model family identifier, forwarded to ``predict``.
        random_state: Optional seed for a dedicated ``RandomState``. When
            ``None`` (the default) the global ``np.random`` state is used.

    Returns:
        dict mapping each feature name to ``baseline_score - permuted_score``.
        A larger value means shuffling that feature hurt the score more.
    """
    rng = np.random if random_state is None else np.random.RandomState(random_state)

    features = list(features)
    # One private copy up front; it is reused for every feature instead of
    # re-slicing df[features] on each iteration.
    feature_frame = df[features].copy()
    y_true = df[CFG.target_col]

    baseline_score = roc_auc_score(y_true, predict(model, feature_frame, model_type))

    n_rows = len(feature_frame)
    importances = {}
    for col in features:
        original_values = feature_frame[col].copy()

        # Shuffle by position and take `.values` so extension dtypes such as
        # `category` survive; np.random.permutation(series) would hand back
        # a plain ndarray and silently change the column dtype.
        order = rng.permutation(n_rows)
        feature_frame[col] = original_values.iloc[order].values

        permuted_score = roc_auc_score(y_true, predict(model, feature_frame, model_type))
        importances[col] = baseline_score - permuted_score

        # Put the column back before shuffling the next one.
        feature_frame[col] = original_values

    return importances

def predict(model, data, model_type):
    """Run ``model`` on ``data`` using the call convention of its model family.

    Args:
        model: Trained model object.
        data: Feature matrix handed to the model.
        model_type: One of ``'lightgbm'``, ``'xgboost'`` or ``'catboost'``.

    Returns:
        Whatever the underlying prediction call returns; for ``'catboost'``
        this is column 1 of ``predict_proba`` (presumably the positive-class
        probability -- confirm against the trained model).

    Raises:
        ValueError: If ``model_type`` is not one of the supported values.
    """
    if model_type == 'lightgbm':
        return model.predict(data)

    if model_type == 'catboost':
        class_probabilities = model.predict_proba(data)
        return class_probabilities[:, 1]

    if model_type == 'xgboost':
        # Imported here so xgboost is only needed when this branch is used.
        import xgboost as xgb
        dmatrix = xgb.DMatrix(data)
        return model.predict(dmatrix)

    raise ValueError("Unsupported model type")

def check_importance(method, df, features):
    """Average permutation importance over all saved fold models of one method.

    For each fold ``1..CFG.n_folds`` the pickled model
    ``{method}_fold{fold}_seed{CFG.seed}_ver{CFG.VER}.pkl`` is loaded from
    ``CFG.MODEL_DATA_PATH`` and evaluated with ``permutation_importance``.

    Args:
        method: Model family name; used both in the file name and as the
            ``model_type`` passed to ``permutation_importance``.
        df: DataFrame passed through to ``permutation_importance``.
        features: Feature column names passed through to ``permutation_importance``.

    Returns:
        dict mapping each feature name to its importance averaged across folds.
    """
    fold_importances = []
    for fold in range(1, CFG.n_folds + 1):
        model_path = CFG.MODEL_DATA_PATH / f'{method}_fold{fold}_seed{CFG.seed}_ver{CFG.VER}.pkl'
        # NOTE: pickle.load can execute arbitrary code embedded in the file;
        # only load model files that were produced by this project.
        # The context manager closes each file handle right after loading.
        with open(model_path, 'rb') as model_file:
            model = pickle.load(model_file)
        fold_importances.append(permutation_importance(df, model, features, method))

    # One row per fold, one column per feature -> column-wise mean.
    return pd.DataFrame(fold_importances).mean().to_dict()

def run(df, features):
    """Collect fold-averaged permutation importances for every configured method.

    Args:
        df: DataFrame passed through to ``check_importance``.
        features: Feature column names passed through to ``check_importance``.

    Returns:
        DataFrame indexed by feature name with one ``'{method}_perm'`` column
        per entry of ``CFG.METHOD_LIST``.
    """
    # Build the frame in a single constructor call from a mapping of Series,
    # so alignment on feature names is explicit instead of depending on how
    # DataFrame.__setitem__ coerces a plain dict assigned to an empty frame.
    columns = {
        f'{method}_perm': pd.Series(check_importance(method, df, features), dtype=float)
        for method in CFG.METHOD_LIST
    }
    return pd.DataFrame(columns)

def plot_feature_importances(df, row_height=0.25):
    """Draw one horizontal bar chart of feature importances per column of ``df``.

    Args:
        df: DataFrame indexed by feature name with one column of importances
            per method; the panels are laid out side by side in column order.
        row_height: Figure height in inches allotted to each feature row.

    The figure height follows the number of features (rows of ``df``), since
    each panel draws one bar per feature. Nothing is drawn when ``df`` has no
    rows or no columns.
    """
    n_features, n_methods = df.shape
    if n_features == 0 or n_methods == 0:
        return

    # Apply the theme before the figure exists so it affects the whole figure.
    sns.set_theme(style="whitegrid")

    # Keep a sensible minimum height for frames with only a few features.
    fig_height = max(4.0, row_height * n_features)
    fig, axes = plt.subplots(1, n_methods, figsize=(10, fig_height), squeeze=False)

    for ax, method in zip(axes[0], df.columns):
        sns.barplot(x=df[method], y=df.index, color="blue", ax=ax)
        ax.set_title(f'Feature Importances for {method}')
        ax.set_xlabel('Importance')
        ax.set_ylabel('Features')

    fig.tight_layout()
    plt.show()


# Permutation importance on the training frame for every method listed in
# CFG.METHOD_LIST, followed by one bar-chart panel per method.
importance_perm = run(df=df_train, features=features)
plot_feature_importances(df=importance_perm)