In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested from the HTTP response per read() call while downloading.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<URL-encoded download URL>" entries; each entry is
# downloaded and unpacked into KAGGLE_INPUT_PATH/<directory> by the loop below.
# NOTE(review): the URL is a signed link carrying X-Goog-Date=20240624T222212Z and
# X-Goog-Expires=259200, so it has presumably expired - confirm before relying on this cell.
DATA_SOURCE_MAPPING = 'playground-series-s4e6:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F73290%2F8710574%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240624%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240624T222212Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D2952bc020b1c6213fd0d8074fbd6b2a6b81ff1ccb09d12663056cebffc5610e8b2fedd8058ccb8c828a0f8ef21f7b4d29017f5be63e1cdbc9091cafb27d6754affed294fe2447d3bad46a8af2b2f5363009da8c6688955b1e28308e5fe0a5ec4a7d505f1c3198c95a74fa6d14043e8598e99249bf32821c1e8863e1e66135ededb28fcae66f83d990ca72e0164b4b92532fcb1bdff28f1e4c0ece9b0a88c1c575f7365403fd2bd5aa19787449d0325e7417f10cfc3b421cfad5382fdb6fbca4d1a5a684370a3afc233fcb076d0c2ec508e48cf0eb2d1d5c2855f4306bcbd88b038a76c8340138c6bc0d70292cb6a27047a62c3a4cbfe2159f21cb7f12c8b6247'

# Directory under which every data source is unpacked.
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory that is created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# NOTE(review): not referenced anywhere in the visible part of the notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source and unpack it below KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Each entry is "<directory>:<URL-encoded download URL>"; split on the first colon
    # only so that an unexpected ':' in the URL part cannot break the unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # Content-Length may be absent (None) or zero; in that case the progress
            # bar cannot be scaled, so only the running byte count is shown.
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                if total_bytes > 0:
                    # Clamp to the bar width in case more bytes arrive than announced.
                    done = min(50, int(50 * dl / total_bytes))
                    sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                else:
                    sys.stdout.write(f"\r{dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # The tar branch reopens the temp file by name, so buffered bytes must be
            # on disk first; rewinding also gives ZipFile a handle at the start.
            tfile.flush()
            tfile.seek(0)
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name: reusing `tarfile` here would
                # overwrite the imported module for the rest of the notebook.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        # HTTPError is handled before OSError (its base class) to give the more
        # specific hint about signed URLs that have expired.
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
#necessary
import pandas as pd#导入csv文件的库
import numpy as np#进行矩阵运算的库
#model: LightGBM classifier, evaluation logging, and early stopping (to prevent overfitting)
from  lightgbm import LGBMClassifier,log_evaluation,early_stopping
#metric
from sklearn.metrics import accuracy_score
#KFold splits the data directly into k folds; StratifiedKFold additionally keeps each class's proportion in every fold
from sklearn.model_selection import StratifiedKFold
import warnings#避免一些可以忽略的报错
# filterwarnings() installs a warning filter; the 'ignore' action suppresses every warning
# raised after this point.
# NOTE(review): this also hides deprecation/performance warnings from the libraries used below.
warnings.filterwarnings('ignore')

import random#提供了一些用于生成随机数的函数
# Fix the random seeds so that the results can be reproduced.
def seed_everything(seed):
    """Seed Python's built-in ``random`` module and NumPy's global RNG with ``seed``."""
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(seed=2024)

In [None]:
# Target classes: Graduate, Dropout, Enrolled.
labels = ['Graduate', 'Enrolled', 'Dropout']
# Two lookups keyed on the position of each label in `labels`:
# label -> integer code and integer code -> label.
target2idx = {name: position for position, name in enumerate(labels)}
idx2target = {position: name for position, name in enumerate(labels)}

train_df = pd.read_csv("/kaggle/input/playground-series-s4e6/train.csv")
print(f"len(train_df):{len(train_df)}")
# Replace the string labels with their integer codes; a label missing from
# `target2idx` raises KeyError.
train_df['Target'] = train_df['Target'].apply(lambda label: target2idx[label])
train_df.head()

In [None]:
# Load the competition test split, report its number of rows and preview the first rows.
test_df=pd.read_csv("/kaggle/input/playground-series-s4e6/test.csv")
print(f"len(test_df):{len(test_df)}")
test_df.head()

In [None]:
# For every low-cardinality column, collect the values that are frequent enough to be
# one-hot encoded. 'Nacionality' is excluded because it gets its own feature later on.
category = []
for col in test_df.drop(['Nacionality'], axis=1).columns:
    if train_df[col].nunique() < 50:  # treated as a categorical variable
        # Number of training rows per value, most frequent value first.
        col_value_counts = train_df[col].value_counts()
        # Values seen fewer than 20 times are considered too rare and are dropped.
        # Filtering the Series directly (instead of reset_index() + a 'count' column)
        # avoids relying on the column naming that pandas introduced in version 2.0.
        unique_value = col_value_counts[col_value_counts >= 20].index.values
        category.append([col, unique_value])
print(f"category:{category}")

def feature_engineer(df, categories=None):
    """Add indicator and pairwise-interaction features to ``df`` in place.

    Steps, in order:
      1. drop the ``id`` column if it is present,
      2. turn ``Nacionality`` into a boolean "equals 1" flag,
      3. add one boolean column ``"{col}_{value}"`` per (column, value) pair in
         ``categories``,
      4. add sum, difference, product and ratio columns for every pair of
         'Unemployment rate', 'Inflation rate' and 'GDP'.

    Args:
        df: frame to extend; it is modified in place.
        categories: iterable of ``[column, values]`` pairs to one-hot encode.
            Defaults to the module-level ``category`` list.

    Returns:
        The same ``df`` object, for convenient assignment.
    """
    if categories is None:
        categories = category
    # errors='ignore' keeps the function re-runnable on a frame whose id was already dropped.
    df.drop(['id'], axis=1, inplace=True, errors='ignore')
    # Original author's note: nationality 1 is the vast majority; every other value
    # accounts for only about 0.01 of the rows.
    df['Nacionality'] = (df['Nacionality'] == 1)
    # Original author's note: Gender 0:1 is roughly 2:1 and 0 is clearly more likely to drop out.
    for col, unique_value in categories:
        for value in unique_value:
            df[f"{col}_{value}"] = (df[col] == value)
    cross_cols = ['Unemployment rate', 'Inflation rate', 'GDP']
    for i, left in enumerate(cross_cols):
        for right in cross_cols[i + 1:]:
            df[f"{left}+{right}"] = df[left] + df[right]
            df[f"{left}-{right}"] = df[left] - df[right]
            df[f"{left}*{right}"] = df[left] * df[right]
            # A zero denominator yields inf/NaN here rather than raising.
            df[f"{left}/{right}"] = df[left] / df[right]
    return df
train_feats = feature_engineer(train_df)
test_feats = feature_engineer(test_df)

for col in test_feats.columns:
    # Compress right-skewed columns with log1p. The column must have more than 10
    # distinct training values, a training skewness above 1, and only values above -1.
    # The lower bound is checked on BOTH splits: log1p is undefined at or below -1, so
    # checking the training minimum alone could silently turn test values into NaN/-inf.
    is_right_skewed = (train_feats[col].nunique() > 10) and (train_feats[col].skew() > 1)
    if is_right_skewed and train_feats[col].min() > -1 and test_feats[col].min() > -1:
        print(f"skew:{col}")
        train_feats[col] = np.log1p(train_feats[col])
        test_feats[col] = np.log1p(test_feats[col])

train_feats.head()

In [None]:
# Pearson correlation coefficient between two groups of values.
def pearson_corr(x1,x2,eps=1e-15):
    """Return the Pearson correlation of ``x1`` and ``x2``.

    The mean product of the centred inputs is divided by the product of their
    (population) standard deviations; ``eps`` is added to the denominator so a
    constant input gives 0 instead of a division by zero.
    """
    centered_x1 = x1 - np.mean(x1)
    centered_x2 = x2 - np.mean(x2)
    covariance = np.mean(centered_x1 * centered_x2)
    spread = np.std(x1) * np.std(x2) + eps
    return covariance / spread

def corr_feats(margin=0.1):
    """Add a correlation-weighted aggregate column ``corr_{margin}`` to both frames.

    For every column of ``test_feats`` whose name does not contain 'corr', the Pearson
    correlation with ``Target`` is computed on the non-missing rows of ``train_feats``.
    Columns whose absolute correlation exceeds ``margin`` are min-max scaled with the
    training minimum/maximum, multiplied by that correlation and summed into the new
    column (missing scaled values count as 0).

    Works on the module-level ``train_feats`` / ``test_feats`` frames, modifies them
    in place and returns None.
    """
    feature_name = f'corr_{margin}'
    train_feats[feature_name] = 0
    test_feats[feature_name] = 0
    # Leave out every 'corr' column (earlier aggregates and the one just initialised)
    # so aggregates are never built from other aggregates.
    base_cols = [col for col in test_feats.columns if 'corr' not in col]
    for col in base_cols:
        tmp_df = train_feats[[col, 'Target']].dropna()
        pearson = pearson_corr(tmp_df[col].values, tmp_df['Target'].values)
        # A NaN correlation fails this comparison and is therefore skipped as well.
        if abs(pearson) > margin:
            max_value, min_value = float(tmp_df[col].max()), float(tmp_df[col].min())
            value_range = max_value - min_value
            train_feats[feature_name] += ((train_feats[col] - min_value) / value_range).fillna(0) * pearson
            test_feats[feature_name] += ((test_feats[col] - min_value) / value_range).fillna(0) * pearson


# Build one aggregate per correlation threshold, from strict to permissive.
for margin in [0.1,0.05,0.025,0.01,0.005]:
    corr_feats(margin)
train_feats.head()

In [None]:
# Columns that take exactly two distinct values in the training data.
cols = [col for col in test_feats.columns if train_feats[col].nunique() == 2]

# total_count: signed tally of the binary flags whose two groups differ noticeably in
# mean Target. The same signed update is applied to the test frame; otherwise the
# feature would stay 0 for every test row while carrying signal in training.
train_feats['total_count'] = 0
test_feats['total_count'] = 0
for col in cols:
    target_groupby_col = train_feats['Target'].groupby(train_feats[col]).mean().reset_index()
    zero_target = target_groupby_col[target_groupby_col[col] == 0]['Target'].values
    one_target = target_groupby_col[target_groupby_col[col] == 1]['Target'].values
    if len(zero_target) == 0 or len(one_target) == 0:
        # Two-valued column that is not coded as 0/1 (or False/True): nothing to compare.
        continue
    zero_mean, one_mean = float(zero_target[0]), float(one_target[0])
    if abs(zero_mean - one_mean) > 0.025:  # large gap between the groups: useful feature
        if zero_mean > one_mean:
            train_feats['total_count'] -= train_feats[col]
            test_feats['total_count'] -= test_feats[col]
        else:  # one_mean > zero_mean
            train_feats['total_count'] += train_feats[col]
            test_feats['total_count'] += test_feats[col]
train_feats.head()

In [None]:
# Rank and value-count features, computed over the train and test rows together.
total_feats = pd.concat((train_feats, test_feats), axis=0)
for col in test_feats.columns:
    combined_col = total_feats[col]
    # Rank of each value among all rows, divided by the total number of rows.
    total_feats[f"{col}_rank"] = combined_col.rank() / len(total_feats)
    # Number of rows (train + test) sharing the same value.
    value_status = combined_col.value_counts()
    total_feats[f"{col}_count"] = combined_col.map(value_status)

# Split the combined frame back at the original train length; the test part drops
# 'Target', which the test frame did not have before the concat.
n_train = len(train_feats)
train_feats = total_feats.iloc[:n_train]
test_feats = total_feats.iloc[n_train:].drop(['Target'], axis=1)
train_feats.head()

In [None]:
useless_cols=['Marital status_5', 'Marital status_6', 'Application mode_10', 'Previous qualification_9', 'Previous qualification_2', 'Previous qualification_10', 'Previous qualification_43', 'Previous qualification_38', 'Previous qualification_4', "Mother's qualification_5", "Mother's qualification_40", "Mother's qualification_39", "Mother's qualification_9", "Mother's qualification_11", "Mother's qualification_41", "Mother's qualification_6", "Father's qualification_39", "Father's qualification_36", "Father's qualification_29", "Father's qualification_40", "Mother's occupation_99", "Mother's occupation_141", "Mother's occupation_123", "Mother's occupation_144", "Mother's occupation_192", 'Age at enrollment_51', 'Age at enrollment_58', 'Age at enrollment_55', 'Age at enrollment_53', 'Age at enrollment_17', 'Age at enrollment_52', 'Age at enrollment_54', 'Age at enrollment_70', 'Curricular units 1st sem (credited)_10', 'Curricular units 1st sem (credited)_11', 'Curricular units 1st sem (credited)_12', 'Curricular units 1st sem (credited)_13', 'Curricular units 1st sem (credited)_14', 'Curricular units 1st sem (enrolled)_17', 'Curricular units 1st sem (enrolled)_18', 'Curricular units 1st sem (enrolled)_3', 'Curricular units 1st sem (enrolled)_2', 'Curricular units 1st sem (enrolled)_1', 'Curricular units 1st sem (enrolled)_16', 'Curricular units 1st sem (evaluations)_4', 'Curricular units 1st sem (evaluations)_21', 'Curricular units 1st sem (evaluations)_20', 'Curricular units 1st sem (evaluations)_1', 'Curricular units 1st sem (evaluations)_2', 'Curricular units 1st sem (evaluations)_22', 'Curricular units 1st sem (evaluations)_3', 'Curricular units 1st sem (evaluations)_24', 'Curricular units 1st sem (evaluations)_23', 'Curricular units 1st sem (approved)_14', 'Curricular units 1st sem (approved)_18', 'Curricular units 1st sem (approved)_17', 'Curricular units 1st sem (approved)_15', 'Curricular units 1st sem (without evaluations)_3', 'Curricular units 1st sem 
(without evaluations)_7', 'Curricular units 1st sem (without evaluations)_6', 'Curricular units 1st sem (without evaluations)_8', 'Curricular units 1st sem (without evaluations)_5', 'Curricular units 2nd sem (credited)_11', 'Curricular units 2nd sem (credited)_7', 'Curricular units 2nd sem (credited)_9', 'Curricular units 2nd sem (credited)_10', 'Curricular units 2nd sem (credited)_12', 'Curricular units 2nd sem (credited)_8', 'Curricular units 2nd sem (credited)_13', 'Curricular units 2nd sem (enrolled)_14', 'Curricular units 2nd sem (enrolled)_17', 'Curricular units 2nd sem (enrolled)_2', 'Curricular units 2nd sem (evaluations)_4', 'Curricular units 2nd sem (evaluations)_22', 'Curricular units 2nd sem (evaluations)_21', 'Curricular units 2nd sem (evaluations)_20', 'Curricular units 2nd sem (without evaluations)_4', 'Curricular units 2nd sem (without evaluations)_5', 'Curricular units 2nd sem (without evaluations)_6', 'Curricular units 2nd sem (without evaluations)_7', 'Curricular units 2nd sem (without evaluations)_8','Curricular units 2nd sem (without evaluations)', 'Educational special needs_0','Educational special needs', 'Marital status_4', 'Application mode_51', 'Application mode_16', 'Application mode_53', 'Application mode_5', 'Application order_6', 'Course_9991', 'Course_9556', 'Course_33', 'Previous qualification_42', 'Previous qualification_6', "Mother's qualification_4", "Father's qualification_4", "Father's qualification_5", "Father's qualification_11", 'Educational special needs_1', 'Age at enrollment_35', 'Age at enrollment_36', 'Age at enrollment_38', 'Age at enrollment_45', 'Age at enrollment_40', 'Age at enrollment_44', 'Age at enrollment_48', 'Age at enrollment_50', 'Age at enrollment_49', 'Age at enrollment_46', 'Age at enrollment_42', 'Curricular units 1st sem (credited)_3', 'Curricular units 1st sem (credited)_6', 'Curricular units 1st sem (credited)_4', 'Curricular units 1st sem (credited)_7', 'Curricular units 1st sem (credited)_5', 
'Curricular units 1st sem (credited)_9', 'Curricular units 1st sem (enrolled)_12', 'Curricular units 1st sem (enrolled)_11', 'Curricular units 1st sem (enrolled)_10', 'Curricular units 1st sem (enrolled)_4', 'Curricular units 1st sem (enrolled)_13', 'Curricular units 1st sem (enrolled)_15', 'Curricular units 1st sem (enrolled)_14', 'Curricular units 1st sem (evaluations)_16', 'Curricular units 1st sem (evaluations)_18', 'Curricular units 1st sem (approved)_12', 'Curricular units 1st sem (approved)_10', 'Curricular units 1st sem (approved)_13', 'Curricular units 1st sem (without evaluations)_4', 'Curricular units 2nd sem (credited)_2', 'Curricular units 2nd sem (credited)_3', 'Curricular units 2nd sem (enrolled)_11', 'Curricular units 2nd sem (enrolled)_12', 'Curricular units 2nd sem (enrolled)_13', 'Curricular units 2nd sem (enrolled)_4', 'Curricular units 2nd sem (evaluations)_14', 'Curricular units 2nd sem (evaluations)_17', 'Curricular units 2nd sem (evaluations)_18', 'Curricular units 2nd sem (approved)_11', 'Curricular units 2nd sem (approved)_10', 'Curricular units 2nd sem (approved)_13', 'Curricular units 2nd sem (without evaluations)_3', 'Unemployment rate_13.9', 'Inflation rate_-0.3', 'GDP_-1.7', 'GDP_-4.06', 'GDP_-0.92', 'GDP_0.79','Previous qualification_40', "Mother's occupation_8", "Mother's occupation_191", 'Curricular units 1st sem (credited)_8', 'Curricular units 1st sem (evaluations)_17', 'Curricular units 1st sem (approved)_8', 'Curricular units 2nd sem (enrolled)_0', 'Curricular units 2nd sem (evaluations)_19', 'Curricular units 2nd sem (approved)_12','Curricular units 1st sem (without evaluations)', 'Marital status_count', 'Application order_rank', 'Daytime/evening attendance_rank', 'Daytime/evening attendance_count', 'Nacionality_rank', 'Nacionality_count', 'Displaced_rank', 'Displaced_count', 'Educational special needs_rank', 'Educational special needs_count', 'Debtor_rank', 'Debtor_count', 'Tuition fees up to date_rank', 'Tuition fees up to 
date_count', 'Gender_rank', 'Gender_count', 'Scholarship holder_rank', 'Scholarship holder_count', 'International_rank', 'International_count', 'Curricular units 1st sem (without evaluations)_rank', 'Curricular units 1st sem (without evaluations)_count', 'Curricular units 2nd sem (without evaluations)_rank', 'Curricular units 2nd sem (without evaluations)_count', 'Marital status_1_rank', 'Marital status_1_count', 'Marital status_2_rank', 'Marital status_2_count', 'Marital status_4_rank', 'Marital status_4_count', 'Marital status_5_rank', 'Marital status_5_count', 'Marital status_6_rank', 'Marital status_6_count', 'Application mode_1_rank', 'Application mode_1_count', 'Application mode_17_rank', 'Application mode_17_count', 'Application mode_39_rank', 'Application mode_39_count', 'Application mode_44_rank', 'Application mode_44_count', 'Application mode_43_rank', 'Application mode_43_count', 'Application mode_7_rank', 'Application mode_7_count', 'Application mode_18_rank', 'Application mode_18_count', 'Application mode_42_rank', 'Application mode_42_count', 'Application mode_51_rank', 'Application mode_51_count', 'Application mode_16_rank', 'Application mode_16_count', 'Application mode_53_rank', 'Application mode_53_count', 'Application mode_15_rank', 'Application mode_15_count', 'Application mode_5_rank', 'Application mode_5_count', 'Application mode_10_rank', 'Application mode_10_count', 'Application order_1_rank', 'Application order_1_count', 'Application order_2_rank', 'Application order_2_count', 'Application order_3_rank', 'Application order_3_count', 'Application order_4_rank', 'Application order_4_count', 'Application order_5_rank', 'Application order_5_count', 'Application order_6_rank', 'Application order_6_count', 'Course_9500_rank', 'Course_9500_count', 'Course_9773_rank', 'Course_9773_count', 'Course_9238_rank', 'Course_9238_count', 'Course_9147_rank', 'Course_9147_count', 'Course_9254_rank', 'Course_9254_count', 'Course_9085_rank', 
'Course_9085_count', 'Course_9670_rank', 'Course_9670_count', 'Course_9991_rank', 'Course_9991_count', 'Course_9003_rank', 'Course_9003_count', 'Course_9070_rank', 'Course_9070_count', 'Course_9853_rank', 'Course_9853_count', 'Course_9119_rank', 'Course_9119_count', 'Course_171_rank', 'Course_171_count', 'Course_8014_rank', 'Course_8014_count', 'Course_9130_rank', 'Course_9130_count', 'Course_9556_rank', 'Course_9556_count', 'Course_33_rank', 'Course_33_count', 'Daytime/evening attendance_1_rank', 'Daytime/evening attendance_1_count', 'Daytime/evening attendance_0_rank', 'Daytime/evening attendance_0_count', 'Previous qualification_1_rank', 'Previous qualification_1_count', 'Previous qualification_19_rank', 'Previous qualification_19_count', 'Previous qualification_39_rank', 'Previous qualification_39_count', 'Previous qualification_3_rank', 'Previous qualification_3_count', 'Previous qualification_12_rank', 'Previous qualification_12_count', 'Previous qualification_9_rank', 'Previous qualification_9_count', 'Previous qualification_40_rank', 'Previous qualification_40_count', 'Previous qualification_42_rank', 'Previous qualification_42_count', 'Previous qualification_2_rank', 'Previous qualification_2_count', 'Previous qualification_6_rank', 'Previous qualification_6_count', 'Previous qualification_10_rank', 'Previous qualification_10_count', 'Previous qualification_43_rank', 'Previous qualification_43_count', 'Previous qualification_38_rank', 'Previous qualification_38_count', 'Previous qualification_4_rank', 'Previous qualification_4_count', "Mother's qualification_1_rank", "Mother's qualification_1_count", "Mother's qualification_19_rank", "Mother's qualification_19_count", "Mother's qualification_37_rank", "Mother's qualification_37_count", "Mother's qualification_38_rank", "Mother's qualification_38_count", "Mother's qualification_3_rank", "Mother's qualification_3_count", "Mother's qualification_34_rank", "Mother's qualification_34_count", "Mother's 
qualification_2_rank", "Mother's qualification_2_count", "Mother's qualification_12_rank", "Mother's qualification_12_count", "Mother's qualification_4_rank", "Mother's qualification_4_count", "Mother's qualification_5_rank", "Mother's qualification_5_count", "Mother's qualification_40_rank", "Mother's qualification_40_count", "Mother's qualification_39_rank", "Mother's qualification_39_count", "Mother's qualification_9_rank", "Mother's qualification_9_count", "Mother's qualification_11_rank", "Mother's qualification_11_count", "Mother's qualification_41_rank", "Mother's qualification_41_count", "Mother's qualification_6_rank", "Mother's qualification_6_count", "Father's qualification_37_rank", "Father's qualification_37_count", "Father's qualification_19_rank", "Father's qualification_19_count", "Father's qualification_1_rank", "Father's qualification_1_count", "Father's qualification_38_rank", "Father's qualification_38_count", "Father's qualification_3_rank", "Father's qualification_3_count", "Father's qualification_34_rank", "Father's qualification_34_count", "Father's qualification_2_rank", "Father's qualification_2_count", "Father's qualification_12_rank", "Father's qualification_12_count", "Father's qualification_4_rank", "Father's qualification_4_count", "Father's qualification_39_rank", "Father's qualification_39_count", "Father's qualification_5_rank", "Father's qualification_5_count", "Father's qualification_11_rank", "Father's qualification_11_count", "Father's qualification_36_rank", "Father's qualification_36_count", "Father's qualification_29_rank", "Father's qualification_29_count", "Father's qualification_40_rank", "Father's qualification_40_count", "Mother's occupation_9_rank", "Mother's occupation_9_count", "Mother's occupation_4_rank", "Mother's occupation_4_count", "Mother's occupation_5_rank", "Mother's occupation_5_count", "Mother's occupation_3_rank", "Mother's occupation_3_count", "Mother's occupation_7_rank", "Mother's occupation_7_count", 
"Mother's occupation_2_rank", "Mother's occupation_2_count", "Mother's occupation_0_rank", "Mother's occupation_0_count", "Mother's occupation_90_rank", "Mother's occupation_90_count", "Mother's occupation_6_rank", "Mother's occupation_6_count", "Mother's occupation_1_rank", "Mother's occupation_1_count", "Mother's occupation_8_rank", "Mother's occupation_8_count", "Mother's occupation_191_rank", "Mother's occupation_191_count", "Mother's occupation_99_rank", "Mother's occupation_99_count", "Mother's occupation_194_rank", "Mother's occupation_194_count", "Mother's occupation_141_rank", "Mother's occupation_141_count", "Mother's occupation_123_rank", "Mother's occupation_123_count", "Mother's occupation_144_rank", "Mother's occupation_144_count", "Mother's occupation_192_rank", "Mother's occupation_192_count", 'Displaced_1_rank', 'Displaced_1_count', 'Displaced_0_rank', 'Displaced_0_count', 'Educational special needs_0_rank', 'Educational special needs_0_count', 'Educational special needs_1_rank', 'Educational special needs_1_count', 'Debtor_0_rank', 'Debtor_0_count', 'Debtor_1_rank', 'Debtor_1_count', 'Tuition fees up to date_1_rank', 'Tuition fees up to date_1_count', 'Tuition fees up to date_0_rank', 'Tuition fees up to date_0_count', 'Gender_0_rank', 'Gender_0_count', 'Gender_1_rank', 'Gender_1_count', 'Scholarship holder_0_rank', 'Scholarship holder_0_count', 'Scholarship holder_1_rank', 'Scholarship holder_1_count', 'Age at enrollment_18_rank', 'Age at enrollment_18_count', 'Age at enrollment_19_rank', 'Age at enrollment_19_count', 'Age at enrollment_20_rank', 'Age at enrollment_20_count', 'Age at enrollment_21_rank', 'Age at enrollment_21_count', 'Age at enrollment_22_rank', 'Age at enrollment_22_count', 'Age at enrollment_24_rank', 'Age at enrollment_24_count', 'Age at enrollment_25_rank', 'Age at enrollment_25_count', 'Age at enrollment_27_rank', 'Age at enrollment_27_count', 'Age at enrollment_28_rank', 'Age at enrollment_28_count', 'Age at 
enrollment_26_rank', 'Age at enrollment_26_count', 'Age at enrollment_23_rank', 'Age at enrollment_23_count', 'Age at enrollment_29_rank', 'Age at enrollment_29_count', 'Age at enrollment_32_rank', 'Age at enrollment_32_count', 'Age at enrollment_31_rank', 'Age at enrollment_31_count', 'Age at enrollment_34_rank', 'Age at enrollment_34_count', 'Age at enrollment_30_rank', 'Age at enrollment_30_count', 'Age at enrollment_33_rank', 'Age at enrollment_33_count', 'Age at enrollment_35_rank', 'Age at enrollment_35_count', 'Age at enrollment_37_rank', 'Age at enrollment_37_count', 'Age at enrollment_39_rank', 'Age at enrollment_39_count', 'Age at enrollment_36_rank', 'Age at enrollment_36_count', 'Age at enrollment_38_rank', 'Age at enrollment_38_count', 'Age at enrollment_41_rank', 'Age at enrollment_41_count', 'Age at enrollment_43_rank', 'Age at enrollment_43_count', 'Age at enrollment_45_rank', 'Age at enrollment_45_count', 'Age at enrollment_47_rank', 'Age at enrollment_47_count', 'Age at enrollment_40_rank', 'Age at enrollment_40_count', 'Age at enrollment_44_rank', 'Age at enrollment_44_count', 'Age at enrollment_48_rank', 'Age at enrollment_48_count', 'Age at enrollment_50_rank', 'Age at enrollment_50_count', 'Age at enrollment_49_rank', 'Age at enrollment_49_count', 'Age at enrollment_46_rank', 'Age at enrollment_46_count', 'Age at enrollment_42_rank', 'Age at enrollment_42_count', 'Age at enrollment_51_rank', 'Age at enrollment_51_count', 'Age at enrollment_58_rank', 'Age at enrollment_58_count', 'Age at enrollment_55_rank', 'Age at enrollment_55_count', 'Age at enrollment_53_rank', 'Age at enrollment_53_count', 'Age at enrollment_17_rank', 'Age at enrollment_17_count', 'Age at enrollment_52_rank', 'Age at enrollment_52_count', 'Age at enrollment_54_rank', 'Age at enrollment_54_count', 'Age at enrollment_70_rank', 'Age at enrollment_70_count', 'International_0_rank', 'International_0_count', 'International_1_rank', 'International_1_count', 'Curricular units 1st 
sem (credited)_0_rank', 'Curricular units 1st sem (credited)_0_count', 'Curricular units 1st sem (credited)_2_rank', 'Curricular units 1st sem (credited)_2_count', 'Curricular units 1st sem (credited)_1_rank', 'Curricular units 1st sem (credited)_1_count', 'Curricular units 1st sem (credited)_3_rank', 'Curricular units 1st sem (credited)_3_count', 'Curricular units 1st sem (credited)_6_rank', 'Curricular units 1st sem (credited)_6_count', 'Curricular units 1st sem (credited)_4_rank', 'Curricular units 1st sem (credited)_4_count', 'Curricular units 1st sem (credited)_7_rank', 'Curricular units 1st sem (credited)_7_count', 'Curricular units 1st sem (credited)_5_rank', 'Curricular units 1st sem (credited)_5_count', 'Curricular units 1st sem (credited)_8_rank', 'Curricular units 1st sem (credited)_8_count', 'Curricular units 1st sem (credited)_9_rank', 'Curricular units 1st sem (credited)_9_count', 'Curricular units 1st sem (credited)_10_rank', 'Curricular units 1st sem (credited)_10_count', 'Curricular units 1st sem (credited)_11_rank', 'Curricular units 1st sem (credited)_11_count', 'Curricular units 1st sem (credited)_12_rank', 'Curricular units 1st sem (credited)_12_count', 'Curricular units 1st sem (credited)_13_rank', 'Curricular units 1st sem (credited)_13_count', 'Curricular units 1st sem (credited)_14_rank', 'Curricular units 1st sem (credited)_14_count', 'Curricular units 1st sem (enrolled)_6_rank', 'Curricular units 1st sem (enrolled)_6_count', 'Curricular units 1st sem (enrolled)_5_rank', 'Curricular units 1st sem (enrolled)_5_count', 'Curricular units 1st sem (enrolled)_7_rank', 'Curricular units 1st sem (enrolled)_7_count', 'Curricular units 1st sem (enrolled)_8_rank', 'Curricular units 1st sem (enrolled)_8_count', 'Curricular units 1st sem (enrolled)_0_rank', 'Curricular units 1st sem (enrolled)_0_count', 'Curricular units 1st sem (enrolled)_12_rank', 'Curricular units 1st sem (enrolled)_12_count', 'Curricular units 1st sem (enrolled)_11_rank', 
'Curricular units 1st sem (enrolled)_11_count', 'Curricular units 1st sem (enrolled)_10_rank', 'Curricular units 1st sem (enrolled)_10_count', 'Curricular units 1st sem (enrolled)_4_rank', 'Curricular units 1st sem (enrolled)_4_count', 'Curricular units 1st sem (enrolled)_9_rank', 'Curricular units 1st sem (enrolled)_9_count', 'Curricular units 1st sem (enrolled)_13_rank', 'Curricular units 1st sem (enrolled)_13_count', 'Curricular units 1st sem (enrolled)_15_rank', 'Curricular units 1st sem (enrolled)_15_count', 'Curricular units 1st sem (enrolled)_14_rank', 'Curricular units 1st sem (enrolled)_14_count', 'Curricular units 1st sem (enrolled)_17_rank', 'Curricular units 1st sem (enrolled)_17_count', 'Curricular units 1st sem (enrolled)_18_rank', 'Curricular units 1st sem (enrolled)_18_count', 'Curricular units 1st sem (enrolled)_3_rank', 'Curricular units 1st sem (enrolled)_3_count', 'Curricular units 1st sem (enrolled)_2_rank', 'Curricular units 1st sem (enrolled)_2_count', 'Curricular units 1st sem (enrolled)_1_rank', 'Curricular units 1st sem (enrolled)_1_count', 'Curricular units 1st sem (enrolled)_16_rank', 'Curricular units 1st sem (enrolled)_16_count', 'Curricular units 1st sem (evaluations)_8_rank', 'Curricular units 1st sem (evaluations)_8_count', 'Curricular units 1st sem (evaluations)_7_rank', 'Curricular units 1st sem (evaluations)_7_count', 'Curricular units 1st sem (evaluations)_6_rank', 'Curricular units 1st sem (evaluations)_6_count', 'Curricular units 1st sem (evaluations)_0_rank', 'Curricular units 1st sem (evaluations)_0_count', 'Curricular units 1st sem (evaluations)_9_rank', 'Curricular units 1st sem (evaluations)_9_count', 'Curricular units 1st sem (evaluations)_10_rank', 'Curricular units 1st sem (evaluations)_10_count', 'Curricular units 1st sem (evaluations)_5_rank', 'Curricular units 1st sem (evaluations)_5_count', 'Curricular units 1st sem (evaluations)_11_rank', 'Curricular units 1st sem (evaluations)_11_count', 'Curricular units 1st sem 
(evaluations)_12_rank', 'Curricular units 1st sem (evaluations)_12_count', 'Curricular units 1st sem (evaluations)_13_rank', 'Curricular units 1st sem (evaluations)_13_count', 'Curricular units 1st sem (evaluations)_14_rank', 'Curricular units 1st sem (evaluations)_14_count', 'Curricular units 1st sem (evaluations)_15_rank', 'Curricular units 1st sem (evaluations)_15_count', 'Curricular units 1st sem (evaluations)_16_rank', 'Curricular units 1st sem (evaluations)_16_count', 'Curricular units 1st sem (evaluations)_17_rank', 'Curricular units 1st sem (evaluations)_17_count', 'Curricular units 1st sem (evaluations)_18_rank', 'Curricular units 1st sem (evaluations)_18_count', 'Curricular units 1st sem (evaluations)_4_rank', 'Curricular units 1st sem (evaluations)_4_count', 'Curricular units 1st sem (evaluations)_19_rank', 'Curricular units 1st sem (evaluations)_19_count', 'Curricular units 1st sem (evaluations)_21_rank', 'Curricular units 1st sem (evaluations)_21_count', 'Curricular units 1st sem (evaluations)_20_rank', 'Curricular units 1st sem (evaluations)_20_count', 'Curricular units 1st sem (evaluations)_1_rank', 'Curricular units 1st sem (evaluations)_1_count', 'Curricular units 1st sem (evaluations)_2_rank', 'Curricular units 1st sem (evaluations)_2_count', 'Curricular units 1st sem (evaluations)_22_rank', 'Curricular units 1st sem (evaluations)_22_count', 'Curricular units 1st sem (evaluations)_3_rank', 'Curricular units 1st sem (evaluations)_3_count', 'Curricular units 1st sem (evaluations)_24_rank', 'Curricular units 1st sem (evaluations)_24_count', 'Curricular units 1st sem (evaluations)_23_rank', 'Curricular units 1st sem (evaluations)_23_count', 'Curricular units 1st sem (approved)_6_rank', 'Curricular units 1st sem (approved)_6_count', 'Curricular units 1st sem (approved)_0_rank', 'Curricular units 1st sem (approved)_0_count', 'Curricular units 1st sem (approved)_5_rank', 'Curricular units 1st sem (approved)_5_count', 'Curricular units 1st sem 
(approved)_7_rank', 'Curricular units 1st sem (approved)_7_count', 'Curricular units 1st sem (approved)_4_rank', 'Curricular units 1st sem (approved)_4_count', 'Curricular units 1st sem (approved)_3_rank', 'Curricular units 1st sem (approved)_3_count', 'Curricular units 1st sem (approved)_2_rank', 'Curricular units 1st sem (approved)_2_count', 'Curricular units 1st sem (approved)_1_rank', 'Curricular units 1st sem (approved)_1_count', 'Curricular units 1st sem (approved)_8_rank', 'Curricular units 1st sem (approved)_8_count', 'Curricular units 1st sem (approved)_11_rank', 'Curricular units 1st sem (approved)_11_count', 'Curricular units 1st sem (approved)_12_rank', 'Curricular units 1st sem (approved)_12_count', 'Curricular units 1st sem (approved)_9_rank', 'Curricular units 1st sem (approved)_9_count', 'Curricular units 1st sem (approved)_10_rank', 'Curricular units 1st sem (approved)_10_count', 'Curricular units 1st sem (approved)_13_rank', 'Curricular units 1st sem (approved)_13_count', 'Curricular units 1st sem (approved)_14_rank', 'Curricular units 1st sem (approved)_14_count', 'Curricular units 1st sem (approved)_18_rank', 'Curricular units 1st sem (approved)_18_count', 'Curricular units 1st sem (approved)_17_rank', 'Curricular units 1st sem (approved)_17_count', 'Curricular units 1st sem (approved)_15_rank', 'Curricular units 1st sem (approved)_15_count', 'Curricular units 1st sem (without evaluations)_0_rank', 'Curricular units 1st sem (without evaluations)_0_count', 'Curricular units 1st sem (without evaluations)_1_rank', 'Curricular units 1st sem (without evaluations)_1_count', 'Curricular units 1st sem (without evaluations)_2_rank', 'Curricular units 1st sem (without evaluations)_2_count', 'Curricular units 1st sem (without evaluations)_3_rank', 'Curricular units 1st sem (without evaluations)_3_count', 'Curricular units 1st sem (without evaluations)_4_rank', 'Curricular units 1st sem (without evaluations)_4_count', 'Curricular units 1st sem (without 
evaluations)_7_rank', 'Curricular units 1st sem (without evaluations)_7_count', 'Curricular units 1st sem (without evaluations)_6_rank', 'Curricular units 1st sem (without evaluations)_6_count', 'Curricular units 1st sem (without evaluations)_8_rank', 'Curricular units 1st sem (without evaluations)_8_count', 'Curricular units 1st sem (without evaluations)_5_rank', 'Curricular units 1st sem (without evaluations)_5_count', 'Curricular units 2nd sem (credited)_0_rank', 'Curricular units 2nd sem (credited)_0_count', 'Curricular units 2nd sem (credited)_1_rank', 'Curricular units 2nd sem (credited)_1_count', 'Curricular units 2nd sem (credited)_2_rank', 'Curricular units 2nd sem (credited)_2_count', 'Curricular units 2nd sem (credited)_4_rank', 'Curricular units 2nd sem (credited)_4_count', 'Curricular units 2nd sem (credited)_5_rank', 'Curricular units 2nd sem (credited)_5_count', 'Curricular units 2nd sem (credited)_3_rank', 'Curricular units 2nd sem (credited)_3_count', 'Curricular units 2nd sem (credited)_6_rank', 'Curricular units 2nd sem (credited)_6_count', 'Curricular units 2nd sem (credited)_11_rank', 'Curricular units 2nd sem (credited)_11_count', 'Curricular units 2nd sem (credited)_7_rank', 'Curricular units 2nd sem (credited)_7_count', 'Curricular units 2nd sem (credited)_9_rank', 'Curricular units 2nd sem (credited)_9_count', 'Curricular units 2nd sem (credited)_10_rank', 'Curricular units 2nd sem (credited)_10_count', 'Curricular units 2nd sem (credited)_12_rank', 'Curricular units 2nd sem (credited)_12_count', 'Curricular units 2nd sem (credited)_8_rank', 'Curricular units 2nd sem (credited)_8_count', 'Curricular units 2nd sem (credited)_13_rank', 'Curricular units 2nd sem (credited)_13_count', 'Curricular units 2nd sem (enrolled)_6_rank', 'Curricular units 2nd sem (enrolled)_6_count', 'Curricular units 2nd sem (enrolled)_5_rank', 'Curricular units 2nd sem (enrolled)_5_count', 'Curricular units 2nd sem (enrolled)_8_rank', 'Curricular units 2nd sem 
(enrolled)_8_count', 'Curricular units 2nd sem (enrolled)_7_rank', 'Curricular units 2nd sem (enrolled)_7_count', 'Curricular units 2nd sem (enrolled)_0_rank', 'Curricular units 2nd sem (enrolled)_0_count', 'Curricular units 2nd sem (enrolled)_11_rank', 'Curricular units 2nd sem (enrolled)_11_count', 'Curricular units 2nd sem (enrolled)_9_rank', 'Curricular units 2nd sem (enrolled)_9_count', 'Curricular units 2nd sem (enrolled)_10_rank', 'Curricular units 2nd sem (enrolled)_10_count', 'Curricular units 2nd sem (enrolled)_12_rank', 'Curricular units 2nd sem (enrolled)_12_count', 'Curricular units 2nd sem (enrolled)_13_rank', 'Curricular units 2nd sem (enrolled)_13_count', 'Curricular units 2nd sem (enrolled)_4_rank', 'Curricular units 2nd sem (enrolled)_4_count', 'Curricular units 2nd sem (enrolled)_14_rank', 'Curricular units 2nd sem (enrolled)_14_count', 'Curricular units 2nd sem (enrolled)_17_rank', 'Curricular units 2nd sem (enrolled)_17_count', 'Curricular units 2nd sem (enrolled)_2_rank', 'Curricular units 2nd sem (enrolled)_2_count', 'Curricular units 2nd sem (evaluations)_8_rank', 'Curricular units 2nd sem (evaluations)_8_count', 'Curricular units 2nd sem (evaluations)_6_rank', 'Curricular units 2nd sem (evaluations)_6_count', 'Curricular units 2nd sem (evaluations)_7_rank', 'Curricular units 2nd sem (evaluations)_7_count', 'Curricular units 2nd sem (evaluations)_0_rank', 'Curricular units 2nd sem (evaluations)_0_count', 'Curricular units 2nd sem (evaluations)_9_rank', 'Curricular units 2nd sem (evaluations)_9_count', 'Curricular units 2nd sem (evaluations)_5_rank', 'Curricular units 2nd sem (evaluations)_5_count', 'Curricular units 2nd sem (evaluations)_10_rank', 'Curricular units 2nd sem (evaluations)_10_count', 'Curricular units 2nd sem (evaluations)_11_rank', 'Curricular units 2nd sem (evaluations)_11_count', 'Curricular units 2nd sem (evaluations)_12_rank', 'Curricular units 2nd sem (evaluations)_12_count', 'Curricular units 2nd sem 
(evaluations)_13_rank', 'Curricular units 2nd sem (evaluations)_13_count', 'Curricular units 2nd sem (evaluations)_14_rank', 'Curricular units 2nd sem (evaluations)_14_count', 'Curricular units 2nd sem (evaluations)_15_rank', 'Curricular units 2nd sem (evaluations)_15_count', 'Curricular units 2nd sem (evaluations)_16_rank', 'Curricular units 2nd sem (evaluations)_16_count', 'Curricular units 2nd sem (evaluations)_17_rank', 'Curricular units 2nd sem (evaluations)_17_count', 'Curricular units 2nd sem (evaluations)_19_rank', 'Curricular units 2nd sem (evaluations)_19_count', 'Curricular units 2nd sem (evaluations)_18_rank', 'Curricular units 2nd sem (evaluations)_18_count', 'Curricular units 2nd sem (evaluations)_4_rank', 'Curricular units 2nd sem (evaluations)_4_count', 'Curricular units 2nd sem (evaluations)_22_rank', 'Curricular units 2nd sem (evaluations)_22_count', 'Curricular units 2nd sem (evaluations)_21_rank', 'Curricular units 2nd sem (evaluations)_21_count', 'Curricular units 2nd sem (evaluations)_20_rank', 'Curricular units 2nd sem (evaluations)_20_count', 'Curricular units 2nd sem (approved)_6_rank', 'Curricular units 2nd sem (approved)_6_count', 'Curricular units 2nd sem (approved)_0_rank', 'Curricular units 2nd sem (approved)_0_count', 'Curricular units 2nd sem (approved)_5_rank', 'Curricular units 2nd sem (approved)_5_count', 'Curricular units 2nd sem (approved)_4_rank', 'Curricular units 2nd sem (approved)_4_count', 'Curricular units 2nd sem (approved)_8_rank', 'Curricular units 2nd sem (approved)_8_count', 'Curricular units 2nd sem (approved)_3_rank', 'Curricular units 2nd sem (approved)_3_count', 'Curricular units 2nd sem (approved)_7_rank', 'Curricular units 2nd sem (approved)_7_count', 'Curricular units 2nd sem (approved)_2_rank', 'Curricular units 2nd sem (approved)_2_count', 'Curricular units 2nd sem (approved)_1_rank', 'Curricular units 2nd sem (approved)_1_count', 'Curricular units 2nd sem (approved)_11_rank', 'Curricular units 2nd sem 
(approved)_11_count', 'Curricular units 2nd sem (approved)_9_rank', 'Curricular units 2nd sem (approved)_9_count', 'Curricular units 2nd sem (approved)_10_rank', 'Curricular units 2nd sem (approved)_10_count', 'Curricular units 2nd sem (approved)_12_rank', 'Curricular units 2nd sem (approved)_12_count', 'Curricular units 2nd sem (approved)_13_rank', 'Curricular units 2nd sem (approved)_13_count', 'Curricular units 2nd sem (without evaluations)_0_rank', 'Curricular units 2nd sem (without evaluations)_0_count', 'Curricular units 2nd sem (without evaluations)_1_rank', 'Curricular units 2nd sem (without evaluations)_1_count', 'Curricular units 2nd sem (without evaluations)_2_rank', 'Curricular units 2nd sem (without evaluations)_2_count', 'Curricular units 2nd sem (without evaluations)_3_rank', 'Curricular units 2nd sem (without evaluations)_3_count', 'Curricular units 2nd sem (without evaluations)_4_rank', 'Curricular units 2nd sem (without evaluations)_4_count', 'Curricular units 2nd sem (without evaluations)_5_rank', 'Curricular units 2nd sem (without evaluations)_5_count', 'Curricular units 2nd sem (without evaluations)_6_rank', 'Curricular units 2nd sem (without evaluations)_6_count', 'Curricular units 2nd sem (without evaluations)_7_rank', 'Curricular units 2nd sem (without evaluations)_7_count', 'Curricular units 2nd sem (without evaluations)_8_rank', 'Curricular units 2nd sem (without evaluations)_8_count', 'Unemployment rate_7.6_rank', 'Unemployment rate_7.6_count', 'Unemployment rate_9.4_rank', 'Unemployment rate_9.4_count', 'Unemployment rate_10.8_rank', 'Unemployment rate_10.8_count', 'Unemployment rate_12.4_rank', 'Unemployment rate_12.4_count', 'Unemployment rate_12.7_rank', 'Unemployment rate_12.7_count', 'Unemployment rate_11.1_rank', 'Unemployment rate_11.1_count', 'Unemployment rate_15.5_rank', 'Unemployment rate_15.5_count', 'Unemployment rate_16.2_rank', 'Unemployment rate_16.2_count', 'Unemployment rate_13.9_rank', 'Unemployment rate_13.9_count', 
'Unemployment rate_8.9_rank', 'Unemployment rate_8.9_count', 'Inflation rate_1.4_rank', 'Inflation rate_1.4_count', 'Inflation rate_2.6_rank', 'Inflation rate_2.6_count', 'Inflation rate_-0.8_rank', 'Inflation rate_-0.8_count', 'Inflation rate_0.5_rank', 'Inflation rate_0.5_count', 'Inflation rate_3.7_rank', 'Inflation rate_3.7_count', 'Inflation rate_0.6_rank', 'Inflation rate_0.6_count', 'Inflation rate_2.8_rank', 'Inflation rate_2.8_count', 'Inflation rate_0.3_rank', 'Inflation rate_0.3_count', 'Inflation rate_-0.3_rank', 'Inflation rate_-0.3_count', 'GDP_0.32_rank', 'GDP_0.32_count', 'GDP_-3.12_rank', 'GDP_-3.12_count', 'GDP_1.74_rank', 'GDP_1.74_count', 'GDP_1.79_rank', 'GDP_1.79_count', 'GDP_-1.7_rank', 'GDP_-1.7_count', 'GDP_2.02_rank', 'GDP_2.02_count', 'GDP_-4.06_rank', 'GDP_-4.06_count', 'GDP_-0.92_rank', 'GDP_-0.92_count', 'GDP_0.79_rank', 'GDP_0.79_count', 'GDP_3.51_rank', 'GDP_3.51_count', 'corr_0.05_count', 'corr_0.025_count', 'corr_0.01_count', 'corr_0.005_count']

# Feature columns fed to the models: every test column that is not listed in
# `useless_cols`, kept in the original column order.
# A set is used for the membership test because `useless_cols` is a very long
# list; scanning it once per column would be quadratic.
excluded_cols = set(useless_cols)
choose_cols = [col for col in test_feats.columns if col not in excluded_cols]


def fit_and_predict(train_feats=train_feats,test_feats=test_feats,model=None,num_folds=10,seed=2024):
    """Cross-validate `model` and return fold-averaged test-set class probabilities.

    For every stratified fold the model is refit on the training split (with
    early stopping evaluated on the validation split), the validation accuracy
    is printed, the features that received zero importance are printed, and
    class probabilities are predicted for the whole test set.

    Args:
        train_feats: Training frame holding the `choose_cols` feature columns
            and a 'Target' column.
        test_feats: Test frame holding the `choose_cols` feature columns.
        model: Estimator exposing `fit(X, y, eval_set=..., callbacks=...)`,
            `predict`, `predict_proba` and `feature_importances_`
            (the notebook passes an `LGBMClassifier`).
        num_folds: Number of stratified cross-validation folds.
        seed: Random state used when shuffling the folds, so that the splits
            are reproducible from run to run.

    Returns:
        numpy array with one row per test sample and one column per class:
        the mean over all folds of `model.predict_proba(test_X)`.

    Raises:
        ValueError: If `model` is None.
    """
    if model is None:
        raise ValueError("fit_and_predict requires a model instance, got None")

    X = train_feats[choose_cols].copy()
    y = train_feats['Target'].copy()
    test_X = test_feats[choose_cols].copy()

    fold_accuracies = []
    fold_test_probas = []

    # Stratified K-fold cross-validation; `random_state=seed` makes the
    # shuffled splits reproducible (shuffle=True without it is random per run).
    skf = StratifiedKFold(n_splits=num_folds, shuffle=True, random_state=seed)
    for fold, (train_index, valid_index) in enumerate(skf.split(X, y)):
        print(f"fold:{fold}")

        X_train, X_valid = X.iloc[train_index], X.iloc[valid_index]
        y_train, y_valid = y.iloc[train_index], y.iloc[valid_index]

        # Fit on this fold's training split, early-stopping on its validation split.
        model.fit(
            X_train, y_train,
            eval_set=[(X_valid, y_valid)],
            callbacks=[log_evaluation(100), early_stopping(100)],
        )
        valid_pred = model.predict(X_valid)

        acc = accuracy_score(y_valid, valid_pred)
        fold_accuracies.append(acc)
        print(f"accuracy:{acc}")

        # Report features the fitted model never used (importance exactly 0).
        zero_importance_cols = [
            col
            for col, importance in zip(choose_cols, model.feature_importances_)
            if importance == 0
        ]
        print(f"useless_cols:{zero_importance_cols}")

        # Predict class probabilities for the full test set with this fold's model.
        fold_test_probas.append(model.predict_proba(test_X))

    # Average the per-fold probability matrices element-wise.
    test_pros = np.mean(fold_test_probas, axis=0)
    print(f"mean_accuracy:{sum(fold_accuracies)/num_folds}")

    return test_pros

# Hyper-parameters for the two LightGBM multiclass models that are blended below.
# NOTE(review): the LightGBM docs state that bagging (`subsample`) only takes
# effect when `subsample_freq` > 0; it is not set here — confirm whether row
# subsampling was actually intended.

# Model 1: heavier L2 regularisation, larger trees, extremely-randomised splits.
lgb_params1 = dict(
    boosting_type="gbdt",
    objective="multiclass",
    metric="multi_logloss",
    num_class=3,
    random_state=2024,
    n_estimators=1024,
    reg_alpha=0.1,
    reg_lambda=10,
    colsample_bytree=0.8,
    subsample=0.8,
    learning_rate=0.05,
    num_leaves=64,
    min_child_samples=62,
    max_bin=245,
    extra_trees=True,
)

# Model 2: lower learning rate, smaller trees, standard (non-extra-trees) splits.
lgb_params2 = dict(
    boosting_type="gbdt",
    objective="multiclass",
    metric="multi_logloss",
    num_class=3,
    random_state=2024,
    n_estimators=1024,
    reg_alpha=2.6756579164398144,
    reg_lambda=1.6187614490530422,
    colsample_bytree=0.7,
    subsample=0.7,
    learning_rate=0.030479973315991688,
    num_leaves=50,
    min_child_samples=62,
    max_bin=245,
)

# Run 10-fold CV for each LightGBM configuration in turn; each call returns the
# fold-averaged class probabilities for the test set.
test_pros1, test_pros2 = [
    fit_and_predict(model=LGBMClassifier(**params), num_folds=10, seed=2024)
    for params in (lgb_params1, lgb_params2)
]

# Blend the two models with equal weight.
test_pros = (test_pros1 + test_pros2) / 2

# Predicted class index = column with the highest blended probability.
test_preds = test_pros.argmax(axis=1).astype(int)

In [None]:
# Build the submission file from the competition's sample submission.
# NOTE(review): assumes the sample file's rows are in the same order as
# `test_feats` (predictions are assigned positionally) — confirm.
submission = pd.read_csv("/kaggle/input/playground-series-s4e6/sample_submission.csv")

# Translate each predicted class index back to its label through `idx2target`.
submission['Target'] = [idx2target[class_idx] for class_idx in test_preds]

# `index=False` (the documented boolean) keeps the row index out of the CSV.
submission.to_csv("submission.csv", index=False)
submission.head()