In [3]:
# Importing libraries
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, StratifiedKFold
from lightgbm import LGBMClassifier


# Loading the data
# file_path_train = r'C:\path\to\your\data\train.csv' 
file_path_train = r'C:\Users\12698\OneDrive\Desktop\train_original.csv'

df_train = pd.read_csv(file_path_train)

# Dropping the first column
df_train = df_train.drop(columns='id')

category_columns_train = [0,1,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,36]
for col in category_columns_train:
    df_train.iloc[:, col] = df_train.iloc[:, col].astype('str').astype('category')


# Optimizing the data

# Converting the float columns to numbers between -127 and 127 to convert to int8
def normalize_to_int8(series):
    normalized = (series - series.min()) * (252 / (series.max() - series.min())) - 126
    return normalized.astype('int8')

columns_to_normalize = [6,12,25,31,33,34,35]
for col in columns_to_normalize:
    df_train.iloc[:, col] = normalize_to_int8(df_train.iloc[:, col])

    
# Creating empty dataframes with int8 data types to optimize the data
df_train_opt = pd.DataFrame(0, index=df_train.index, columns=df_train.columns, dtype='int8')

# Copying the data to new dataframes
for col in df_train.columns[:-1]:
    df_train_opt[col] = df_train[col].astype('int8')

df_train_opt['Target'] = df_train['Target'].astype('str').astype('category')

# Updating the datatype of the categorial data
category_columns_train = [0,1,3,4,5,7,8,9,10,11,13,14,15,16,17,18,20,36]
for col in category_columns_train:
    df_train_opt.iloc[:, col] = df_train_opt.iloc[:, col].astype('str').astype('category')

# Checking the datatypes    
print(df_train_opt.dtypes)

X = df_train_opt.drop(columns='Target')
y = df_train_opt['Target']

# Split the data
X_train, X_val, y_train, y_val = train_test_split(X, y, shuffle=True, test_size=0.3, random_state=0, stratify=y)

#model building
model = LGBMClassifier(random_state=1, learning_rate = 0.15371303284747684, max_depth= 6, num_leaves=37)
kf = StratifiedKFold(n_splits=5, shuffle=True, random_state=1)
scores = cross_val_score(model, X, y, scoring='accuracy', cv=kf, n_jobs=-1)

print("accuracy:", scores.mean())

1        1
2        1
3        1
4        1
        ..
76513    1
76514    1
76515    5
76516    1
76517    1
Name: Marital status, Length: 76518, dtype: category
Categories (6, object): ['1', '2', '3', '4', '5', '6']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_train.iloc[:, col] = df_train.iloc[:, col].astype('str').astype('category')
1        17
2        17
3         1
4         1
         ..
76513    17
76514     1
76515    17
76516     1
76517     1
Name: Application mode, Length: 76518, dtype: category
Categories (22, object): ['1', '10', '12', '15', ..., '51', '53', '7', '9']' has dtype incompatible with int64, please explicitly cast to a compatible dtype first.
  df_train.iloc[:, col] = df_train.iloc[:, col].astype('str').astype('category')
1        9238
2        9254
3        9500
4        9500
         ... 
76513    9254
76514    9254
76515    9085
76516    9070
76517    9773
Name: Course, Length: 76518, dtype: category
Categori

Marital status                                    category
Application mode                                  category
Application order                                     int8
Course                                            category
Daytime/evening attendance                        category
Previous qualification                            category
Previous qualification (grade)                        int8
Nacionality                                       category
Mother's qualification                            category
Father's qualification                            category
Mother's occupation                               category
Father's occupation                               category
Admission grade                                       int8
Displaced                                         category
Educational special needs                         category
Debtor                                            category
Tuition fees up to date                           catego