In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import missingno as msno

import lightgbm as lgb

from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score

In [None]:
def r2_score_eval(preds, train_data):
    """Custom evaluation function that reports the R² score.

    Intended to be passed as ``feval`` to ``lgb.train``.

    Parameters
    ----------
    preds : array-like
        Predicted values for the dataset being evaluated.
    train_data : object exposing ``get_label()``
        The dataset being evaluated; its true targets are read through
        ``get_label()``.

    Returns
    -------
    tuple
        ``('r2_score', value, True)``: metric name, metric value, and the
        "higher is better" flag.
    """
    score = r2_score(train_data.get_label(), preds)
    return 'r2_score', score, True

In [None]:
# Locations of the train / test / sample-submission CSVs, all under one
# Kaggle input directory.
_input_dir = '/kaggle/input/playground-series-s4e5'
train_path = f'{_input_dir}/train.csv'
test_path = f'{_input_dir}/test.csv'
submission_path = f'{_input_dir}/sample_submission.csv'

In [None]:
def read_data(path, index_col=None):
    """Read a CSV file into a DataFrame, optionally setting its index.

    Parameters
    ----------
    path : str or path-like
        Location of the CSV file, forwarded to ``pd.read_csv``.
    index_col : optional
        Key(s) forwarded to ``DataFrame.set_index`` (a column label, a list
        of labels, or an array of index values). When ``None`` (default) the
        frame keeps the index produced by ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The loaded frame, re-indexed when ``index_col`` is given.
    """
    df = pd.read_csv(path)
    # Identity check rather than `!= None`: equality dispatches to the
    # argument's own `__ne__`, which is element-wise for array-like keys and
    # makes the `if` raise "truth value is ambiguous".
    if index_col is not None:
        df = df.set_index(index_col)
    return df

# Load both competition frames, using the 'id' column as the index.
train_df, test_df = (
    read_data(csv_path, 'id') for csv_path in (train_path, test_path)
)

In [None]:
label = 'FloodProbability'
SEED = 42  # fixed seed so the split is reproducible across kernel restarts

# Separate the features from the target column.
X = train_df.drop(columns=[label])
y = train_df[[label]]

# Hold out 20% of the rows as a validation set. Without a fixed random_state
# the split (and therefore the reported validation R²) changes on every run.
X_train, X_val, y_train, y_val = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

In [None]:
# Wrap the training and validation splits as LightGBM datasets.
train_data = lgb.Dataset(data=X_train, label=y_train)
# The validation set references the training set.
val_data = lgb.Dataset(data=X_val, label=y_val, reference=train_data)

#### Hyperparameters

In [None]:
# Booster settings passed to lgb.train.
params = dict(
    objective='regression',
    metric='None',  # no built-in metric; evaluation uses the custom feval
    num_leaves=31,
)
# Number of boosting rounds.
num_round = 100

In [None]:
# Train the booster; the validation set is evaluated with the custom R² metric.
model = lgb.train(
    params=params,
    train_set=train_data,
    num_boost_round=num_round,
    valid_sets=[val_data],
    feval=r2_score_eval,
)

In [None]:
# Predict on the held-out validation rows with the trained booster.
y_pred = model.predict(X_val, num_iteration=model.best_iteration)

# Report the validation R².
r2 = r2_score(y_true=y_val, y_pred=y_pred)
print(f'R² Score: {r2}')

### Submission

In [None]:
test_data = read_data(test_path, 'id')
submission = pd.read_csv(submission_path)
preds = model.predict(test_data)
submission['FloodProbability'] = preds
submission.to_csv('submission.csv', index=False)
!head submission.csv