

# **Install Libraries**



In [None]:
!pip install -q lightgbm xgboost catboost pyarrow fastparquet

# **Importing Libraries**

In [None]:
import pandas as pd
import numpy as np
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score
import warnings
warnings.filterwarnings("ignore")

# **Loading the Data**

In [None]:
# Read the train and test tables from the parquet files in the working directory.
train_df = pd.read_parquet(path="train.parquet")
test_df = pd.read_parquet(path="test.parquet")

# Show both (rows, columns) shapes, then preview the first training rows.
print(train_df.shape, test_df.shape)
train_df.head()

# **Feature Engineering**

In [None]:
def create_features(df, date_col='Date'):
    """Return a copy of ``df`` with calendar features derived from a date column.

    Parameters
    ----------
    df : pandas.DataFrame
        Input frame. It is copied first, so the caller's frame is not modified.
    date_col : str, default 'Date'
        Name of the column to parse with ``pd.to_datetime``. When the column
        is absent, an unchanged copy of ``df`` is returned.

    Returns
    -------
    pandas.DataFrame
        Copy of ``df`` with ``date_col`` removed and the columns ``year``,
        ``month``, ``day``, ``dayofweek``, ``hour``, ``month_sin``,
        ``month_cos``, ``dow_sin`` and ``dow_cos`` appended (in that order).
    """
    df = df.copy()

    if date_col not in df.columns:
        return df

    dates = pd.to_datetime(df[date_col])

    # Plain calendar components.
    df['year'] = dates.dt.year
    df['month'] = dates.dt.month
    df['day'] = dates.dt.day
    df['dayofweek'] = dates.dt.dayofweek
    df['hour'] = dates.dt.hour

    # Cyclical encodings: map month (period 12) and weekday (period 7) onto the
    # unit circle so that e.g. December and January end up close together.
    month_angle = 2 * np.pi * df['month'] / 12
    dow_angle = 2 * np.pi * df['dayofweek'] / 7
    df['month_sin'] = np.sin(month_angle)
    df['month_cos'] = np.cos(month_angle)
    df['dow_sin'] = np.sin(dow_angle)
    df['dow_cos'] = np.cos(dow_angle)

    # The raw date column is dropped once its components have been extracted.
    return df.drop(columns=[date_col])

# Run the same feature pipeline on both splits so their columns stay aligned.
train_df = train_df.pipe(create_features)
test_df = test_df.pipe(create_features)

# **Fixing Target Type**






In [None]:
# Store the label column as integers.
train_df = train_df.assign(target=train_df["target"].astype(int))

# **Splitting the Data**

In [None]:
TARGET = "target"

# Model inputs: every training column except the label and the row identifier.
FEATURES = [col for col in train_df.columns if col != TARGET and col != "ID"]

X, y = train_df[FEATURES], train_df[TARGET]
X_test = test_df[FEATURES]

# Keep identifiers for the submission; use the row index when there is no "ID" column.
if "ID" in test_df.columns:
    test_ids = test_df["ID"]
else:
    test_ids = test_df.index

# 80/20 hold-out split, stratified on the label so both parts keep the class ratio.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# **Training with High-Performance LightGBM**

In [None]:
# LightGBM classifier configuration.
model = lgb.LGBMClassifier(
    n_estimators=2000,          # upper bound; early stopping below may stop sooner
    learning_rate=0.03,
    num_leaves=64,
    max_depth=-1,
    subsample=0.8,
    # Row subsampling only takes effect when the bagging frequency is non-zero;
    # with the default subsample_freq=0 the `subsample` value above is ignored.
    subsample_freq=1,
    colsample_bytree=0.8,
    class_weight="balanced",
    random_state=42,
    n_jobs=-1
)

# Fit on the training split and monitor log-loss on the validation split;
# training stops once that metric has not improved for 100 rounds.
model.fit(
    X_train, y_train,
    eval_set=[(X_val, y_val)],
    eval_metric="binary_logloss",
    callbacks=[lgb.early_stopping(100)]
)

# **Threshold Optimization for F1 Score**

In [None]:
# Positive-class probabilities on the validation split.
val_probs = model.predict_proba(X_val)[:, 1]

# Candidate cut-offs 0.20, 0.21, ..., 0.80. linspace + round is used instead of
# np.arange with a float step, whose element count depends on floating-point
# rounding and whose values drift (e.g. 0.36000000000000015).
thresholds = np.round(np.linspace(0.20, 0.80, 61), 2)

best_f1 = 0
best_thresh = 0.5  # fallback if no candidate produces a positive F1
for t in thresholds:
    preds = (val_probs > t).astype(int)
    score = f1_score(y_val, preds)
    if score > best_f1:  # strict '>' keeps the lowest threshold on ties
        best_f1 = float(score)
        best_thresh = float(t)
print("Best F1:", best_f1)
print("Best Threshold:", best_thresh)

# **Final Prediction**

In [None]:
# Positive-class probability for every test row.
test_probs = model.predict_proba(X_test)[:, 1]
# Label a row 1 when its probability is strictly above the tuned threshold, else 0.
test_preds = np.where(test_probs > best_thresh, 1, 0)

# **Submission CSV File**

In [None]:
# Pair each test identifier with its predicted label.
submission = pd.DataFrame({"ID": test_ids, "target": test_preds})

# Write the file without the DataFrame index, then preview the first rows.
submission.to_csv(path_or_buf="submission.csv", index=False)
submission.head()