# In [None]:
# ============================================================
# 1. Import Libraries
# ============================================================
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import os
import warnings
warnings.filterwarnings("ignore")  # Disable all warnings


# ============================================================
# 2. Height Conversion Function
# ============================================================
def convert_height_to_inches(value):
    """
    Convert a height written as 'feet-inches' (e.g. '6-4') into total inches.

    Parameters
    ----------
    value : str
        Height text with exactly one '-' separating integer feet from
        integer inches.

    Returns
    -------
    int
        ``feet * 12 + inches``, or -1 when ``value`` is missing or does not
        match the expected format.
    """
    try:
        feet, inches = value.split('-')
        return int(feet) * 12 + int(inches)
    except (AttributeError, TypeError, ValueError):
        # AttributeError: value has no .split (None, float NaN, ...).
        # TypeError: value is not text (e.g. bytes given a str separator).
        # ValueError: wrong number of parts, or a part that is not an integer.
        # Only these are mapped to the sentinel, so KeyboardInterrupt and
        # SystemExit are no longer swallowed as they were by a bare except.
        return -1


# ============================================================
# 3. Load Dataset
# ============================================================
# Location of the merged feature table inside the Kaggle input mount.
DATA_PATH = "/kaggle/input/nfl-big-data-bowl-2026-analytics-1/merged_cleaned_features.csv"

df = pd.read_csv(DATA_PATH, low_memory=False)

# Heights are cast to text first so every cell (NaN becomes "nan") can be
# split; anything that is not 'feet-inches' ends up as -1.
df["player_height"] = (
    df["player_height"]
    .astype(str)
    .apply(convert_height_to_inches)
)

# Weights that cannot be parsed as numbers become NaN and then the -1 sentinel.
df["player_weight"] = (
    pd.to_numeric(df["player_weight"], errors="coerce")
    .fillna(-1)
)


# ============================================================
# 4. Feature Selection
# ============================================================
# Names of the two target columns.
target_x, target_y = "x_output", "y_output"

# Columns fed to the models as inputs.
features = [
    'x_input',
    'y_input',
    's',
    's_rolling_3',
    'a',
    'dir',
    'o',
    'absolute_yardline_number',
    'player_height',
    'player_weight',
    'score_difference',
    'distance_to_ball',
    'week',
]

# Rows lacking either target are dropped; every remaining missing value in
# the whole frame (not only the feature columns) is then replaced by -1.
# NOTE(review): -1 may collide with genuine values in some columns
# (e.g. score_difference) — confirm the sentinel is acceptable.
df = df.dropna(subset=[target_x, target_y]).fillna(-1)

X = df[features]
y_x, y_y = df[target_x], df[target_y]


# ============================================================
# 5. Train-Test Split
# ============================================================
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible. Both targets go through the same call so they stay
# row-aligned with X.
# NOTE(review): rows are split individually — if several rows belong to the
# same play, a grouped split may be needed to avoid leakage; confirm.
(
    X_train, X_test,
    yx_train, yx_test,
    yy_train, yy_test,
) = train_test_split(X, y_x, y_y, random_state=42, test_size=0.2)


# ============================================================
# 6. LightGBM Training Function
# ============================================================
def train_lgb_regressor(X_train, y_train, **param_overrides):
    """
    Fit a LightGBM regressor on the given training data.

    Parameters
    ----------
    X_train
        Feature matrix passed to ``LGBMRegressor.fit``.
    y_train
        Target values passed to ``LGBMRegressor.fit``.
    **param_overrides
        Optional keyword arguments that replace or extend the default
        hyperparameters below (e.g. ``learning_rate=0.1``). With no
        overrides the model is configured exactly as before.

    Returns
    -------
    lgb.LGBMRegressor
        The fitted model.
    """
    params = {
        "objective": "regression",
        "metric": "rmse",
        "boosting_type": "gbdt",
        "learning_rate": 0.05,
        "num_leaves": 64,
        "n_estimators": 300,
        "verbose": -1
    }
    # Caller-supplied values win over the defaults.
    params.update(param_overrides)
    model = lgb.LGBMRegressor(**params)
    model.fit(X_train, y_train)
    return model


# ============================================================
# 7. Train Models
# ============================================================
# One independent regressor per target, both fit on the same feature matrix
# (the x model is trained first, then the y model).
model_x, model_y = (
    train_lgb_regressor(X_train, target_values)
    for target_values in (yx_train, yy_train)
)


# ============================================================
# 8. Predictions
# ============================================================
# Score the held-out rows with each fitted model.
pred_x, pred_y = (fitted.predict(X_test) for fitted in (model_x, model_y))


# ============================================================
# 9. Evaluation
# ============================================================
# RMSE is the square root of the mean squared error, computed per target.
mse_x = mean_squared_error(yx_test, pred_x)
mse_y = mean_squared_error(yy_test, pred_y)
rmse_x, rmse_y = mse_x ** 0.5, mse_y ** 0.5

print(f"✅ RMSE X: {rmse_x:.4f}")
print(f"✅ RMSE Y: {rmse_y:.4f}")


# ============================================================
# 10. Save Plots
# ============================================================
output_dir = "/kaggle/working/plots_lgbm"
os.makedirs(output_dir, exist_ok=True)

# One actual-vs-predicted scatter per target. Each entry carries the data
# plus the axis labels, title and file name for that figure.
plot_specs = [
    (
        yx_test, pred_x,
        "Actual X Output", "Predicted X Output",
        "Actual vs Predicted X (LightGBM)", "pred_vs_actual_x.png",
    ),
    (
        yy_test, pred_y,
        "Actual Y Output", "Predicted Y Output",
        "Actual vs Predicted Y (LightGBM)", "pred_vs_actual_y.png",
    ),
]

for actual, predicted, x_label, y_label, title, file_name in plot_specs:
    plt.figure(figsize=(7,6))
    plt.scatter(actual, predicted, alpha=0.4, s=10)
    plt.xlabel(x_label)
    plt.ylabel(y_label)
    plt.title(title)
    plt.tight_layout()
    plt.savefig(os.path.join(output_dir, file_name), dpi=150)
    # Close the figure once it is written so figures do not accumulate.
    plt.close()

print("✅ Plots saved:", output_dir)


# ============================================================
# 11. Save Predictions
# ============================================================
# Held-out feature rows with the two prediction columns appended; assign()
# returns a new frame, so X_test itself is left unchanged.
df_pred = X_test.assign(pred_x=pred_x, pred_y=pred_y)

# Written relative to the current working directory, without the row index.
predictions_file = "lightgbm_predictions.csv"
df_pred.to_csv(predictions_file, index=False)
print("✅ Predictions saved to lightgbm_predictions.csv")
