# Yield Prediction using XGBoost and Optuna

This notebook solves a yield-prediction problem using an XGBoost regressor, with Optuna for hyperparameter tuning.

## Step 1: Import Libraries

In [None]:
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import KFold, cross_val_score
import optuna

## Step 2: Load the Dataset

In [None]:
# Read the training and test tables from CSV files in the working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")
test = pd.read_csv(filepath_or_buffer="test.csv")

## Step 3: Prepare Features and Target

In [None]:
# Target column to predict.
y = train["yield"]
# Predictors: every training column except the row identifier and the target.
X = train.drop(["id", "yield"], axis="columns")
# The test table gets the same treatment; only the identifier is dropped here.
X_test = test.drop(["id"], axis="columns")

## Step 4: Set up Cross-Validation

In [None]:
# Shuffled 5-fold splitter; the fixed seed keeps the fold assignment the same
# every time the splitter is used.
cv = KFold(
    shuffle=True,
    random_state=42,
    n_splits=5,
)

## Step 5: Define the Optuna Objective Function

In [None]:
def objective(trial):
    """Score one Optuna trial by cross-validated mean absolute error.

    Draws an XGBoost hyperparameter set from ``trial``, evaluates a regressor
    built from it on ``X``/``y`` using the module-level splitter ``cv``, and
    returns the MAE averaged over the folds (lower is better).
    """
    # (name, low, high) bounds, listed in the order they are requested from
    # the trial.
    integer_bounds = (
        ('n_estimators', 100, 500),
        ('max_depth', 3, 10),
    )
    float_bounds = (
        ('learning_rate', 0.01, 0.3),
        ('subsample', 0.5, 1.0),
        ('colsample_bytree', 0.5, 1.0),
        ('gamma', 0, 5),
        ('reg_alpha', 0, 1),
        ('reg_lambda', 0, 1),
    )

    sampled = {}
    for name, low, high in integer_bounds:
        sampled[name] = trial.suggest_int(name, low, high)
    for name, low, high in float_bounds:
        sampled[name] = trial.suggest_float(name, low, high)
    # Not tuned: a fixed seed so trials differ only by their hyperparameters.
    sampled['random_state'] = 42

    regressor = xgb.XGBRegressor(**sampled)
    fold_scores = cross_val_score(
        regressor,
        X,
        y,
        scoring='neg_mean_absolute_error',
        cv=cv,
    )
    # The scorer reports negated MAE; flip the sign to get a positive error.
    return -fold_scores.mean()

## Step 6: Run the Optuna Study

In [None]:
# Minimise the cross-validated MAE returned by `objective`.
# TPE is Optuna's default sampler; seeding it explicitly makes the sequence of
# sampled hyperparameters (and therefore the selected best trial) repeatable
# across notebook runs, in line with the fixed seeds used for the model and
# the cross-validation splitter.
study = optuna.create_study(
    direction='minimize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=25)

## Step 7: Train Final Model with Best Parameters

In [None]:
# `study.best_params` contains only the values suggested through the trial,
# so the fixed `random_state=42` used while tuning is not part of it. Pass it
# again so the final model is trained with the same seed that produced the
# best cross-validation score.
best_model = xgb.XGBRegressor(**study.best_params, random_state=42)
# Refit on the full training set (all folds) before predicting.
best_model.fit(X, y)

## Step 8: Make Predictions on Test Set

In [None]:
# Predict the target for every row of the test feature table.
preds = best_model.predict(X_test)

## Step 9: Save Submission File

In [None]:
# Pair each test row's identifier with its predicted value, then write the
# two-column table to disk without the DataFrame index.
submission = pd.DataFrame({'id': test['id']})
submission['yield'] = preds
submission.to_csv(path_or_buf='submission.csv', index=False)