In [5]:
import pandas as pd
import joblib 
from sklearn.linear_model import LinearRegression 

# --- 1. Load Processed Training Data ---

try:
    x_train = joblib.load('x_train.pkl')
    x_test=joblib.load('x_test.pkl')
    y_train=joblib.load('y_train.pkl')
    y_train_r=y_train[['rice']]
    y_test=joblib.load('y_test.pkl')
    y_test_r=y_test[['rice']]

except FileNotFoundError:
    print("-" * 60)
    print("FATAL ERROR: Processed data files not found.")
    print("Ensure the pipeline_exporter script has been run successfully to create these PKL files.")
    print("-" * 60)
    raise

print(f"‚úÖ Successfully loaded features ({x_train.shape}) and target ({y_train_r}).")
print("-" * 60)


# --- 2. Initialize, Train, and Export Model ---

# GRID SEARCH SVM
import joblib 
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import GridSearchCV # <--- NEW IMPORT
import numpy as np


# ----------------------------------------------

# --- 1. Define the Parameter Grid for Grid Search ---
# This dictionary holds the hyperparameters and the values you want to test.
param_grid = {
    'C': [1, 10, 100],  # Regularization parameter
    'gamma': ['scale', 'auto', 0.1, 1], # Kernel coefficient
    'kernel': ['rbf'] # For SVR, 'rbf' is commonly used
}

# --- 2. Define the Base Model ---
base_model = SVR()

# --- 3. Grid Search and Training Loop ---
print("Starting Grid Search CV and training of 6 individual SVR models...")
print("-" * 60)

# NOTE: Since your original snippet only shows training for a single model
# (using x_train and y_train_g), the example below simulates the loop structure
# you would need for all six targets.

# Assuming you are iterating through targets like this:
# for i, target_col in enumerate(TARGET_COLUMNS):
#     y_train_g = Y_for_fit_train.iloc[:, i] 
#     y_test_g = Y_for_fit_test.iloc[:, i]
#     ...

# --- Example for a single target (as in your original snippet) ---

# Initialize the GridSearchCV object
# cv=5 means 5-fold cross-validation
# scoring='neg_mean_squared_error' is common for regression (it maximizes the negative MSE, 
# which is equivalent to minimizing the positive MSE/RMSE)
grid_search = GridSearchCV(
    estimator=base_model, 
    param_grid=param_grid, 
    scoring='neg_mean_squared_error',
    cv=10, 
    verbose=1,
    n_jobs=-1 # Use all available cores
)

# Fit Grid Search to the training data
# This step performs the cross-validation across all parameter combinations
grid_search.fit(x_train, y_train_r) 

# The best model found by the grid search
best_model = grid_search.best_estimator_

# Get the best parameters
best_params = grid_search.best_params_
print(f" üèÜ Best Parameters Found: {best_params}")

# Optional: Quick Evaluation on Testing Data (for confidence)
y_pred_r = best_model.predict(x_test)
rmse = np.sqrt(mean_squared_error(y_test_r, y_pred_r))
    
print(f" üéØ (Test RMSE: {rmse:.4f})")

# Export the best model (e.g., using a target-specific filename)
# model_filename = f'svr_model_{target_col}.pkl'
# joblib.dump(best_model, model_filename)
# print(f" üíæ Model saved as {model_filename}")

print("-" * 60)
print("Grid Search CV complete. The best SVR model is trained and ready for export.")

# C. Export the Fitted Model
# This is the file you need to upload as 'protein.pkl' (or rename it after export)
joblib.dump(best_model, 'rice.pkl')

print("‚úÖ Trained model saved to rice.pkl. Ready for prediction.")
print("-" * 60)

‚úÖ Successfully loaded features ((870, 42)) and target (      rice
834      1
552    -40
855    -14
215     -5
256     -4
...    ...
330     -6
466    -19
121     -7
1044    -4
860      0

[870 rows x 1 columns]).
------------------------------------------------------------
Starting Grid Search CV and training of 6 individual SVR models...
------------------------------------------------------------
Fitting 10 folds for each of 12 candidates, totalling 120 fits


  y = column_or_1d(y, warn=True)


 üèÜ Best Parameters Found: {'C': 100, 'gamma': 'scale', 'kernel': 'rbf'}
 üéØ (Test RMSE: 5.0471)
------------------------------------------------------------
Grid Search CV complete. The best SVR model is trained and ready for export.
‚úÖ Trained model saved to rice.pkl. Ready for prediction.
------------------------------------------------------------


In [6]:

from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
import numpy as np

r2 = r2_score(y_test_r, y_pred_r)


mae = mean_absolute_error(y_test_r, y_pred_r)


mse = mean_squared_error(y_test_r, y_pred_r)
rmse = np.sqrt(mse)

print("--- Model Accuracy for Protein Feed Yield ---")
print(f"R-squared (R¬≤) Score: {r2:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f} percentage points")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f} percentage points")



--- Model Accuracy for Protein Feed Yield ---
R-squared (R¬≤) Score: 0.8373
Mean Absolute Error (MAE): 2.7455 percentage points
Root Mean Squared Error (RMSE): 5.0471 percentage points
