In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, r2_score
import joblib

In [2]:

# Read the synthetic lettuce-growth CSV (path is relative to the working
# directory) into a DataFrame used by the cells below.
data = pd.read_csv(filepath_or_buffer='synthetic_lettuce_growth_data.csv')


In [3]:

# Target is the 'Cumulative Lettuce Growth' column; the feature matrix is
# every other column of `data`. `drop` returns a new frame, so `data` itself
# is left unmodified.
y = data['Cumulative Lettuce Growth']
X = data.drop(columns='Cumulative Lettuce Growth')



In [4]:
# Partition features and target into training and held-out test subsets.
# test_size=0.2 reserves 20% of the rows for testing; the fixed random_state
# keeps the partition reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [5]:

# Build a random forest regressor (n_estimators=100, seeded with
# random_state=42) and fit it on the training split in one expression;
# `fit` returns the estimator itself, so `rf_regressor` is the fitted model.
rf_regressor = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out test features.
rf_predictions = rf_regressor.predict(X_test)



In [6]:
# Build a gradient boosting regressor (n_estimators=100, seeded with
# random_state=42) and fit it on the training split in one expression;
# `fit` returns the estimator itself, so `gb_regressor` is the fitted model.
gb_regressor = GradientBoostingRegressor(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions for the held-out test features.
gb_predictions = gb_regressor.predict(X_test)

In [7]:
# Persist each fitted base regressor to its own joblib file in the working
# directory. The final call is left as the cell's last expression, so its
# return value is what the notebook displays.
joblib.dump(value=rf_regressor, filename='rf_model.joblib')
joblib.dump(value=gb_regressor, filename='gb_model.joblib')

['gb_model.joblib']

In [8]:
# Combine the two models with an unweighted average of their test-set
# predictions (each model contributes half).
ensemble_predictions = 0.5 * (rf_predictions + gb_predictions)

In [9]:
# Score the averaged predictions against the held-out targets and print one
# line per metric, in the order listed below.
print("Ensemble Model Performance:")
for metric_label, metric_fn in (
    ("Mean Squared Error:", mean_squared_error),
    ("R-squared:", r2_score),
):
    print(metric_label, metric_fn(y_test, ensemble_predictions))

Ensemble Model Performance:
Mean Squared Error: 7.0076869284006795
R-squared: 0.9953270323852147


In [10]:
# Bundle both fitted regressors into a single dict keyed by model name and
# persist it as one joblib file. Note that the dict only stores the two
# models; the averaging step is not part of the saved object.
ensemble_model = dict(rf_model=rf_regressor, gb_model=gb_regressor)
joblib.dump(ensemble_model, 'ensemble_model.joblib')

['ensemble_model.joblib']