In [1]:
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import pickle
import pandas as pd
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

# Train a random-forest regressor on the cement dataset: load the CSV, hold
# out a test split, standardize features using training-set statistics only,
# tune hyper-parameters with a grid search, persist the fitted artifacts and
# report hold-out metrics.

# Settings a reader might want to tune, collected in one place.
DATA_PATH = "cement_data_large.csv"
TARGET_COLUMN = "Compressive Strength"
MODEL_PATH = "model.pkl"
SCALER_PATH = "scaler.pkl"
SEED = 42
TEST_SIZE = 0.2

# Load training data
df = pd.read_csv(DATA_PATH)
if TARGET_COLUMN not in df.columns:
    # Fail early with a readable message instead of an opaque error from drop().
    raise KeyError(f"Column {TARGET_COLUMN!r} not found in {DATA_PATH}")
X = df.drop(TARGET_COLUMN, axis=1)
y = df[TARGET_COLUMN]

# Train-test split on the RAW features. Splitting before scaling keeps the
# held-out rows from influencing the scaler's mean/variance (data leakage).
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=TEST_SIZE, random_state=SEED
)

# Fit the scaler on the training rows only, then apply that same
# transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so that any later cell reading X_scaled still works; it is now
# produced with the training-set statistics.
X_scaled = scaler.transform(X)

# Define parameter grid
param_grid = {
    "n_estimators": [100, 150, 200],
    "max_depth": [None, 10, 20, 30]
}

# Grid search (3-fold CV on the training split, scored by R^2, all cores)
grid_search = GridSearchCV(
    RandomForestRegressor(random_state=SEED),
    param_grid,
    cv=3,
    scoring="r2",
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

# Best model (refit on the full training split by GridSearchCV)
best_model = grid_search.best_estimator_

# Save the model
with open(MODEL_PATH, "wb") as f:
    pickle.dump(best_model, f)

# Save the fitted scaler as well: the model was trained on scaled features,
# so new inputs must go through this exact scaler before predict().
with open(SCALER_PATH, "wb") as f:
    pickle.dump(scaler, f)

# Predictions and evaluation on the held-out split
y_pred = best_model.predict(X_test)
print("Best Parameters:", grid_search.best_params_)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))

Best Parameters: {'max_depth': None, 'n_estimators': 200}
MSE: 58.026576497867374
R2 Score: 0.8509436211625773
