# In [None]:  (Jupyter notebook cell marker left over from the export)
# rf_model.py

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# --- Data loading -----------------------------------------------------------
# Read the CSV and separate the six predictor columns from the target column.
feature_columns = ['FTC(No.)', 'CS(MPa)', 'W/C', 'MinT(C)', 'MaxT(C)', 'AE(0/1)']
target_column = 'DCS(MPa)'

df = pd.read_csv('original_dataset.csv')
X = df[feature_columns].to_numpy()
y = df[[target_column]].to_numpy().ravel()  # flatten the single column to 1-D

# --- Hold-out split ---------------------------------------------------------
# 80 % of the rows for training, 20 % for testing; fixed seed for repeatability.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Min-max scaling --------------------------------------------------------
# Both scalers are fitted on the training split only and then applied to the
# test split, so the test data never influences the scaling parameters.
scaler_X = MinMaxScaler().fit(X_train)
X_train = scaler_X.transform(X_train)
X_test = scaler_X.transform(X_test)

# MinMaxScaler expects 2-D input, hence the reshape to a column and back.
scaler_y = MinMaxScaler().fit(y_train.reshape(-1, 1))
y_train = scaler_y.transform(y_train.reshape(-1, 1)).ravel()
y_test = scaler_y.transform(y_test.reshape(-1, 1)).ravel()

# --- Hyper-parameter search -------------------------------------------------
# Exhaustive search over tree count and depth, scored by (negated) MAE with
# 5-fold cross-validation on the training split, using all available cores.
rf_grid = dict(
    n_estimators=[100, 200],
    max_depth=[4, 6, 8],
)

rf = GridSearchCV(
    estimator=RandomForestRegressor(random_state=42),
    param_grid=rf_grid,
    scoring='neg_mean_absolute_error',
    cv=5,
    n_jobs=-1,
)
rf.fit(X_train, y_train)

# Evaluation
# Score the best estimator found by the grid search on the held-out test split.
# NOTE: y_test and the model's predictions are both in the min-max-scaled
# target space, so MAE and RMSE are reported in normalized units rather than
# the original target units; R² is unaffected by that affine scaling.
best_model = rf.best_estimator_
y_pred = best_model.predict(X_test)
mae = mean_absolute_error(y_test, y_pred)
# Take the square root of the MSE explicitly: the `squared=False` keyword of
# mean_squared_error was deprecated in scikit-learn 1.4 and removed in 1.6,
# so passing it raises TypeError on current releases.
rmse = np.sqrt(mean_squared_error(y_test, y_pred))
r2 = r2_score(y_test, y_pred)

print("\nRandom Forest Performance:")
print(f"MAE: {mae:.4f}")
print(f"RMSE: {rmse:.4f}")
print(f"R²: {r2:.4f}")

# Optional: persist the tuned Random Forest model to disk.
# Uncomment the lines below to enable.
# import joblib
#
# joblib.dump(best_model, "rf_model.pkl")
# print("Random Forest model saved as 'rf_model.pkl'")