In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the concrete-mix dataset into a DataFrame.
# NOTE(review): this is an absolute, machine-specific path; the notebook will
# only run where this exact location exists — consider a configurable data dir.
df = pd.read_csv(
    "/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/final_concrete_dataset.csv"
)

# The strength column is the regression target; every other column in the
# file is used as a predictor, in the order it appears in the CSV.
target = "compressive_strength(MPa)"
features = list(filter(lambda column: column != target, df.columns))

y = df[target]
X = df[features]

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore the split) the same on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [2]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(y_true, y_pred):
    """Summarise regression predictions with three error/fit metrics.

    Both arguments are handed unchanged to the scikit-learn metric functions
    ``mean_squared_error``, ``mean_absolute_error`` and ``r2_score``.

    Parameters
    ----------
    y_true
        Observed target values.
    y_pred
        Predicted target values, aligned element-wise with ``y_true``.

    Returns
    -------
    dict
        Mapping with the keys ``"RMSE"``, ``"MAE"`` and ``"R2"``, inserted in
        that order. ``"RMSE"`` is ``np.sqrt`` of the mean squared error, so it
        comes back as a NumPy scalar; the other two values are whatever the
        corresponding metric function returns.
    """
    mse = mean_squared_error(y_true, y_pred)
    mae = mean_absolute_error(y_true, y_pred)
    r2 = r2_score(y_true, y_pred)

    # Key order matters downstream: it becomes the column order when these
    # dicts are turned into a DataFrame.
    return dict(RMSE=np.sqrt(mse), MAE=mae, R2=r2)

In [3]:
from sklearn.tree import DecisionTreeRegressor

# A single regression tree, capped at depth 6 and seeded so that the fitted
# tree is the same on every run. `fit` returns the estimator itself, so the
# construction and training can be chained.
dt = DecisionTreeRegressor(max_depth=6, random_state=42).fit(X_train, y_train)

# Score the tree on the held-out rows only.
y_pred_dt = dt.predict(X_test)
dt_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_dt)

# Last expression of the cell: displays the metric dict.
dt_metrics


{'RMSE': np.float64(3.440568023891905),
 'MAE': 1.7554218077003871,
 'R2': 0.916634863052756}

In [4]:
from sklearn.ensemble import RandomForestRegressor

# Random forest of 300 trees with no depth cap (max_depth=None), seeded for
# repeatability; n_jobs=-1 asks scikit-learn to use all available cores.
rf = RandomForestRegressor(n_estimators=300, max_depth=None, random_state=42, n_jobs=-1)

# Train on the training split, then predict the held-out rows in one chain
# (`fit` returns the fitted estimator, which is still bound to `rf`).
y_pred_rf = rf.fit(X_train, y_train).predict(X_test)

rf_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_rf)

# Last expression of the cell: displays the metric dict.
rf_metrics


{'RMSE': np.float64(3.4126686141112255),
 'MAE': 1.3799093127355806,
 'R2': 0.9179813895816433}

In [5]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting: 300 depth-3 trees added with a learning rate of 0.05,
# seeded for repeatability.
gbr = GradientBoostingRegressor(
    random_state=42,
    max_depth=3,
    learning_rate=0.05,
    n_estimators=300,
)
gbr.fit(X_train, y_train)

# Predictions on the held-out rows; `y_pred_gbr` is also exported to CSV by a
# later cell, so the name must stay available.
y_pred_gbr = gbr.predict(X_test)
gbr_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_gbr)

# Last expression of the cell: displays the metric dict.
gbr_metrics


{'RMSE': np.float64(2.935980469338634),
 'MAE': 1.4506300406090726,
 'R2': 0.9392941569233392}

In [6]:
# One row per tree-based model: a "Model" label followed by that model's
# RMSE / MAE / R2 as returned by evaluate_model.
tree_results = pd.DataFrame(
    [
        {"Model": label, **scores}
        for label, scores in (
            ("Decision Tree", dt_metrics),
            ("Random Forest", rf_metrics),
            ("Gradient Boosting", gbr_metrics),
        )
    ]
)

# Previously saved baseline results are read back from disk.
# NOTE(review): the column layout of this CSV is not visible here; the concat
# below presumably relies on it sharing the Model/RMSE/MAE/R2 columns — confirm.
linear_results_df = pd.read_csv(
    "/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/baseline_model_results.csv"
)

# Stack baseline rows first, tree rows second, renumbering the index, and
# persist the combined table without the index column.
results = pd.concat(
    [linear_results_df, tree_results],
    ignore_index=True,
)
results.to_csv(
    "/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/all_model_results.csv",
    index=False,
)

# Only the tree-model rows are displayed as the cell output.
tree_results

Unnamed: 0,Model,RMSE,MAE,R2
0,Decision Tree,3.440568,1.755422,0.916635
1,Random Forest,3.412669,1.379909,0.917981
2,Gradient Boosting,2.93598,1.45063,0.939294


In [7]:
# Pair every predictor name with the fitted forest's built-in
# `feature_importances_` value (same order as `features`).
# NOTE(review): "MDG" usually abbreviates "mean decrease in Gini"; for a
# regressor the split criterion is presumably not Gini — confirm the label is
# intended before relying on it. It is kept because it is written to CSV later.
rf_importance = pd.DataFrame({"Feature": features, "MDG": rf.feature_importances_})

# Most important feature first; the original row labels are kept, so the index
# still shows each feature's position in `features`.
rf_importance = rf_importance.sort_values("MDG", ascending=False)

rf_importance


Unnamed: 0,Feature,MDG
0,binder(kg/m3),0.657376
2,fine_aggregate(kg/m3),0.147981
5,measured_density(kg/m3),0.102786
4,foaming_agent(kg/m3),0.030932
6,period_of_testing(Days),0.029865
3,water(kg/m3),0.023171
1,pozzolan(kg/m3),0.007889


In [8]:
from sklearn.inspection import permutation_importance

# Permutation importance of the fitted random forest, measured on the held-out
# split: each feature is shuffled 20 times (n_repeats), with a fixed seed and
# all available cores.
perm = permutation_importance(
    estimator=rf,
    X=X_test,
    y=y_test,
    n_repeats=20,
    n_jobs=-1,
    random_state=42,
)

# `importances_mean` holds one value per feature, in the column order of
# X_test, which is the order of `features`.
mda_importance = pd.DataFrame({"Feature": features, "MDA": perm.importances_mean})

# Largest mean importance first; original row labels are kept.
mda_importance = mda_importance.sort_values("MDA", ascending=False)

mda_importance


Unnamed: 0,Feature,MDA
0,binder(kg/m3),0.952806
2,fine_aggregate(kg/m3),0.173516
5,measured_density(kg/m3),0.099491
6,period_of_testing(Days),0.075489
4,foaming_agent(kg/m3),0.04456
3,water(kg/m3),0.035547
1,pozzolan(kg/m3),0.010301


In [9]:
# Persist the held-out targets and the gradient-boosting predictions as
# single-column CSVs. The index is dropped in both files, so rows line up by
# position only.
pd.Series(y_test).to_csv(
    path_or_buf="/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/y_test_values.csv",
    index=False,
)
pd.Series(y_pred_gbr).to_csv(
    path_or_buf="/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/y_pred_gbr_values.csv",
    index=False,
)

# Persist both feature-importance tables (already sorted, index dropped).
rf_importance.to_csv(
    path_or_buf="/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/rf_feature_importances.csv",
    index=False,
)
mda_importance.to_csv(
    path_or_buf="/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/mda_feature_importances.csv",
    index=False,
)
