In [2]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_score

# Load the concrete-mix table from the CSV in the working directory.
df = pd.read_csv(filepath_or_buffer="concrete.csv")

# Preview the first five rows to check the columns and the target
# ("CompressiveStrength") before modelling.
df.head(n=5)

Unnamed: 0,Cement,BlastFurnaceSlag,FlyAsh,Water,Superplasticizer,CoarseAggregate,FineAggregate,Age,CompressiveStrength
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.3


**Baseline Score:**

- Check whether adding synthetic features can help the model learn important relationships.

- First step: establish a baseline by training the model on the un-augmented dataset, so there is a reference score for judging whether new features are actually useful.

- Then decide on the usefulness of the new features by comparing their score against the baseline.


In [9]:
# Split the raw table into the target (y) and the un-augmented features (X).
# `df` itself is left untouched so later cells can start from the same data.
y = df["CompressiveStrength"].copy()
X = df.drop(columns="CompressiveStrength")

# Baseline model: a random forest whose trees split on absolute error,
# seeded so repeated runs give the same score.
baseline = RandomForestRegressor(criterion="absolute_error", random_state=0)

# 5-fold cross-validation estimates how well the model generalises to rows
# it was not trained on. One score is returned per fold.
baseline_cv_scores = cross_val_score(
    estimator=baseline,
    X=X,
    y=y,
    scoring="neg_mean_absolute_error",
    cv=5,
)

# The scorer reports *negated* MAE (higher is better), so flip the sign of
# the fold average to get a plain MAE where lower is better.
baseline_score = -baseline_cv_scores.mean()

print(f"MAE Baseline Score: {baseline_score:.4}")

MAE Baseline Score: 8.232


**New Ratio Features:**

In [10]:
# Start again from the raw table: target (y) plus the original features,
# extended with three ratio features derived from the mix quantities.
y = df["CompressiveStrength"].copy()
X = df.drop(columns="CompressiveStrength").assign(
    # fine aggregate relative to coarse aggregate
    FCRatio=lambda mix: mix["FineAggregate"] / mix["CoarseAggregate"],
    # total aggregate relative to cement
    AggCmtRatio=lambda mix: (mix["CoarseAggregate"] + mix["FineAggregate"]) / mix["Cement"],
    # water relative to cement
    WtrCmtRatio=lambda mix: mix["Water"] / mix["Cement"],
)

# Same estimator settings and CV scheme as the baseline cell, so any change
# in the score comes from the added features alone.
model = RandomForestRegressor(criterion="absolute_error", random_state=0)
cv_scores = cross_val_score(
    estimator=model,
    X=X,
    y=y,
    scoring="neg_mean_absolute_error",
    cv=5,
)

# The scorer returns negated MAE; flip the sign of the fold average so that
# lower is better.
score = -cv_scores.mean()

print(f"MAE Score with Ratio Features: {score:.4}")

MAE Score with Ratio Features: 7.948
