# 🧠 AutoML for Lightweight Concrete (Ca + Natural Fibers) - Extended Analysis

# In [None]:

# 📦 Required Libraries
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
from pycaret.regression import *
import shap
import warnings
warnings.filterwarnings("ignore")

# 📊 Load Dataset
# Thirty mixes: five replacement levels (20-60 %) crossed with six fibre codes.
# In each measurement list below, one physical line holds a single replacement
# level (20, 30, 40, 50, 60 from top to bottom) and its six values follow the
# fibre order CO, HE, BA, KE, JU, FX.
data = {
    # Each level is repeated once per fibre code.
    "Ca_Replacement(%)": [
        level for level in (20, 30, 40, 50, 60) for _ in range(6)
    ],
    # The six fibre codes cycle once per replacement level.
    "Fibre": 5 * ["CO", "HE", "BA", "KE", "JU", "FX"],
    "Stress": [
        38.0036, 36.0356, 30.7876, 26.7860, 32.7228, 33.3460,  # 20 %
        33.0836, 37.4296, 28.6556, 27.9340, 25.0476, 27.5240,  # 30 %
        28.5408, 25.4740, 26.9664, 22.3908, 24.4572, 19.6520,  # 40 %
        17.8316, 26.8352, 23.6208, 22.7844, 21.1116, 19.8160,  # 50 %
        23.7028, 24.1620, 19.1600, 22.0300, 21.8168, 20.0948,  # 60 %
    ],
    "Split": [
        4.4347, 4.2955, 3.9084, 3.5951, 4.0540, 4.1001,  # 20 %
        4.0807, 4.3944, 3.7437, 3.6868, 3.4533, 3.6542,  # 30 %
        3.7346, 3.4884, 3.6096, 3.2286, 3.4042, 2.9855,  # 40 %
        2.8163, 3.5991, 3.3339, 3.2625, 3.1166, 3.0004,  # 50 %
        3.3408, 3.3795, 2.9404, 3.1973, 3.1787, 3.0257,  # 60 %
    ],
    "Flexure": [
        4.3153, 4.2021, 3.8841, 3.6229, 4.0043, 4.0422,  # 20 %
        4.0263, 4.2826, 3.7472, 3.6997, 3.5033, 3.6724,  # 30 %
        3.7397, 3.5330, 3.6350, 3.3123, 3.4618, 3.1031,  # 40 %
        2.9559, 3.6262, 3.4021, 3.3413, 3.2163, 3.1161,  # 50 %
        3.4080, 3.4408, 3.0640, 3.2855, 3.2696, 3.1379,  # 60 %
    ],
    "Sorptivity": [
        0.14, 0.13, 0.12, 0.11, 0.12, 0.15,        # 20 %
        0.156, 0.168, 0.144, 0.132, 0.144, 0.18,   # 30 %
        0.182, 0.169, 0.156, 0.143, 0.156, 0.195,  # 40 %
        0.196, 0.182, 0.168, 0.154, 0.168, 0.21,   # 50 %
        0.224, 0.208, 0.192, 0.176, 0.192, 0.24,   # 60 %
    ],
}

# Build the modelling table, replacing the textual fibre codes with the
# integer labels produced by scikit-learn's LabelEncoder so that every
# column is numeric.
df = pd.DataFrame(data).assign(
    Fibre=lambda frame: LabelEncoder().fit_transform(frame["Fibre"])
)

# 📈 Data Distribution and Correlation
# One histogram per column of df, all drawn on a single figure.
df.hist(figsize=(12, 8), bins=15)
plt.gcf().suptitle('Distribution of Input Parameters and Targets')
plt.tight_layout()
plt.show()

# Pairwise Pearson correlations between all columns, annotated to two decimals.
corr = df.corr(method='pearson')
heatmap_fig, heatmap_ax = plt.subplots(figsize=(10, 7))
sns.heatmap(corr, ax=heatmap_ax, annot=True, fmt=".2f", cmap='coolwarm')
heatmap_ax.set_title("Pearson Correlation Heatmap")
plt.show()

# 🤖 AutoML and Evaluation for All Targets
# For each response column: compare candidate regressors with PyCaret, tune the
# best one, refit it with finalize_model, draw diagnostic plots and a SHAP
# summary. The finalized model of every target is kept in `results`.
results = {}
targets = ["Stress", "Split", "Flexure", "Sorptivity"]

# Only the non-target columns are predictors. Passing the full df to setup()
# would let the other three responses act as features (target leakage) and
# would make the trained model expect columns that the SHAP frame does not have.
features = df.drop(columns=targets)
feature_names = list(features.columns)
feature_dtypes = features.dtypes.to_dict()

for target in targets:
    print(f"\n========= AutoML for: {target} =========")
    # NOTE(review): `silent` appears to be a PyCaret 2.x-only argument; it may
    # need to be dropped when running on PyCaret 3.x — confirm installed version.
    s = setup(
        df[feature_names + [target]],
        target=target,
        session_id=123,
        silent=True,
        fold=10,
        verbose=False,
    )
    best = compare_models()
    # 'random' search is provided by the scikit-learn backend; per the PyCaret
    # docs scikit-optimize only accepts search_algorithm='bayesian', so the
    # original pairing of 'scikit-optimize' with 'random' was invalid.
    tuned = tune_model(best, search_library='scikit-learn', search_algorithm='random')
    final = finalize_model(tuned)
    results[target] = final

    # Evaluation Plots
    # Drawn for `tuned` rather than `final`, as in the original script.
    plot_model(tuned, plot='error')
    plot_model(tuned, plot='residuals')
    plot_model(tuned, plot='learning')
    plot_model(tuned, plot='feature')

    # SHAP summary
    def predict_from_array(values, model=final):
        """Score a plain 2-D array of feature values with the finalized model.

        SHAP hands the prediction function bare arrays, so column names and
        dtypes are restored before scoring through predict_model, which is
        expected to apply the preprocessing configured by setup().
        """
        frame = pd.DataFrame(values, columns=feature_names).astype(feature_dtypes)
        scored = predict_model(model, data=frame, verbose=False)
        # NOTE(review): assumes predict_model appends the prediction as the
        # last column ('Label' in 2.x, 'prediction_label' in 3.x) — confirm.
        return scored.iloc[:, -1].to_numpy()

    explainer = shap.Explainer(predict_from_array, features)
    shap_values = explainer(features)
    shap.summary_plot(shap_values, features)
