In [1]:
import pandas as pd

# Load the concrete-mix dataset into a DataFrame; every later cell reads from `df`.
# NOTE(review): this is an absolute, machine-specific path, so the notebook will
# not run on another machine without editing it.
dataset_csv = "/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/final_concrete_dataset.csv"
df = pd.read_csv(dataset_csv)

In [2]:
# Column to predict; every other column of `df` is used as a predictor.
target = "compressive_strength(MPa)"

# Keep the original column order, dropping only the target label.
features = df.columns[df.columns != target].tolist()

# Design matrix and response vector used by all models below.
X, y = df[features], df[target]

In [3]:
from sklearn.model_selection import train_test_split

# Hold out 20% of the rows for evaluation. The fixed random_state makes the
# shuffle (and therefore every metric below) reproducible across runs.
split_parts = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = split_parts


In [4]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_model(y_true, y_pred):
    """Compute regression error metrics for a set of predictions.

    Parameters
    ----------
    y_true : array-like
        Ground-truth target values.
    y_pred : array-like
        Predicted values, aligned element-wise with ``y_true``.

    Returns
    -------
    dict
        ``{"RMSE": float, "MAE": float, "R2": float}`` where RMSE is the
        square root of the mean squared error, MAE the mean absolute error
        and R2 the coefficient of determination.
    """
    # np.sqrt returns np.float64 while the sklearn metrics return plain floats;
    # cast everything to float so the dict has one uniform type and a clean repr.
    rmse = float(np.sqrt(mean_squared_error(y_true, y_pred)))
    mae = float(mean_absolute_error(y_true, y_pred))
    r2 = float(r2_score(y_true, y_pred))
    return {"RMSE": rmse, "MAE": mae, "R2": r2}


In [5]:
from sklearn.linear_model import LinearRegression

# Baseline: ordinary least squares on the training split.
# `fit` returns the fitted estimator, so construction and fitting are chained.
lr = LinearRegression().fit(X_train, y_train)

# Score the held-out split with the shared metric helper.
y_pred_lr = lr.predict(X_test)
lr_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_lr)

# Last expression of the cell, so the metrics dict is rendered as output.
lr_metrics

{'RMSE': np.float64(6.464954871088242),
 'MAE': 4.6763300564407055,
 'R2': 0.7056560147664795}

In [6]:
from sklearn.linear_model import Ridge

# L2-regularised linear model with penalty strength alpha=1.0.
# NOTE(review): no scaler is applied in the visible cells, so the penalty acts
# on raw feature units; confirm whether standardisation was intended.
ridge = Ridge(alpha=1.0).fit(X_train, y_train)

# Score the held-out split with the shared metric helper.
y_pred_ridge = ridge.predict(X_test)
ridge_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_ridge)

# Last expression of the cell, so the metrics dict is rendered as output.
ridge_metrics


{'RMSE': np.float64(6.464954779149948),
 'MAE': 4.676329926183086,
 'R2': 0.7056560231382267}

In [7]:
from sklearn.linear_model import Lasso

# L1-regularised linear model with penalty strength alpha=0.01; max_iter is
# raised to 10000 to give the solver a larger iteration budget.
# NOTE(review): no scaler is applied in the visible cells, so the penalty acts
# on raw feature units; confirm whether standardisation was intended.
lasso = Lasso(alpha=0.01, max_iter=10000).fit(X_train, y_train)

# Score the held-out split with the shared metric helper.
y_pred_lasso = lasso.predict(X_test)
lasso_metrics = evaluate_model(y_true=y_test, y_pred=y_pred_lasso)

# Last expression of the cell, so the metrics dict is rendered as output.
lasso_metrics


{'RMSE': np.float64(6.464969617661123),
 'MAE': 4.6763248239471125,
 'R2': 0.7056546719667022}

In [8]:
# Side-by-side comparison of the three baseline models: one row per model,
# one column per metric returned by evaluate_model.
results = pd.DataFrame([
    {"Model": "Linear Regression", **lr_metrics},
    {"Model": "Ridge Regression", **ridge_metrics},
    {"Model": "Lasso Regression", **lasso_metrics},
])

# Persist the comparison table (index omitted; it is just 0..2).
# NOTE(review): absolute, machine-specific output path.
results.to_csv("/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/baseline_model_results.csv", index=False)

# Jupyter renders only the LAST expression of a cell. Previously `results`
# sat before the to_csv call, so the table was never displayed; keep it last.
results


In [9]:
# Pair each feature name with its fitted linear-regression coefficient.
coef_table = pd.DataFrame({"Feature": features, "Coefficient": lr.coef_})

# Rank by absolute magnitude, largest first, so strong negative and strong
# positive coefficients are both surfaced at the top.
# NOTE(review): features appear to be unscaled, so magnitudes reflect each
# column's units and are not directly comparable as importances; confirm.
coef_df = coef_table.sort_values(
    by="Coefficient",
    key=lambda col: col.abs(),
    ascending=False,
)

# Preview up to the ten largest-magnitude coefficients.
coef_df.head(10)


Unnamed: 0,Feature,Coefficient
4,foaming_agent(kg/m3),-0.060254
3,water(kg/m3),-0.047762
0,binder(kg/m3),0.046965
6,period_of_testing(Days),0.010661
2,fine_aggregate(kg/m3),0.006561
5,measured_density(kg/m3),0.00423
1,pozzolan(kg/m3),0.000657


In [10]:
# Persist the magnitude-ranked coefficient table (row index omitted).
# NOTE(review): absolute, machine-specific output path.
coef_csv_path = "/home/mukundvinayak/machine-learning/Concrete_mixing_analysis/misc_datas/linear_regression_coefficients.csv"
coef_df.to_csv(coef_csv_path, index=False)
