___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___

# Concrete Slump Test Regression

The concrete slump test measures the consistency of fresh concrete before it sets. It is performed to check the workability of freshly made concrete, and therefore the ease with which concrete flows. It can also be used as an indicator of an improperly mixed batch.

<img src="https://i0.wp.com/civiconcepts.com/wp-content/uploads/2019/08/Slump-Cone-test-of-concrete.jpg?fit=977%2C488&ssl=1">

Our data set consists of various cement properties and the resulting slump test metrics in cm. The set concrete is then tested for its compressive strength 28 days later.

Input variables (9):

(components, in kg per m^3 of concrete)(7):
* Cement
* Slag
* Fly ash
* Water
* SP
* Coarse Aggr.
* Fine Aggr.

(Measurements)(2)
* SLUMP (cm)
* FLOW (cm)

Target variable (1):
* **28-day Compressive Strength (MPa)**

Data Source: https://archive.ics.uci.edu/ml/datasets/Concrete+Slump+Test

*Credit: Yeh, I-Cheng, "Modeling slump flow of concrete using second-order regressions and artificial neural networks," Cement and Concrete Composites, Vol.29, No. 6, 474-480, 2007.*

# Importing dependencies

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
plt.rcParams["figure.figsize"] = (10,6)
pd.set_option('display.max_columns', 100)

# Loading data and EDA

In [2]:
# Load the slump-test data. The path is relative, so the CSV must sit in the
# notebook's current working directory.
df = pd.read_csv("cement_slump.csv")
df.head()  # preview the first rows

FileNotFoundError: [Errno 2] No such file or directory: 'cement_slump.csv'

In [None]:
df.info()

In [None]:
df.describe().T

In [None]:
df.corr()['Compressive Strength (28-day)(Mpa)']

## Graphical analysis

In [None]:
# Annotated correlation heatmap on a wide canvas. The colour scale is pinned
# to [-1, 1] so colours are comparable across the whole matrix.
corr_matrix = df.corr()
plt.figure(figsize=(20, 10))
sns.heatmap(corr_matrix, annot=True, vmin=-1, vmax=1)

In [None]:
sns.pairplot(df)

In [None]:
plt.figure(figsize =(20,10))
df.boxplot()

### Train | Test Split

In [None]:
# Separate the target column from the predictors.
y = df["Compressive Strength (28-day)(Mpa)"]
X = df.drop(columns="Compressive Strength (28-day)(Mpa)")

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows as the test set; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Scaling the Data

In [None]:
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler


In [None]:
scaler = StandardScaler() # will be used in pipeline later

In [None]:
# Manual alternative to the pipeline: learn the scaling statistics from the
# training split only, then apply that same fitted scaler to both splits.
X_train_scaled = scaler.fit(X_train).transform(X_train)
X_test_scaled = scaler.transform(X_test)

## Pipeline

## Linear Regression

In [None]:
from sklearn.pipeline import Pipeline # pipeline is used to combine scaler and model

In [None]:
from sklearn.linear_model import LinearRegression

lm = LinearRegression() # will be used in pipeline later

In [None]:
pipe_lm = Pipeline([("scaler", scaler), ("lm", lm)]) # pipeline is used to combine scaler and model

In [None]:
pipe_lm.fit(X_train, y_train)

In [None]:
y_pred = pipe_lm.predict(X_test) # predict on test data
y_train_pred = pipe_lm.predict(X_train) # predict on train data

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

def train_val(y_train, y_train_pred, y_test, y_pred, model_name):
    """Tabulate regression metrics for the train and test splits side by side.

    Parameters
    ----------
    y_train, y_train_pred
        True and predicted targets for the training split.
    y_test, y_pred
        True and predicted targets for the test split.
    model_name : str
        Prefix for the two column labels, ``<model_name>_train`` and
        ``<model_name>_test``.

    Returns
    -------
    pandas.DataFrame
        Rows ``R2``, ``mae``, ``mse`` and ``rmse``; one column per split.
    """

    def _metrics(actual, predicted):
        # MSE is computed once and reused for RMSE.
        mse = mean_squared_error(actual, predicted)
        return {
            "R2": r2_score(actual, predicted),
            "mae": mean_absolute_error(actual, predicted),
            "mse": mse,
            "rmse": np.sqrt(mse),
        }

    summary = {
        model_name + "_train": _metrics(y_train, y_train_pred),
        model_name + "_test": _metrics(y_test, y_pred),
    }
    return pd.DataFrame(summary)

In [None]:
ls =train_val(y_train, y_train_pred, y_test, y_pred, "linear") # train and test scores
ls

## Cross Validate

In [None]:
# Uncomment to list the valid `scoring` names. `sklearn.metrics.SCORERS` was
# removed in scikit-learn 1.3; `get_scorer_names()` is its replacement.
#from sklearn.metrics import get_scorer_names
#get_scorer_names()

In [None]:
from sklearn.model_selection import cross_validate, cross_val_score

In [None]:
# 5-fold cross-validation of the scaler + linear-regression pipeline on the
# training split, collecting four metrics per fold.
model = Pipeline(steps=[("scaler", scaler), ("lm", lm)])
scores = cross_validate(
    model,
    X_train,
    y_train,
    scoring=[
        "r2",
        "neg_mean_absolute_error",
        "neg_mean_squared_error",
        "neg_root_mean_squared_error",
    ],
    cv=5,
)

In [None]:
pd.DataFrame(scores, index = range(1,6))

In [None]:
scores = pd.DataFrame(scores, index=range(1,6))
scores.iloc[:, 2:].mean()


In [None]:
train_val(y_train, y_train_pred, y_test, y_pred, "linear")

In [None]:
# Express RMSE relative to the mean target value. The figures are read from
# the results computed earlier instead of being pasted in by hand, so they
# cannot go stale when the data or the split changes.
target_mean = df["Compressive Strength (28-day)(Mpa)"].mean()
print("train RMSE:", ls.loc["rmse", "linear_train"] / target_mean)
# The CV scorer reports negated RMSE, so flip the sign back before normalising.
print("CV RMSE:", -scores["test_neg_root_mean_squared_error"].mean() / target_mean)

In [None]:
pipe_lm["lm"].coef_

In [None]:
lm_df = pd.DataFrame(pipe_lm["lm"].coef_, columns = ["lm_coef"])
lm_df

## Ridge Regression

In [None]:
from sklearn.linear_model import Ridge

In [None]:
ridge_model = Ridge(alpha=1, random_state=42) # will be used in pipeline later


In [None]:
pipe_ridge = Pipeline([("scaler", scaler), ("ridge", ridge_model)]) # pipeline is used to combine scaler and model

In [None]:
pipe_ridge.fit(X_train, y_train)

In [None]:
y_pred = pipe_ridge.predict(X_test)
y_train_pred = pipe_ridge.predict(X_train)

In [None]:
rs = train_val(y_train, y_train_pred, y_test, y_pred, "ridge")
rs

In [None]:
pd.concat([ls, rs], axis=1)  # combine train and test scores to compare

## Cross-validation for Ridge Regression with alpha = 1

In [None]:
model = Pipeline([("scaler", scaler), ("ridge", ridge_model)])
scores = cross_validate(model, X_train, y_train,
                    scoring=['r2', 'neg_mean_absolute_error','neg_mean_squared_error','neg_root_mean_squared_error'], cv=5)

In [None]:
pd.DataFrame(scores, index = range(1, 6))

In [None]:
scores = pd.DataFrame(scores, index = range(1, 6))
scores.iloc[:,2:].mean()

In [None]:
train_val(y_train, y_train_pred, y_test, y_pred, "ridge")

In [None]:
pipe_ridge["ridge"].coef_

In [None]:
rm_df = pd.DataFrame(pipe_ridge["ridge"].coef_, columns = ["ridge_coef_1"])

In [None]:
pd.concat([lm_df,rm_df], axis = 1) 

## Choosing best alpha value with Cross-Validation

In [None]:
from sklearn.linear_model import RidgeCV

In [None]:
alpha_space = np.linspace(0.1, 1, 100)
alpha_space

In [None]:
ridge_cv_model = RidgeCV(alphas=alpha_space, cv = 10, scoring= "neg_root_mean_squared_error") # will be used in pipeline later

In [None]:
pipe_ridgecv = Pipeline([("scaler", scaler), ("ridgecv", ridge_cv_model)]) # pipeline is used to combine scaler and model

In [None]:
pipe_ridgecv.fit(X_train, y_train)

In [None]:
pipe_ridgecv["ridgecv"].alpha_

In [None]:
# Predict with the RidgeCV pipeline fitted above. The author's run selected
# alpha = 0.91; check `alpha_` in the previous cell for the value of the current run.
y_pred = pipe_ridgecv.predict(X_test)
y_train_pred = pipe_ridgecv.predict(X_train)

In [None]:
rcs = train_val(y_train, y_train_pred, y_test, y_pred, "ridge_cv") 
rcs

In [None]:
pd.concat([ls, rs, rcs], axis = 1)

In [None]:
pipe_ridgecv["ridgecv"].coef_

In [None]:
rcm_df = pd.DataFrame(pipe_ridgecv["ridgecv"].coef_, columns=["ridge_cv_coef_0.91"])

In [None]:
pd.concat([lm_df,rm_df, rcm_df], axis = 1) 

## LASSO

In [None]:
from sklearn.linear_model import Lasso, LassoCV

In [None]:
lasso_model = Lasso(alpha=1, random_state=42)

In [None]:
pipe_lasso = Pipeline([("scaler", scaler), ("lasso", lasso_model)]) # pipeline is used to combine scaler and model

In [None]:
pipe_lasso.fit(X_train, y_train)

In [None]:
y_pred = pipe_lasso.predict(X_test)
y_train_pred = pipe_lasso.predict(X_train)

In [None]:
lss = train_val(y_train, y_train_pred, y_test, y_pred, "lasso") 

lss

In [None]:
pd.concat([ls, rs, rcs, lss], axis = 1)

## Cross-validation for Lasso with default alpha = 1

In [None]:
model = Pipeline([("scaler", scaler), ("lasso", lasso_model)])
scores = cross_validate(model, X_train, y_train,
                        scoring=['r2', 'neg_mean_absolute_error','neg_mean_squared_error','neg_root_mean_squared_error'], cv=5)

In [None]:
pd.DataFrame(scores, index = range(1, 6))

In [None]:
scores = pd.DataFrame(scores, index = range(1, 6))
scores.iloc[:,2:].mean()

In [None]:
train_val(y_train, y_train_pred, y_test, y_pred, "lasso")

In [None]:
# Read the coefficients from the pipeline that was explicitly fitted above.
# `model` is only the template handed to cross_validate and is never fitted
# directly in this notebook.
pipe_lasso["lasso"].coef_

In [None]:
# Take the coefficients from the fitted Lasso pipeline, not from the
# cross-validation template `model`, which is never fitted directly.
lsm_df = pd.DataFrame(pipe_lasso["lasso"].coef_, columns=["lasso_coef_1"])

In [None]:
pd.concat([lm_df, rm_df, rcm_df, lsm_df], axis = 1) 

### Choosing best alpha value with Cross-Validation

In [None]:
lasso_cv_model = LassoCV(alphas = alpha_space, cv = 10, max_iter=100000, random_state=42) # will be used in pipeline later

In [None]:
pipe_lassocv = Pipeline([("scaler", scaler), ("lassocv", lasso_cv_model)]) # pipeline is used to combine scaler and model

In [None]:
pipe_lassocv.fit(X_train, y_train)

In [None]:
pipe_lassocv["lassocv"].alpha_

In [None]:
# Lasso(alpha =0.1)
y_pred = pipe_lassocv.predict(X_test)   
y_train_pred = pipe_lassocv.predict(X_train)

In [None]:
lcs = train_val(y_train, y_train_pred, y_test, y_pred, "lasso_cv")
lcs

In [None]:
pd.concat([ls,rs, rcs, lss, lcs], axis = 1)

In [None]:
pipe_lassocv["lassocv"].coef_

In [None]:
lcm_df = pd.DataFrame(pipe_lassocv["lassocv"].coef_, columns = ["lasso_cv_coef_0.1"])

In [None]:
pd.concat([lm_df, rm_df, rcm_df, lsm_df, lcm_df], axis = 1) # author's note: with 7 features, test_r2 = 0.90

## Elastic net

In [None]:
from sklearn.linear_model import ElasticNet, ElasticNetCV

In [None]:
elastic_model = ElasticNet(alpha=1, l1_ratio=0.5, random_state=42) # l1_ratio is used to control the amount of L1 and L2 regularization

In [None]:
pipe_elastic = Pipeline([("scaler", scaler), ("elastic", elastic_model)]) # pipeline is used to combine scaler and model

In [None]:

pipe_elastic.fit(X_train, y_train)

In [None]:
y_pred = pipe_elastic.predict(X_test)
y_train_pred = pipe_elastic.predict(X_train)

In [None]:
es = train_val(y_train, y_train_pred, y_test, y_pred, "elastic")
es

In [None]:
pd.concat([ls,rs, rcs, lss, lcs, es], axis = 1)

## Cross-validation for ElasticNet with default alpha = 1 and l1_ratio = 0.5

In [None]:
model = Pipeline([("scaler", scaler), ("elastic", ElasticNet(alpha=1, l1_ratio=0.5, random_state=42))])

scores = cross_validate(model, X_train, y_train,
                        scoring=['r2', 'neg_mean_absolute_error','neg_mean_squared_error','neg_root_mean_squared_error'], cv=5)

In [None]:
scores = pd.DataFrame(scores, index = range(1, 6))
scores.iloc[:,2:]

In [None]:
# cross_validate ran with cv=5, so there are exactly five folds to label.
# An index of range(1, 11) would reindex the frame and pad it with five
# all-NaN rows.
scores = pd.DataFrame(scores, index=range(1, 6))
# Skip the first two columns (fit/score timings) and average the metrics.
scores.iloc[:, 2:].mean()

In [None]:
train_val(y_train, y_train_pred, y_test, y_pred, "elastic")

In [None]:
pipe_elastic["elastic"].coef_

In [None]:
em_df = pd.DataFrame(pipe_elastic["elastic"].coef_, columns=["elastic_coef_(alp:1, L1:0.5)"])

In [None]:
pd.concat([lm_df, rm_df, rcm_df, lsm_df, lcm_df, em_df], axis = 1)

## Grid Search for ElasticNet


In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
elastic_model = ElasticNet(max_iter=10000, random_state=42) 

In [None]:
pipe_elastic = Pipeline([("scaler", scaler), ("elastic", elastic_model)]) # pipeline is used to combine scaler and model

In [None]:
# Search space for the grid search: every alpha in alpha_space crossed with a
# handful of l1_ratio values. The "elastic__" prefix addresses the pipeline
# step named "elastic".
param_grid = dict(
    elastic__alpha=alpha_space,
    elastic__l1_ratio=[0.1, 0.5, 0.7, 0.9, 0.95, 1],
)

In [None]:
# Grid search over param_grid for the elastic-net pipeline, scored by negated
# RMSE with 10-fold cross-validation; verbose=2 makes the search log its progress.
grid_model = GridSearchCV(estimator = pipe_elastic, param_grid = param_grid, scoring = 'neg_root_mean_squared_error',
                         cv =10, verbose =2)

In [None]:
grid_model.fit(X_train, y_train)

In [None]:
grid_model.best_params_

In [None]:
y_pred = grid_model.predict(X_test)
y_train_pred = grid_model.predict(X_train)

In [None]:
gm = train_val(y_train, y_train_pred, y_test, y_pred, "elastic_grid")
gm


In [None]:
pd.concat([ls,rs, rcs, lss, lcs, es, gm], axis = 1)

## Feature importances with Ridge

In [None]:
from yellowbrick.model_selection import FeatureImportances 

# Plot the coefficients of a Ridge model built with the alpha selected by
# RidgeCV. The estimator is used outside a pipeline here, so the visualizer is
# fitted on the pre-scaled training matrix.
model = Ridge(alpha=pipe_ridgecv["ridgecv"].alpha_)  # alpha was 0.91 in the author's run
viz = FeatureImportances(model,labels=list(X.columns),relative=False)
viz.fit(X_train_scaled,y_train)
viz.show()



## Feature importances with Lasso

In [None]:
pipe_lassocv["lassocv"].alpha_

In [None]:
from yellowbrick.model_selection import FeatureImportances

# Plot the coefficients of a Lasso model built with the alpha selected by
# LassoCV. The estimator is used outside a pipeline here, so the visualizer is
# fitted on the pre-scaled training matrix.
model = Lasso(alpha=pipe_lassocv["lassocv"].alpha_)  # alpha was 0.1 in the author's run
viz = FeatureImportances(model,labels=list(X.columns),relative=False)
viz.fit(X_train_scaled,y_train)
viz.show()

___

<p style="text-align: center;"><img src="https://docs.google.com/uc?id=1lY0Uj5R04yMY3-ZppPWxqCr5pvBLYPnV" class="img-fluid" alt="CLRSWY"></p>

___