In [1]:
import pandas as pd
import numpy as np

In [3]:
data = pd.read_csv("data/concrete_data.csv")

In [4]:
## we will drop the duplicates
data.drop_duplicates(inplace=True)

In [5]:
X = data.drop(labels=['concrete_compressive_strength'],axis=1)

In [6]:
y = data[['concrete_compressive_strength']]

In [7]:
y

Unnamed: 0,concrete_compressive_strength
0,79.99
1,61.89
2,40.27
3,41.05
4,44.30
...,...
1025,44.28
1026,31.18
1027,23.70
1028,32.77


In [8]:
columns = X.columns
columns

Index(['cement', 'blast_furnace_slag', 'fly_ash', 'water', 'superplasticizer',
       'coarse_aggregate', 'fine_aggregate ', 'age'],
      dtype='object')

In [9]:
from sklearn.preprocessing import StandardScaler, OrdinalEncoder
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer

In [10]:
num_pipline  = Pipeline(
    [
        ('Missing_value_handeler', SimpleImputer()),
        ("Standerd_scler",StandardScaler())
    ]
)

In [11]:
preprocessor=ColumnTransformer(
    [
        
        ('num_pipeline',num_pipline,columns)
    ]
)

In [12]:

from sklearn.model_selection import train_test_split

X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=30)

In [13]:
X_train=pd.DataFrame(preprocessor.fit_transform(X_train),columns=preprocessor.get_feature_names_out())
X_test=pd.DataFrame(preprocessor.transform(X_test),columns=preprocessor.get_feature_names_out())

In [14]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-2.0.2-py3-none-manylinux2014_x86_64.whl.metadata (2.0 kB)
Downloading xgboost-2.0.2-py3-none-manylinux2014_x86_64.whl (297.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m297.1/297.1 MB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m:00:01[0m00:01[0m
[?25hInstalling collected packages: xgboost
Successfully installed xgboost-2.0.2


In [15]:
## Model Training

from sklearn.linear_model import LinearRegression,Lasso,Ridge,ElasticNet
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, AdaBoostRegressor,GradientBoostingRegressor
from xgboost import XGBRegressor
from sklearn.metrics import r2_score,mean_absolute_error,mean_squared_error

In [16]:
import numpy as np
def evaluate_model(true, predicted):
    mae = mean_absolute_error(true, predicted)
    mse = mean_squared_error(true, predicted)
    rmse = np.sqrt(mean_squared_error(true, predicted))
    r2_square = r2_score(true, predicted)
    return mae, rmse, r2_square

In [17]:

models={
    'LinearRegression':LinearRegression(),
    'SVR':SVR(),
    'Knnr':KNeighborsRegressor(),
    'desesiontree': DecisionTreeRegressor(),
    'Random forrest':RandomForestRegressor(),
    'AdaBoostRegressor': AdaBoostRegressor(),
    'GradiaentBosting' : GradientBoostingRegressor(),
    'xgboost' : XGBRegressor()
}

In [18]:
trained_model_list=[]
model_list=[]
r2_list=[]

In [19]:
for i in range(len(list(models))):
    model=list(models.values())[i]
    model.fit(X_train,y_train)

    #Make Predictions
    y_pred=model.predict(X_test)

    #this is a validation(test) score
    mae, rmse, r2_square=evaluate_model(y_test,y_pred)

    print(list(models.keys())[i])
    model_list.append(list(models.keys())[i])

    print('Model Training Performance')
    print("RMSE:",rmse)
    print("MAE:",mae)
    print("R2 score",r2_square*100)

    r2_list.append(r2_square)
    
    print('='*35)
    print('\n')

LinearRegression
Model Training Performance
RMSE: 11.153486340248495
MAE: 8.479892771058282
R2 score 56.58889128720928


SVR
Model Training Performance
RMSE: 10.787851726823463
MAE: 8.151247893207655
R2 score 59.38845276890336


Knnr
Model Training Performance
RMSE: 9.074609042351256
MAE: 6.91630463576159
R2 score 71.2633958548323


desesiontree
Model Training Performance
RMSE: 6.420489642671782
MAE: 4.420066225165563
R2 score 85.61480021768253




  y = column_or_1d(y, warn=True)
  return fit_method(estimator, *args, **kwargs)


Random forrest
Model Training Performance
RMSE: 5.259718512136335
MAE: 3.552373640018921
R2 score 90.34605982862583


AdaBoostRegressor
Model Training Performance
RMSE: 8.090346728672996
MAE: 6.514520694690931
R2 score 77.15906554487425


GradiaentBosting
Model Training Performance
RMSE: 5.302712075875824
MAE: 3.821368725099471
R2 score 90.18758988201878




  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)


xgboost
Model Training Performance
RMSE: 4.931897842227039
MAE: 3.1980546168775748
R2 score 91.51195364474326


