In [1]:
import pandas as pd

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
data = pd.read_csv('/content/drive/MyDrive/cement_data.csv')


In [4]:
data.head()

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day),"Concrete compressive strength(MPa, megapascals)"
0,540.0,0.0,0.0,162.0,2.5,1040.0,676.0,28,79.986111
1,540.0,0.0,0.0,162.0,2.5,1055.0,676.0,28,61.887366
2,332.5,142.5,0.0,228.0,0.0,932.0,594.0,270,40.269535
3,332.5,142.5,0.0,228.0,0.0,932.0,594.0,365,41.05278
4,198.6,132.4,0.0,192.0,0.0,978.4,825.5,360,44.296075


In [5]:
from sklearn.model_selection import train_test_split

X = data.drop(columns="Concrete compressive strength(MPa, megapascals) ", axis= 1)

y = data["Concrete compressive strength(MPa, megapascals) "]

X_train, X_test, y_train, y_test = train_test_split(X,y, test_size= 0.2)


In [6]:
X_train

Unnamed: 0,Cement (component 1)(kg in a m^3 mixture),Blast Furnace Slag (component 2)(kg in a m^3 mixture),Fly Ash (component 3)(kg in a m^3 mixture),Water (component 4)(kg in a m^3 mixture),Superplasticizer (component 5)(kg in a m^3 mixture),Coarse Aggregate (component 6)(kg in a m^3 mixture),Fine Aggregate (component 7)(kg in a m^3 mixture),Age (day)
57,198.60,132.4,0.00,192.00,0.00,978.4,825.50,3
596,170.30,155.5,0.00,185.70,0.00,1026.6,724.30,7
828,522.00,0.0,0.00,146.00,0.00,896.0,896.00,28
642,250.00,0.0,0.00,182.00,0.00,1100.0,820.00,28
936,236.90,91.7,71.50,246.90,6.00,852.9,695.40,28
...,...,...,...,...,...,...,...,...
414,190.34,0.0,125.18,166.61,9.88,1079.0,798.90,3
609,236.00,0.0,0.00,193.00,0.00,968.0,885.00,180
761,350.00,0.0,0.00,203.00,0.00,974.0,775.00,90
607,236.00,0.0,0.00,194.00,0.00,968.0,885.00,28


In [7]:
# Import necessary libraries
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.impute import KNNImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline
from sklearn.metrics import mean_squared_error, r2_score

# Define the preprocessing pipeline
preprocessor = make_pipeline(KNNImputer(n_neighbors=3), StandardScaler())

# Define the models with their respective hyperparameters to tune
models = {
    'Linear Regression': LinearRegression(),
    'Ridge Regression': Ridge(alpha=1),
    'Lasso Regression': Lasso(alpha=1),
    'Random Forest Regression': RandomForestRegressor(n_estimators=100, max_depth=5),
    'Gradient Boosting Regression': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1)
}

# Fit and evaluate each model
for model_name, model in models.items():
    # Create the pipeline
    pipeline = make_pipeline(preprocessor, model)

    # Fit the pipeline to the training data
    pipeline.fit(X_train, y_train)

    # Evaluate the pipeline on the test data
    y_pred = pipeline.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    # Print the mean squared error
    print(f'{model_name} - Mean Squared Error: {mse:.2f}')
    print(f'{model_name} - r2 score: {r2:.2f}')


Linear Regression - Mean Squared Error: 125.95
Linear Regression - r2 score: 0.55
Ridge Regression - Mean Squared Error: 125.52
Ridge Regression - r2 score: 0.55
Lasso Regression - Mean Squared Error: 132.25
Lasso Regression - r2 score: 0.53
Random Forest Regression - Mean Squared Error: 47.90
Random Forest Regression - r2 score: 0.83
Gradient Boosting Regression - Mean Squared Error: 36.92
Gradient Boosting Regression - r2 score: 0.87


In [8]:
best_model = RandomForestRegressor()

In [9]:
best_model.set_params()

In [10]:
data.columns

Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

In [12]:
html_str = """
            <div class="mb-3">

                <label class="form-label"> <b>{column_name} </b></label>
                <input class="form-control" type="number" name="{column_id_name}" />
            </div>
"""

for column in data.columns:
    print(html_str.format(
        column_name = column,
        column_id_name = column.lower().replace(" ","_")

    ))


            <div class="mb-3">
                
                <label class="form-label"> <b>Cement (component 1)(kg in a m^3 mixture) </b></label>
                <input class="form-control" type="number" name="cement_(component_1)(kg_in_a_m^3_mixture)" />
            </div>


            <div class="mb-3">
                
                <label class="form-label"> <b>Blast Furnace Slag (component 2)(kg in a m^3 mixture) </b></label>
                <input class="form-control" type="number" name="blast_furnace_slag_(component_2)(kg_in_a_m^3_mixture)" />
            </div>


            <div class="mb-3">
                
                <label class="form-label"> <b>Fly Ash (component 3)(kg in a m^3 mixture) </b></label>
                <input class="form-control" type="number" name="fly_ash_(component_3)(kg_in_a_m^3_mixture)" />
            </div>


            <div class="mb-3">
                
                <label class="form-label"> <b>Water  (component 4)(kg in a m^3 mixtur