In [1]:
!pip install catboost pandas scikit-learn




In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score


In [3]:
# Load your dataset
data = pd.read_csv('litconcdata.csv')

# Show the first few rows of the dataset to understand its structure
data.head()


Unnamed: 0,Cement,Blast Furnance Slag,Fly Ash,Water,Limestone Powder,Silica Fume,Quartz Powder,Superplasticizer,Coarse Aggregate,Fiber,Nano-Silica,Fine Aggregate,Age,Compressive Strength
0,540.0,0.0,0.0,162.0,0.0,0.0,0.0,2.5,1040.0,0.0,0.0,676.0,28,79.99
1,540.0,0.0,0.0,162.0,0.0,0.0,0.0,2.5,1055.0,0.0,0.0,676.0,28,61.89
2,332.5,142.5,0.0,228.0,0.0,0.0,0.0,0.0,932.0,0.0,0.0,594.0,270,40.27
3,332.5,142.5,0.0,228.0,0.0,0.0,0.0,0.0,932.0,0.0,0.0,594.0,365,41.05
4,198.6,132.4,0.0,192.0,0.0,0.0,0.0,0.0,978.4,0.0,0.0,825.5,360,44.3


In [4]:
# Impute missing values using the mean strategy
imputer = SimpleImputer(strategy='mean')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Check for any remaining missing values
data_imputed.isnull().sum()


Cement                  0
Blast Furnance Slag     0
Fly Ash                 0
Water                   0
Limestone Powder        0
Silica Fume             0
Quartz Powder           0
Superplasticizer        0
Coarse Aggregate        0
Fiber                   0
Nano-Silica             0
Fine Aggregate          0
Age                     0
Compressive Strength    0
dtype: int64

In [5]:
# Separate the features and target variable
X = data_imputed.drop(columns=['Compressive Strength'])
y = data_imputed['Compressive Strength']

# Display the first few rows of X and y to ensure the separation is correct
X.head(), y.head()


(   Cement   Blast Furnance Slag  Fly Ash  Water  Limestone Powder  \
 0    540.0                  0.0      0.0  162.0               0.0   
 1    540.0                  0.0      0.0  162.0               0.0   
 2    332.5                142.5      0.0  228.0               0.0   
 3    332.5                142.5      0.0  228.0               0.0   
 4    198.6                132.4      0.0  192.0               0.0   
 
    Silica Fume  Quartz Powder  Superplasticizer  Coarse Aggregate   Fiber  \
 0          0.0            0.0               2.5             1040.0    0.0   
 1          0.0            0.0               2.5             1055.0    0.0   
 2          0.0            0.0               0.0              932.0    0.0   
 3          0.0            0.0               0.0              932.0    0.0   
 4          0.0            0.0               0.0              978.4    0.0   
 
    Nano-Silica  Fine Aggregate    Age  
 0          0.0           676.0   28.0  
 1          0.0           

In [6]:
# Standardize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Check the transformed features
X_scaled[:5]  # Displaying the first 5 rows of scaled data


array([[ 0.58823188, -0.45752958, -0.5743619 , -0.57511062, -0.14117179,
        -0.40147228, -0.18225782, -0.6466626 ,  0.54080759, -0.29086898,
        -0.21172498, -0.28359272, -0.117069  ],
       [ 0.58823188, -0.45752958, -0.5743619 , -0.57511062, -0.14117179,
        -0.40147228, -0.18225782, -0.6466626 ,  0.5984478 , -0.29086898,
        -0.21172498, -0.28359272, -0.117069  ],
       [-0.39077794,  1.36263393, -0.5743619 ,  1.32802785, -0.14117179,
        -0.40147228, -0.18225782, -0.83638416,  0.12579805, -0.29086898,
        -0.21172498, -0.64525027,  4.41163451],
       [-0.39077794,  1.36263393, -0.5743619 ,  1.32802785, -0.14117179,
        -0.40147228, -0.18225782, -0.83638416,  0.12579805, -0.29086898,
        -0.21172498, -0.64525027,  6.18943134],
       [-1.02253416,  1.23362585, -0.5743619 ,  0.28995232, -0.14117179,
        -0.40147228, -0.18225782, -0.83638416,  0.30409844, -0.29086898,
        -0.21172498,  0.37577073,  6.09586309]])

In [7]:
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Display the shape of the training and testing sets
X_train.shape, X_test.shape


((2996, 13), (749, 13))

In [8]:
# Initialize the CatBoost Regressor
model = CatBoostRegressor(iterations=1000, learning_rate=0.1, depth=6, verbose=200)

# Train the model on the training data
model.fit(X_train, y_train)


0:	learn: 38.8592723	total: 149ms	remaining: 2m 28s
200:	learn: 4.8044156	total: 590ms	remaining: 2.34s
400:	learn: 3.8411745	total: 1.06s	remaining: 1.59s
600:	learn: 3.2943005	total: 1.54s	remaining: 1.02s
800:	learn: 2.9736452	total: 2.01s	remaining: 499ms
999:	learn: 2.7386167	total: 2.48s	remaining: 0us


<catboost.core.CatBoostRegressor at 0x17b43d1c908>

In [9]:
# Make predictions on the test set
y_pred = model.predict(X_test)

# Display the first few predicted values
y_pred[:5]


array([ 46.40895343, 140.16453983,  42.5007577 ,  58.29581729,
        47.64807178])

In [10]:
# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R² Score: {r2}")


Mean Squared Error: 25.12604392672313
R² Score: 0.9857103216058901


In [12]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from catboost import CatBoostRegressor
import pickle

# Load the dataset
data = pd.read_csv('litconcdata.csv')

# Handle missing values
imputer = SimpleImputer(strategy='mean')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Separate features and target variable
X = data_imputed.drop(columns=['Compressive Strength'])
y = data_imputed['Compressive Strength']

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the CatBoost Regressor model
model = CatBoostRegressor(iterations=1000, learning_rate=0.1, depth=6, verbose=200)
model.fit(X_train, y_train)

# Save the trained model and scaler for later use
with open('catboost_model.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)


0:	learn: 38.8592723	total: 2.93ms	remaining: 2.93s
200:	learn: 4.8044156	total: 494ms	remaining: 1.96s
400:	learn: 3.8411745	total: 963ms	remaining: 1.44s
600:	learn: 3.2943005	total: 1.43s	remaining: 951ms
800:	learn: 2.9736452	total: 1.91s	remaining: 476ms
999:	learn: 2.7386167	total: 2.41s	remaining: 0us


In [13]:
# Load the saved model and scaler
with open('catboost_model.pkl', 'rb') as f:
    model = pickle.load(f)

with open('scaler.pkl', 'rb') as f:
    scaler = pickle.load(f)

# Function to predict compressive strength based on input features
def predict_compressive_strength(input_values):
    """
    Takes input values as a dictionary, applies scaling, and predicts the compressive strength.
    """
    # Convert input to DataFrame (assuming input values are in the same order as the dataset)
    input_df = pd.DataFrame([input_values])
    
    # Scale the input values using the scaler that was used during training
    input_scaled = scaler.transform(input_df)
    
    # Predict the compressive strength
    predicted_strength = model.predict(input_scaled)
    
    return predicted_strength[0]


In [14]:
# Sample input data - replace with actual values
input_data = {
    'Cement': 400,                # Cement (in kg)
    'Blast Furnace Slag': 120,    # Blast Furnace Slag (in kg)
    'Fly Ash': 60,                # Fly Ash (in kg)
    'Water': 180,                 # Water (in kg)
    'Limestone Powder': 100,       # Limestone Powder (in kg)
    'Silica Fume': 5,             # Silica Fume (in kg)
    'Quartz Powder': 10,          # Quartz Powder (in kg)
    'Superplasticizer': 2,        # Superplasticizer (in kg)
    'Coarse Aggregate': 800,      # Coarse Aggregate (in kg)
    'Fiber': 3,                   # Fiber (in kg)
    'Nano-Silica': 2,             # Nano-Silica (in kg)
    'Fine Aggregate': 600,        # Fine Aggregate (in kg)
    'Age': 28                     # Age of the concrete mix (in days)
}

# Get the predicted compressive strength
predicted_strength = predict_compressive_strength(input_data)

# Display the predicted compressive strength
print(f"The predicted compressive strength is: {predicted_strength} MPa")


The predicted compressive strength is: 61.01137047727537 MPa


Feature names unseen at fit time:
- Blast Furnace Slag
- Cement
- Coarse Aggregate
Feature names seen at fit time, yet now missing:
- Blast Furnance Slag
- Cement 
- Coarse Aggregate 



In [16]:
# Import necessary libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.impute import SimpleImputer
from catboost import CatBoostRegressor
from sklearn.metrics import mean_squared_error, r2_score
import pickle

# Load the dataset
data = pd.read_csv('litconcdata.csv')

# Handle missing values
imputer = SimpleImputer(strategy='mean')
data_imputed = pd.DataFrame(imputer.fit_transform(data), columns=data.columns)

# Separate features and target variable
X = data_imputed.drop(columns=['Compressive Strength'])
y = data_imputed['Compressive Strength']

# Feature scaling
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Split into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=42)

# Train the CatBoost Regressor model
model = CatBoostRegressor(iterations=1000, learning_rate=0.1, depth=6, verbose=200)
model.fit(X_train, y_train)

# Save the trained model and scaler for later use
with open('catboost_model.pkl', 'wb') as f:
    pickle.dump(model, f)

with open('scaler.pkl', 'wb') as f:
    pickle.dump(scaler, f)

# Step 1: Evaluate the model on test data
y_pred = model.predict(X_test)

# Calculate the Mean Squared Error (MSE) and R² score
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Display the evaluation metrics
print(f"Mean Squared Error (MSE): {mse}")
print(f"R² Score: {r2}")

# Step 2: Function to predict compressive strength based on user input
def predict_compressive_strength(input_values):
    """
    Takes input values as a dictionary, applies scaling, and predicts the compressive strength.
    """
    # Convert input to DataFrame (assuming input values are in the same order as the dataset)
    input_df = pd.DataFrame([input_values])
    
    # Scale the input values using the scaler that was used during training
    input_scaled = scaler.transform(input_df)
    
    # Predict the compressive strength
    predicted_strength = model.predict(input_scaled)
    
    return predicted_strength[0]

# Example: Using the function to predict compressive strength based on new input
input_data = {
    'Cement': 400,                # Cement (in kg)
    'Blast Furnace Slag': 120,    # Blast Furnace Slag (in kg)
    'Fly Ash': 60,                # Fly Ash (in kg)
    'Water': 180,                 # Water (in kg)
    'Limestone Powder': 100,       # Limestone Powder (in kg)
    'Silica Fume': 5,             # Silica Fume (in kg)
    'Quartz Powder': 10,          # Quartz Powder (in kg)
    'Superplasticizer': 2,        # Superplasticizer (in kg)
    'Coarse Aggregate': 800,      # Coarse Aggregate (in kg)
    'Fiber': 3,                   # Fiber (in kg)
    'Nano-Silica': 2,             # Nano-Silica (in kg)
    'Fine Aggregate': 600,        # Fine Aggregate (in kg)
    'Age': 28                     # Age of the concrete mix (in days)
}

# Get the predicted compressive strength
predicted_strength = predict_compressive_strength(input_data)

# Display the predicted compressive strength
print(f"The predicted compressive strength is: {predicted_strength} MPa")


0:	learn: 38.8592723	total: 2.96ms	remaining: 2.95s
200:	learn: 4.8044156	total: 489ms	remaining: 1.95s
400:	learn: 3.8411745	total: 952ms	remaining: 1.42s
600:	learn: 3.2943005	total: 1.43s	remaining: 948ms
800:	learn: 2.9736452	total: 1.91s	remaining: 475ms
999:	learn: 2.7386167	total: 2.42s	remaining: 0us
Mean Squared Error (MSE): 25.12604392672313
R² Score: 0.9857103216058901
The predicted compressive strength is: 61.01137047727537 MPa


Feature names unseen at fit time:
- Blast Furnace Slag
- Cement
- Coarse Aggregate
Feature names seen at fit time, yet now missing:
- Blast Furnance Slag
- Cement 
- Coarse Aggregate 



In [18]:
max_values = data.drop(columns=['Compressive Strength']).max()
min_values = data.drop(columns=['Compressive Strength']).min()

# Combine max and min values into a DataFrame for easy display
min_max_df = pd.DataFrame({
    'Max Value': max_values,
    'Min Value': min_values
})

# Display the table
print("Maximum and Minimum values for each input feature:")
print(min_max_df)

Maximum and Minimum values for each input feature:
                     Max Value  Min Value
Cement                 1251.20     102.00
Blast Furnance Slag     440.00       0.00
Fly Ash                 356.00       0.00
Water                   476.00      72.00
Limestone Powder       1058.20       0.00
Silica Fume             433.70       0.00
Quartz Powder           397.00       0.00
Superplasticizer         57.00       0.00
Coarse Aggregate       1398.00       0.00
Fiber                   234.00       0.00
Nano-Silica              58.59       0.00
Fine Aggregate         1757.80     163.12
Age                     365.00       1.00
