In [None]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from sklearn.model_selection import train_test_split
import seaborn as sns

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

# Apply seaborn's "ticks" theme to the figures drawn in this notebook.
sns.set(style="ticks")
#sns.set(style="whitegrid", color_codes=True)

In [None]:
# Read the experimental database; the path is relative to the kernel's working directory.
data = pd.read_excel('BFRC_STS.xlsx')


In [None]:
# Sanity check: (rows, columns) of the loaded table.
print(data.shape)

(267, 11)


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
data.describe()

Unnamed: 0,Cement (kg/m³),Fly ash (kg/m³),Silica fume (kg/m³),Coarse aggregate (kg/m³),Fine aggregate (kg/m³),Water (kg/m³),Water reducing agent (kg/m³),Fiber diameter (mm),Fiber length (mm),Fiber content (%),Splitting tensile strength (Mpa)
count,267.0,267.0,267.0,267.0,267.0,267.0,267.0,267.0,267.0,267.0,267.0
mean,402.544719,45.717453,16.429213,1079.504644,697.829438,177.606742,3.337865,0.01593,16.749064,0.126592,4.348101
std,73.92,56.470326,31.284398,161.986898,88.452215,29.875916,2.242625,0.002699,6.376704,0.106337,1.739823
min,217.0,0.0,0.0,512.0,507.0,125.0,0.0,0.013,6.0,0.0,2.2
25%,353.5,0.0,0.0,998.0,633.0,160.0,2.4,0.015,12.0,0.05,3.1825
50%,402.0,0.0,0.0,1125.0,688.0,179.0,4.0,0.015,18.0,0.1,3.724
75%,450.0,86.0,20.0,1180.0,781.0,188.0,4.81,0.0155,20.0,0.2,4.885
max,613.33,168.0,126.0,1540.0,875.0,301.0,8.36,0.03,30.0,0.5,9.8


In [None]:
# Separate the predictors (X) from the response (Y)
target_column = 'Splitting tensile strength (Mpa)'
Y = data[target_column]
X = data.drop(columns=[target_column])

# Report the dimensions of both parts
for part in (X, Y):
    print(part.shape)

(267, 10)
(267,)


In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Random 70 % / 30 % hold-out split; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, Y, test_size=0.30, random_state=42
)

# Standardize the features: the statistics are learned on the training part only
# and then reused unchanged on the test part
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [None]:
# Show the dimensions of every train/test array
for caption, part in (
    ("X_train shape:", X_train),
    ("y_train shape:", y_train),
    ("X_test shape:", X_test),
    ("y_test shape:", y_test),
):
    print(caption, part.shape)

X_train shape: (186, 10)
y_train shape: (186,)
X_test shape: (81, 10)
y_test shape: (81,)


Different machine learning (ML) models are adopted below to predict the splitting tensile strength (STS) of BFRC from its mix proportions and fiber properties. The performances of these ML models are evaluated and compared with each other.

# Support Vector Regressor

In [None]:
# Hyperparameter optimization with an exhaustive grid search (5-fold CV, scored by R2)
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVR
svr = SVR()

kernel = ['linear', 'poly', 'rbf', 'sigmoid']
degree = [2, 3, 4, 5]
gamma = ['scale', 'auto']
C = [0.1, 1, 10, 100,300,500]

# SVR only uses `degree` with kernel='poly' and ignores `gamma` with kernel='linear'.
# A single Cartesian grid therefore refits identical models many times
# (4 * 4 * 2 * 6 = 192 candidates). One sub-grid per kernel family covers the same
# distinct models with 6 + 48 + 24 = 78 candidates.
# Note: best_params_ now only lists the parameters relevant to the winning kernel.
params = [
    {'kernel': ['linear'], 'C': C},
    {'kernel': ['poly'], 'degree': degree, 'gamma': gamma, 'C': C},
    {'kernel': ['rbf', 'sigmoid'], 'gamma': gamma, 'C': C},
]
grid_svr = GridSearchCV(estimator = svr,
                        param_grid = params,
                        scoring = 'r2',
                        cv = 5,
                        n_jobs = -1)
grid_svr.fit(X_train, y_train)

# extract best estimator
print(grid_svr.best_params_)

# R2 of the refitted best estimator on the training and the held-out test data
print(grid_svr.score(X_train, y_train))
print(grid_svr.score(X_test, y_test))

{'C': 100, 'degree': 2, 'gamma': 'scale', 'kernel': 'rbf'}
0.9932252055373431
0.9736737954107147


In [None]:
from sklearn.svm import SVR

# RBF-kernel SVR with C = 100; every other setting stays at its default
svr = SVR(kernel='rbf', C=100)
svr.fit(X_train, y_train)

# Predictions on both partitions
Y_train_svr = svr.predict(X_train)
Y_test_svr = svr.predict(X_test)

# Report R2, RMSE and MAE for the training and the testing partition
for r2_caption, y_true, y_hat in (
    ("Training R2:", y_train, Y_train_svr),
    ("Testing R2:", y_test, Y_test_svr),
):
    print(r2_caption, r2_score(y_true, y_hat),
          "RMSE:", np.sqrt(mean_squared_error(y_true, y_hat)),
          "MAE:", mean_absolute_error(y_true, y_hat))

Training R2: 0.9932252055373431 RMSE: 0.14364738818627612 MAE: 0.10196643417866515
Testing R2: 0.9736737954107147 RMSE: 0.27736574358160093 MAE: 0.20812459833683836


In [None]:
## Write each prediction array to its own workbook (test first, then train)
for predictions, workbook in (
    (Y_test_svr, 'Pred_svr_test.xlsx'),
    (Y_train_svr, 'Pred_svr_train.xlsx'),
):
    # to_excel needs a DataFrame, so wrap the NumPy array first
    df_pred = pd.DataFrame(predictions)
    df_pred.to_excel(workbook)



In [None]:
# Mean Absolute Percentage Error (MAPE) for the training and testing sets
def _mape(actual, predicted):
    """Return the mean of |actual - predicted| / |actual|, expressed in percent."""
    relative_error = np.abs((actual - predicted) / actual)
    return np.mean(relative_error) * 100

mape_train = _mape(y_train, Y_train_svr)
mape_test = _mape(y_test, Y_test_svr)
print(f"Mean Absolute Percentage Error (MAPE) - Training: {mape_train:.4f}%")
print(f"Mean Absolute Percentage Error (MAPE) - Testing: {mape_test:.4f}%")

Mean Absolute Percentage Error (MAPE) - Training: 2.5493%
Mean Absolute Percentage Error (MAPE) - Testing: 5.2953%


In [None]:
import joblib

In [None]:
# Step 5: Persist the fitted SVR together with the scaler applied to its inputs
for artifact, target_file in ((svr, 'svr.joblib'), (scaler, 'scaler.joblib')):
    joblib.dump(artifact, target_file)

print("Model and scaler saved successfully.")

Model and scaler saved successfully.


In [None]:
import tkinter as tk
from tkinter import messagebox
import pandas as pd
import numpy as np
import shap
import joblib
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import matplotlib.pyplot as plt
from xgboost import XGBRegressor
from sklearn.preprocessing import StandardScaler

# Matplotlib style
# The bundled seaborn styles were renamed with a "seaborn-v0_8-" prefix in
# Matplotlib 3.6 and the old names were removed in 3.8, so use whichever name
# this installation provides (and keep the default style if neither exists).
for _style_name in ('seaborn-v0_8-ticks', 'seaborn-ticks'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
plt.rcParams["font.family"] = "Times New Roman"

# Feature names (customize as needed)
# Bold mathtext labels passed to the SHAP plot as feature_names. They are matched
# to the feature columns by position, so the order here has to follow the column
# order of the data the scaler/model were fitted on.
featureName = [
    r'$\mathbf{Cement\ (kg/m^3)}$',
    r'$\mathbf{Fly\ ash\ (kg/m^3)}$',
    r'$\mathbf{Silica\ fume\ (kg/m^3)}$',
    r'$\mathbf{Coarse\ aggregate\ (kg/m^3)}$',
    r'$\mathbf{Fine\ aggregate\ (kg/m^3)}$',
    r'$\mathbf{Water\ (kg/m^3)}$',
    r'$\mathbf{Water\ reducing\ agent\ (kg/m^3)}$',
    r'$\mathbf{Fiber\ diameter\ (mm)}$',
    r'$\mathbf{Fiber\ length\ (mm)}$',
    r'$\mathbf{Fiber\ content\ (\%)}$'
]

# Initialize the Tkinter window
root = tk.Tk()
root.title("GUI")
root.geometry("950x700")

# Create canvas for displaying the SHAP plot
frame_canvas = tk.Canvas(root)
frame_canvas.pack(fill=tk.BOTH, expand=True)

# Load model and scaler
# NOTE: joblib files are pickles; only load files produced by this notebook or
# another trusted source.
try:
    svr = joblib.load('svr.joblib')
    scaler = joblib.load('scaler.joblib')
except Exception as e:
    messagebox.showerror("Error", f"Error loading model or scaler: {e}")
    root.destroy()
    # Without the model there is nothing to show. Re-raise so the cell stops here
    # instead of continuing to build widgets on a destroyed window with
    # `svr`/`scaler` undefined.
    raise

# Load data for prediction and SHAP calculation
def load_data_and_plot():
    """Embed a SHAP feature-importance bar chart for the loaded SVR in the GUI.

    Reads ``BFRC_STS.xlsx``, treats every column except the last one as a feature,
    standardizes the features with the loaded ``scaler``, explains ``svr.predict``
    with SHAP and packs the resulting figure into ``frame_canvas``.

    Returns:
        None. Any failure is reported in a message box instead of being raised.
    """
    try:
        # Load data from Excel file
        df = pd.read_excel("BFRC_STS.xlsx")

        # Features are all columns but the last (the last column is the target)
        X = df.iloc[:, :-1]

        # Standardize the features
        X_scaled = scaler.transform(X)

        # The model loaded by this cell is `svr`. It is explained through its
        # predict function with a background sample, because SHAP has no
        # model-specific explainer to pick for an SVR object. A seeded subsample
        # of at most 50 rows keeps the model-agnostic explanation tractable and
        # repeatable.
        rng = np.random.default_rng(0)
        n_background = min(50, len(X_scaled))
        background = X_scaled[rng.choice(len(X_scaled), size=n_background, replace=False)]
        explainer = shap.Explainer(svr.predict, background)
        shap_values = explainer(X_scaled)

        # Create a SHAP summary plot (Feature importance).
        # summary_plot has no `ax` argument: it draws on the current figure, so
        # create the figure first, keep its size (plot_size=None) and suppress
        # plt.show() so the figure can be embedded in Tkinter.
        fig = plt.figure(figsize=(10, 6))
        shap.summary_plot(shap_values, X_scaled, feature_names=featureName,
                          plot_type="bar", show=False, plot_size=None)

        # Display plot in Tkinter window
        canvas = FigureCanvasTkAgg(fig, master=frame_canvas)
        canvas.draw()
        canvas.get_tk_widget().pack(fill=tk.BOTH, expand=True)

    except Exception as e:
        messagebox.showerror("Error", f"Error loading data or generating SHAP plot: {e}")

# Load data and plot SHAP feature importance
load_data_and_plot()

# Helper that puts a text label onto the canvas
def create_label(text, font, fg, bg, x, y):
    """Create a ``tk.Label`` owned by ``root`` and place it on ``frame_canvas``.

    Args:
        text: Caption shown by the label.
        font: Tk font specification for the caption.
        fg: Foreground (text) colour.
        bg: Background colour.
        x: Canvas x coordinate of the label's west (left-centre) anchor.
        y: Canvas y coordinate of the label's west (left-centre) anchor.

    Returns:
        The created ``tk.Label`` so callers can update it later.
    """
    widget = tk.Label(master=root, text=text, font=font, fg=fg, bg=bg)
    # Embed the widget as a canvas window item, anchored at its west edge
    frame_canvas.create_window(x, y, window=widget, anchor="w")
    return widget

# GUI Titles
# Main heading of the window
label_inputdefinetitle2 = create_label('GUI model for Predicting Splitting Tensile Strength of BFRC',
                                      ('Comic Sans MS', 18, 'bold', 'underline'), '#0000FF', '#FFFF00', 20, 30)
# Author credits, placed below the heading
label_inputdefinetitle3 = create_label('Developed by:  Abul KASHEM,Pobithra Das,Sourov Paul,Kaffayatullah Khan,Abdulrahman Fahad Al Fuhaid, Md Arifuzzaman',
                                      ('Comic Sans MS', 14, 'bold'), '#C00000', '#FFFFFF', 20, 70)
# Heading of the parameter list on the left side
label_inputdefinetitle = create_label('Input Parameters',
                                      ('Comic Sans MS', 16, 'bold'), '#000000', '#FFFFFF', 50, 120)

# Parameter Definitions (left)
# The entry values X1..X10 are passed to the scaler/model by position, so these
# captions must follow the feature order used for training (same order as
# `featureName`): fiber diameter, then fiber length, then fiber content.
param_labels = [
    'X1: Cement (kg/m³)',
    'X2: Fly Ash (kg/m³)',
    'X3: Silica Fume (kg/m³)',
    'X4: Coarse Aggregate (kg/m³)',
    'X5: Fine Aggregate (kg/m³)',
    'X6: Water (kg/m³)',
    'X7: Water Reducing Agent (kg/m³)',
    'X8: Fiber Diameter (mm)',
    'X9: Fiber Length (mm)',
    'X10: Fiber Content (%)'
]

# Draw parameter labels on left side, one row every 40 px
for i, text in enumerate(param_labels, start=1):
    create_label(text, ('Comic Sans MS', 14), '#00008B', '#FFFFFF', 50, 180 + i*40)

# Heading above the column of entry boxes (right)
label_inputs = create_label('Inputs', ('Comic Sans MS', 16, 'bold'), '#000000', '#FFFFFF', 620, 180)

# One "Xn = " caption and one entry box per model input, spaced 40 px apart;
# `entries` maps each field name to its entry widget
entry_fields = ['X1', 'X2', 'X3', 'X4', 'X5', 'X6', 'X7', 'X8','X9','X10']
entries = {}
for row, field in enumerate(entry_fields, start=1):
    row_y = 180 + row*40
    create_label(f'{field} = ', ('Comic Sans MS', 14, 'bold'), '#006600', '#FFFFFF', 650, row_y)
    entry = tk.Entry(
        root,
        font=('Comic Sans MS', 14, 'bold'),
        bg='#F0F0F0',
        highlightbackground='#000000',
        highlightthickness=1,
        bd=0,
        width=20,
    )
    frame_canvas.create_window(830, row_y, window=entry)
    entries[field] = entry

# Output Section (placed much lower)
label_output = create_label('Output:', ('Comic Sans MS', 16, 'bold'), '#000000', '#FFFFFF', 50, 650)
# predict() overwrites this label's text with the computed value
label_result = create_label('Splitting Tensile strength (MPa)',
                            ('Comic Sans MS', 16, 'bold'), '#C00000', '#FFFFFF', 200, 650)

# Optional: Set proper window height
# (replaces the 950x700 geometry requested when the window was created)
root.geometry("1050x800")  # Adjust based on your needs


# Predict button command function
def predict():
    """Read the entry boxes, run the loaded SVR and display the prediction.

    Every field listed in ``entry_fields`` must hold a finite, non-negative
    number. Zero is accepted because a mix may contain none of a given
    constituent. The values are standardized with ``scaler`` and passed, in field
    order, to ``svr``; the result is written to ``label_result``.

    Returns:
        None. Invalid input or a prediction failure is reported in a message box.
    """
    # Collect input data
    input_values = []
    for field in entry_fields:
        try:
            value = float(entries[field].get())
            # float() also parses "nan"/"inf", which are not usable inputs
            if not np.isfinite(value) or value < 0:
                raise ValueError("Value must be a finite, non-negative number.")
        except ValueError:
            messagebox.showerror("Error", f"Invalid input for {field}. Please enter a non-negative numeric value.")
            return
        input_values.append(value)

    try:
        # When the scaler was fitted on a DataFrame, hand it a DataFrame with the
        # same column names; a bare array triggers sklearn's feature-name warning.
        feature_columns = getattr(scaler, "feature_names_in_", None)
        if feature_columns is not None:
            input_data = pd.DataFrame([input_values], columns=feature_columns)
        else:
            input_data = np.array([input_values])

        # Predict using the model loaded from 'svr.joblib'
        input_scaled = scaler.transform(input_data)
        prediction = svr.predict(input_scaled)
        label_result['text'] = f'STS = {prediction[0]:.4f} (MPa)'
    except Exception as e:
        messagebox.showerror("Error", f"An error occurred during prediction: {e}")

# Predict button
# Clicking it runs predict(), which validates the entries and updates label_result
predict_button = tk.Button(root, text='Calculate', font=('Comic Sans MS', 16), command=predict, fg='#FFFFFF', bg='dark blue')
frame_canvas.create_window(750, 650, window=predict_button)

# Run the Tkinter event loop
# (this call blocks the cell until the window is closed)
root.mainloop()


ntree_limit is deprecated, use `iteration_range` or model slicing instead.
X does not have valid feature names, but StandardScaler was fitted with feature names
