In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, root_mean_squared_error
from sklearn.linear_model import LinearRegression
import warnings
import matplotlib.pyplot as plt
warnings.filterwarnings('ignore')

# Load data
shear_strength = pd.read_csv('shear_strength.csv', header=None)
new_or_old = pd.read_csv('new_or_old.csv', header=None)
load_type = pd.read_csv('load_type.csv', header=None)
wall_l = pd.read_csv('wall_l.csv', header=None)
wall_h = pd.read_csv('wall_h.csv', header=None)
wall_t = pd.read_csv('wall_t.csv', header=None)
leaf_num = pd.read_csv('leaf_num.csv', header=None)
bond_pattern = pd.read_csv('bond_pattern.csv', header=None)
ft_mortar = pd.read_csv('ft_mortar.csv', header=None)
ft_brick = pd.read_csv('ft_brick.csv', header=None)

# Combine features and target
X = pd.concat([new_or_old, wall_l, wall_t, leaf_num, bond_pattern, ft_mortar, ft_brick], axis=1)
X.columns = ['new_or_old', 'wall_l', 'wall_t', 'leaf_num', 'bond_pattern', 'ft_mortar', 'ft_brick']
y_raw = shear_strength
y_raw.columns = ['shear_strength']
y = np.log(y_raw)
y.columns = ['shear_strength']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=4)

# Scale data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Train model
model = LinearRegression()
model.fit(X_train, y_train.values.ravel())

# Predictions
y_train_pred_log = model.predict(X_train)
y_test_pred_log = model.predict(X_test)

y_train_pred = np.exp(y_train_pred_log)
y_test_pred = np.exp(y_test_pred_log)

y_train_np = y_raw.iloc[y_train.index].to_numpy().squeeze()
y_test_np = y_raw.iloc[y_test.index].to_numpy().squeeze()

# Training metrics
r2_train = r2_score(y_train_np, y_train_pred)
rmse_train = root_mean_squared_error(y_train_np, y_train_pred)
ratio_train = y_train_np / y_train_pred
mean_ratio_train = np.mean(ratio_train)
cov_ratio_p_train = np.cov(ratio_train, rowvar=False, ddof=1) * 100

# Testing metrics
r2_test = r2_score(y_test_np, y_test_pred)
rmse_test = root_mean_squared_error(y_test_np, y_test_pred)
ratio_test = y_test_np / y_test_pred
mean_ratio_test = np.mean(ratio_test)
cov_ratio_p_test = np.cov(ratio_test, rowvar=False, ddof=1) * 100

# Output results
print("Linear Regression Results")
print(f"Training Set - R²: {r2_train:.3f}, RMSE: {rmse_train:.3f}, mean_ratio: {mean_ratio_train:.3f}, cov_ratio(%): {cov_ratio_p_train:.1f}")
print(f"Testing Set  - R²: {r2_test:.3f}, RMSE: {rmse_test:.3f}, mean_ratio: {mean_ratio_test:.3f}, cov_ratio(%): {cov_ratio_p_test:.1f}")


Linear Regression Results
Training Set - R²: 0.293, RMSE: 0.367, mean_ratio: 1.257, cov_ratio(%): 57.5
Testing Set  - R²: 0.418, RMSE: 0.353, mean_ratio: 1.140, cov_ratio(%): 43.2
