In [18]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.feature_selection import SelectKBest, f_regression
from sklearn.preprocessing import StandardScaler

# --- Load data ---------------------------------------------------------------
# Concrete compressive-strength spreadsheet, read straight from the UCI archive.
data_url = "https://archive.ics.uci.edu/ml/machine-learning-databases/concrete/compressive/Concrete_Data.xls"
data = pd.read_excel(data_url)

print(data.columns)

# --- Features / target -------------------------------------------------------
# The last column is the target (concrete compressive strength); every column
# before it is used as a feature.
y = data.iloc[:, -1]
X = data.iloc[:, :-1]

# Report how many features we start with, for comparison after selection.
original_dim = len(X.columns)
print(f"\nOriginal number of features: {original_dim}")

# --- Train / test split ------------------------------------------------------
# 70 % train / 30 % test, with a fixed seed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# --- Standardisation ---------------------------------------------------------
# The scaler learns its statistics from the training split only and is then
# applied unchanged to both splits, so no test-set information leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Function to train and evaluate models
def train_evaluate_model(X_train, X_test, y_train, y_test, model):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    X_train, y_train
        Features and targets handed to ``model.fit``.
    X_test, y_test
        Features handed to ``model.predict`` and the true targets the
        predictions are compared against.
    model
        Any object exposing ``fit(X, y)`` and ``predict(X)``. It is fitted
        in place, so the caller's object is the trained model afterwards.

    Returns
    -------
    tuple
        ``(rmse, mae)``: the square root of the mean squared error and the
        mean absolute error of the test-set predictions.
    """
    model.fit(X_train, y_train)
    predictions = model.predict(X_test)

    # RMSE is derived from the MSE so both metrics share the same predictions.
    mse = mean_squared_error(y_test, predictions)
    return np.sqrt(mse), mean_absolute_error(y_test, predictions)



# Feature importance and dimension reduction
# Keep the N_SELECTED_FEATURES features that score highest under f_regression
# (univariate F-test against the target). The scores are learned from the
# training split only; the same columns are then picked from the test split.
N_SELECTED_FEATURES = 6
selector = SelectKBest(score_func=f_regression, k=N_SELECTED_FEATURES)
X_train_reduced = selector.fit_transform(X_train_scaled, y_train)
X_test_reduced = selector.transform(X_test_scaled)



Index(['Cement (component 1)(kg in a m^3 mixture)',
       'Blast Furnace Slag (component 2)(kg in a m^3 mixture)',
       'Fly Ash (component 3)(kg in a m^3 mixture)',
       'Water  (component 4)(kg in a m^3 mixture)',
       'Superplasticizer (component 5)(kg in a m^3 mixture)',
       'Coarse Aggregate  (component 6)(kg in a m^3 mixture)',
       'Fine Aggregate (component 7)(kg in a m^3 mixture)', 'Age (day)',
       'Concrete compressive strength(MPa, megapascals) '],
      dtype='object')

Original number of features: 8


### **Decision Tree**

In [19]:
from sklearn.tree import DecisionTreeRegressor

# Baseline: a decision tree fitted on all standardised features.
tree_model = DecisionTreeRegressor(random_state=42)
rmse_tree, mae_tree = train_evaluate_model(
    X_train=X_train_scaled,
    X_test=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
    model=tree_model,
)
print(f"Decision Tree - RMSE: {rmse_tree}, MAE: {mae_tree}")

# Number of columns left after feature selection.
reduced_dim = X_train_reduced.shape[-1]
print(f"\nReduced number of features: {reduced_dim}")

# Same estimator object, refitted on the reduced feature set for comparison.
rmse_tree_reduced, mae_tree_reduced = train_evaluate_model(
    X_train=X_train_reduced,
    X_test=X_test_reduced,
    y_train=y_train,
    y_test=y_test,
    model=tree_model,
)
print(f"Reduced Decision Tree - RMSE: {rmse_tree_reduced}, MAE: {mae_tree_reduced}")




Decision Tree - RMSE: 6.507857309920414, MAE: 4.210496690740686

Reduced number of features: 6
Reduced Decision Tree - RMSE: 8.072934689493657, MAE: 4.941882503770349


### **Support Vector Machine**

In [20]:
from sklearn.svm import SVR

# Linear-kernel support vector regression on all standardised features.
svr_model = SVR(kernel='linear')
rmse_svr, mae_svr = train_evaluate_model(
    X_train=X_train_scaled,
    X_test=X_test_scaled,
    y_train=y_train,
    y_test=y_test,
    model=svr_model,
)
print(f"SVR - RMSE: {rmse_svr}, MAE: {mae_svr}")

# Number of columns left after feature selection.
reduced_dim = X_train_reduced.shape[-1]
print(f"\nReduced number of features: {reduced_dim}")

# Same estimator object, refitted on the reduced feature set for comparison.
rmse_svr_reduced, mae_svr_reduced = train_evaluate_model(
    X_train=X_train_reduced,
    X_test=X_test_reduced,
    y_train=y_train,
    y_test=y_test,
    model=svr_model,
)
print(f"Reduced SVR - RMSE: {rmse_svr_reduced}, MAE: {mae_svr_reduced}")



SVR - RMSE: 11.090964024561979, MAE: 8.326481512943356

Reduced number of features: 6
Reduced SVR - RMSE: 11.264252337634094, MAE: 8.55227337617268


### **Gradient Boosting Regressor**

In [21]:
from sklearn.ensemble import GradientBoostingRegressor

# Gradient boosting on all standardised features.
# random_state is pinned so a re-run reproduces the same numbers, in line with
# the other seeded estimators in this notebook.
# NOTE(review): n_estimators=6 is far below scikit-learn's default of 100 and
# is the likely reason for the weak scores; confirm this is intentional.
model = GradientBoostingRegressor(n_estimators=6, random_state=42)
rmse_gbr, mae_gbr = train_evaluate_model(X_train_scaled, X_test_scaled, y_train, y_test, model)
print(f"Gradient Boosting - RMSE: {rmse_gbr}, MAE: {mae_gbr}")

# Display reduced dimensions of the data
reduced_dim = X_train_reduced.shape[1]
print(f"\nReduced number of features: {reduced_dim}")

# Retrain on the reduced data. The results get their own names so the SVR
# results in rmse_svr_reduced / mae_svr_reduced are no longer overwritten.
rmse_gbr_reduced, mae_gbr_reduced = train_evaluate_model(X_train_reduced, X_test_reduced, y_train, y_test, model)

print(f"Reduced Gradient Boosting - RMSE: {rmse_gbr_reduced}, MAE: {mae_gbr_reduced}")


Random Forest - RMSE: 12.184921515790482, MAE: 9.805521303971021

Reduced number of features: 6
Reduced Random forest - RMSE: 12.169419670703668, MAE: 9.772627740979548


### **Random Forest Regressor**

In [22]:
from sklearn.ensemble import RandomForestRegressor

# Random forest on all standardised features.
rf_regressor = RandomForestRegressor(
    n_estimators=100,           # Number of trees
    max_depth=15,             # Maximum depth of trees
    random_state=25         # Seed for random number generator
)

# Train and evaluate the model
rmse_rf, mae_rf = train_evaluate_model(X_train_scaled, X_test_scaled, y_train, y_test, rf_regressor)
print(f"Random Forest Regressor - RMSE: {rmse_rf}, MAE: {mae_rf}")

# Display reduced dimensions of the data
reduced_dim = X_train_reduced.shape[1]
print(f"\nReduced number of features: {reduced_dim}")

# Retrain on the reduced data. Results follow the *_reduced naming used by the
# other model cells.
rmse_rf_reduced, mae_rf_reduced = train_evaluate_model(X_train_reduced, X_test_reduced, y_train, y_test, rf_regressor)
# Original short names kept as aliases in case a later cell still reads them.
rmse_r, mae_r = rmse_rf_reduced, mae_rf_reduced
# The label now says "Reduced" (and spells "Regressor" correctly) so this line
# can be told apart from the full-feature result printed above.
print(f"Reduced Random Forest Regressor - RMSE: {rmse_rf_reduced}, MAE: {mae_rf_reduced}")

Random Forest Regressor - RMSE: 5.498416517056601, MAE: 3.7676697262140233

Reduced number of features: 6
Random Forest Rgressor - RMSE: 5.993436873386975, MAE: 4.137053510402069
