In [1]:
# =============================================================================
# Section 1: Import Libraries
# =============================================================================
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import re
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from joblib import dump
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader

# Apply seaborn's "whitegrid" theme once, up front, so that every figure
# created later in this script shares the same grid-on-white background.
sns.set_style("whitegrid")
print("Libraries imported successfully.")


# =============================================================================
# Section 2: Data Loading, Cleaning, and Merging
# =============================================================================
print("\n--- Section 2: Loading, Cleaning & Merging Data ---")

def clean_dataframe(df):
    """Perform the initial cleaning shared by both raw CSV dataframes.

    Steps, in order:
      1. Rename ``'Unnamed: 0'`` to ``'Project_Name'`` and drop the
         ``'Architectural aspect'`` column when it exists.
      2. Normalise project names (strip surrounding whitespace, lower-case).
      3. For each known feature column that is present, remove thousands
         separators and stray backticks, then convert to numeric; values
         that still cannot be parsed become NaN.
      4. Fill NaN in every numeric column with that column's median.

    Args:
        df: DataFrame to clean. It is modified in place, so callers that
            need the raw data must pass a copy.

    Returns:
        The same DataFrame object, cleaned.
    """
    df.rename(columns={'Unnamed: 0': 'Project_Name'}, inplace=True)
    df.drop(columns=['Architectural aspect'], inplace=True, errors='ignore')
    df['Project_Name'] = df['Project_Name'].str.strip().str.lower()

    feature_cols = [
        'Quantity of plaster (sq.m.)', 'Quantity of glazed tiles (sq.m.)',
        'Painting masonry (sq.m.)', 'painting wood (sq.m.)',
        'painting metal (sq.m.)', 'Area of CHB 100mm (sq.m.)',
        'Area of CHB 150mm (sq.m.)'
    ]
    for col in feature_cols:
        if col not in df.columns:
            continue
        # regex=False: ',' and '`' are literal characters, not patterns.
        as_text = (
            df[col].astype(str)
            .str.replace(',', '', regex=False)
            .str.replace('`', '', regex=False)
        )
        df[col] = pd.to_numeric(as_text, errors='coerce')

    for col in df.columns:
        if pd.api.types.is_numeric_dtype(df[col]) and df[col].isnull().any():
            # Assign the result back instead of calling
            # `df[col].fillna(..., inplace=True)`: that chained in-place form
            # operates on a temporary object, is deprecated, and no longer
            # updates `df` under pandas Copy-on-Write / pandas 3.0.
            df[col] = df[col].fillna(df[col].median())
    return df

def extract_budget(text):
    """Extract the budget amount from a 'Year/Budget' cell.

    Scans ``text`` for number-like tokens (digits with optional comma
    separators and an optional decimal part) and returns the first token
    longer than four characters, so a bare four-digit year such as
    ``2019`` is skipped.

    Args:
        text: Raw cell value. Anything that is not a ``str`` (e.g. NaN)
            yields ``None``.

    Returns:
        The budget as a ``float``, or ``None`` when no usable token exists.
    """
    if not isinstance(text, str):
        return None

    for token in re.findall(r'[\d,]+\.?\d*', text):
        if len(token) <= 4:
            continue
        try:
            return float(token.replace(',', ''))
        except ValueError:
            # The pattern also matches separator-only runs such as ",,,,,";
            # those are not numbers, so keep scanning instead of crashing.
            continue
    return None

# Load both raw CSV exports. The file names (including the spelling
# 'Achitectural') are runtime paths and presumably match the files on disk.
try:
    df_quantity = pd.read_csv('Thesis Data - Architectural Quantity Cost.csv')
    print("Architectural Quantity data loaded.")
    df_unit_cost = pd.read_csv('Thesis Data - Achitectural Unit Cost.csv')
    print("Architectural Unit Cost data loaded.")
except FileNotFoundError as e:
    print(f"Error: {e}. Make sure both CSV files are in the correct directory.")
    # Stop immediately with a non-zero status. The `exit()` helper comes from
    # the `site` module for interactive use, reports success (status 0), and
    # under IPython/Jupyter appears only to request a kernel shutdown, letting
    # the rest of the cell run into a NameError. NOTE(review): confirm.
    raise SystemExit(1) from e

# --- Clean and Merge Dataframes ---
# Clean copies, so the frames returned by read_csv stay untouched.
df_quantity_cleaned = clean_dataframe(df_quantity.copy())
df_unit_cost_cleaned = clean_dataframe(df_unit_cost.copy())

# Parse the budget out of the quantity frame's 'Year/Budget' text, then
# discard the raw text column from both frames before joining them.
df_quantity_cleaned['Budget'] = df_quantity_cleaned['Year/Budget'].apply(extract_budget)
df_quantity_cleaned = df_quantity_cleaned.drop(columns=['Year/Budget'])
df_unit_cost_cleaned = df_unit_cost_cleaned.drop(columns=['Year/Budget'])

# Inner join on the normalised project name; columns present in both frames
# are disambiguated with the '_qty' / '_cost' suffixes.
df_merged = df_quantity_cleaned.merge(
    df_unit_cost_cleaned,
    on='Project_Name',
    suffixes=('_qty', '_cost'),
)

# Keep only rows with a parsed budget strictly greater than 100000.
df_merged = df_merged.dropna(subset=['Budget'])
above_threshold = df_merged['Budget'] > 100000
df_merged = df_merged.loc[above_threshold].copy()
print(f"Dataframes merged. Working with {len(df_merged)} common projects.")


# =============================================================================
# Section 3: **UPDATED** - Granular Feature Engineering & Visualization
# =============================================================================
print("\n--- Section 3: Engineering Granular Features & Analysis ---")

# --- Step 1: Multiply Quantity by Unit Cost for Granular Features ---
# Each base column exists twice after the merge: once with the '_qty' suffix
# (quantity frame) and once with '_cost' (unit-cost frame). Their row-wise
# product becomes the estimated cost feature for that component.
individual_cost_features = []
base_feature_cols = [
    'Quantity of plaster (sq.m.)', 'Quantity of glazed tiles (sq.m.)',
    'Painting masonry (sq.m.)', 'painting wood (sq.m.)',
    'painting metal (sq.m.)', 'Area of CHB 100mm (sq.m.)',
    'Area of CHB 150mm (sq.m.)'
]

for col in base_feature_cols:
    qty_col = col + '_qty'
    cost_col = col + '_cost'
    # e.g. 'Quantity of plaster (sq.m.)' -> 'plaster_Est_Cost'
    new_cost_feature = col.replace(' (sq.m.)', '').replace('Quantity of ', '').replace('Area of ', '').replace(' ', '_') + '_Est_Cost'
    df_merged[new_cost_feature] = df_merged[qty_col] * df_merged[cost_col]
    individual_cost_features.append(new_cost_feature)

print("Created new granular features for each architectural component:")
print(individual_cost_features)

# --- Step 2: Create Contextual and Target Features ---
# Project names are lower-cased during cleaning, so the patterns look for
# '<digits>sty' and '<digits>cl'. expand=False yields a Series, not a
# one-column DataFrame.
df_merged['Num_Storeys'] = df_merged['Project_Name'].str.extract(r'(\d+)\s*sty', expand=False).astype(float)
df_merged['Num_Classrooms'] = df_merged['Project_Name'].str.extract(r'(\d+)\s*cl', expand=False).astype(float)

# Median-impute names without a match. The result is assigned back because
# the chained `df[col].fillna(..., inplace=True)` form is deprecated and stops
# updating the frame in pandas 3.0, which would leave NaNs in the model inputs.
df_merged['Num_Storeys'] = df_merged['Num_Storeys'].fillna(df_merged['Num_Storeys'].median())
df_merged['Num_Classrooms'] = df_merged['Num_Classrooms'].fillna(df_merged['Num_Classrooms'].median())

# Regression target: log(1 + Budget).
df_merged['Budget_log'] = np.log1p(df_merged['Budget'])

# --- Step 3: Visualization and Analysis ---
print("\n--- Generating and Saving Visualizations for Analysis ---")

# Every figure is written into this folder; create it if it is missing.
output_dir = 'visualization images'
os.makedirs(output_dir, exist_ok=True)

# 3.1: Justifying Log-Transformation
# Two histograms side by side: raw budget (left) and its log1p transform (right).
log_effect_path = os.path.join(output_dir, 'architectural_log_transformation_effect.png')
fig, axes = plt.subplots(1, 2, figsize=(16, 6))
ax_raw, ax_log = axes

sns.histplot(df_merged['Budget'], kde=True, ax=ax_raw, bins=30)
ax_raw.set_title('Distribution of Original Budget (Right-Skewed)')
ax_raw.set_xlabel('Budget (PHP)')
ax_raw.ticklabel_format(style='plain', axis='x')

sns.histplot(df_merged['Budget_log'], kde=True, ax=ax_log, color='green', bins=30)
ax_log.set_title('Distribution of Log-Transformed Budget (Normalized)')
ax_log.set_xlabel('Log(1 + Budget)')

fig.suptitle('Effect of Log-Transformation on Target Variable', size=16)
fig.tight_layout(rect=[0, 0.03, 1, 0.95])
fig.savefig(log_effect_path, bbox_inches='tight')
plt.close(fig)
print(f"Saved: {log_effect_path}")

# 3.2: Correlation Matrix of Final Engineered Features
# Pairwise correlations between the engineered inputs and the raw budget.
correlation_path = os.path.join(output_dir, 'architectural_features_correlation_matrix.png')
heatmap_cols = [*individual_cost_features, 'Num_Storeys', 'Num_Classrooms', 'Budget']
correlation_matrix = df_merged[heatmap_cols].corr()

plt.figure(figsize=(14, 12))
sns.heatmap(correlation_matrix, annot=True, cmap='viridis', fmt='.2f')
plt.title('Correlation Matrix of Granular Architectural Features', size=16)
plt.savefig(correlation_path, bbox_inches='tight')
plt.close()
print(f"Saved: {correlation_path}")

# 3.3: Justifying the Need for Standardization
# Box plots of the unscaled inputs on a log x-axis to compare their ranges.
scales_path = os.path.join(output_dir, 'architectural_feature_scales_before_standardization.png')
final_feature_columns = [*individual_cost_features, 'Num_Storeys', 'Num_Classrooms']
X_for_viz = df_merged[final_feature_columns].copy()
X_for_viz = X_for_viz.fillna(X_for_viz.median())

plt.figure(figsize=(10, 8))
sns.boxplot(data=X_for_viz, orient='h')
plt.title('Architectural Feature Scales Before Standardization')
plt.xlabel('Original Feature Values (Varying Scales)')
plt.xscale('log')
plt.savefig(scales_path, bbox_inches='tight')
plt.close()
print(f"Saved: {scales_path}")


# =============================================================================
# Section 4: **UPDATED** - Data Preparation for the ANN Model
# =============================================================================
print("\n--- Section 4: Preparing Data for the ANN Model ---")

# --- Define the features (X) and the target (y) ---
# y is kept two-dimensional (one-column frame) because the scaler below
# is fitted on it directly.
X = df_merged[final_feature_columns]
y = df_merged[['Budget_log']]

print(f"\nTraining model with {X.shape[1]} input features.")

# 80/20 train/test split with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- Feature and Target Scaling ---
# Both scalers are fitted on the training rows only and then applied to
# the train and test rows alike.
scaler_X = StandardScaler().fit(X_train)
X_train_scaled = scaler_X.transform(X_train)
X_test_scaled = scaler_X.transform(X_test)

scaler_y = MinMaxScaler().fit(y_train)
y_train_scaled = scaler_y.transform(y_train)
y_test_scaled = scaler_y.transform(y_test)

# --- Convert to PyTorch Tensors ---
X_train_tensor, y_train_tensor, X_test_tensor, y_test_tensor = (
    torch.tensor(scaled_array, dtype=torch.float32)
    for scaled_array in (X_train_scaled, y_train_scaled, X_test_scaled, y_test_scaled)
)

print("Data has been split, scaled, and converted to tensors.")


# =============================================================================
# Section 5: Build and Train the Artificial Neural Network
# =============================================================================
print("\n--- Section 5: Building and Training the ANN ---")

class RegressionNet(nn.Module):
    """Fully connected regressor with a single sigmoid output unit.

    Layout: input -> 128 -> 64 -> 32 -> 1, with ReLU after each hidden
    layer and dropout after the first (p=0.3) and second (p=0.2) hidden
    layers. The final sigmoid keeps every prediction inside (0, 1).

    Args:
        input_features: Number of columns in each input row.
    """

    def __init__(self, input_features):
        super().__init__()
        # Attribute names and creation order are deliberately unchanged:
        # they determine the state_dict keys and the order in which the
        # random initial weights are drawn.
        self.layer1 = nn.Linear(input_features, 128)
        self.dropout1 = nn.Dropout(0.3)
        self.layer2 = nn.Linear(128, 64)
        self.dropout2 = nn.Dropout(0.2)
        self.layer3 = nn.Linear(64, 32)
        self.output_layer = nn.Linear(32, 1)

    def forward(self, x):
        """Return the network output for a batch ``x`` of feature rows."""
        hidden = self.dropout1(F.relu(self.layer1(x)))
        hidden = self.dropout2(F.relu(self.layer2(hidden)))
        hidden = F.relu(self.layer3(hidden))
        return torch.sigmoid(self.output_layer(hidden))

# --- Model Initialization and Training Setup ---
input_size = X_train_tensor.shape[1]
model = RegressionNet(input_features=input_size)
print("Model Architecture:")
print(model)

# Adam optimiser on mean-squared error, mini-batches of 16 reshuffled each epoch.
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = nn.MSELoss()
batch_size = 16
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

# --- Training Loop ---
epochs = 200
train_losses = []
for epoch in range(epochs):
    model.train()  # enable dropout for the training pass
    batch_losses = []
    for batch_features, batch_targets in train_loader:
        optimizer.zero_grad()
        batch_loss = criterion(model(batch_features), batch_targets)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())

    # Epoch loss is the plain mean of the per-batch losses.
    avg_loss = sum(batch_losses) / len(train_loader)
    train_losses.append(avg_loss)

    # Report progress every 20th epoch.
    if (epoch + 1) % 20 == 0:
        print(f'Epoch [{epoch+1}/{epochs}], Loss: {avg_loss:.6f}')

print("Neural Network training complete.")

# --- Plot and Save Training Loss ---
# One point per epoch: the mean batch loss recorded during training.
loss_curve_path = os.path.join(output_dir, 'architectural_model_training_loss_curve.png')

plt.figure(figsize=(10, 6))
plt.plot(train_losses, label='Training Loss')
plt.title('Architectural Model Training Loss Over Epochs', size=16)
plt.xlabel('Epoch', size=12)
plt.ylabel('Mean Squared Error Loss', size=12)
plt.legend()
plt.savefig(loss_curve_path, bbox_inches='tight')
plt.close()
print(f"Saved: {loss_curve_path}")


# =============================================================================
# Section 6: Model Evaluation
# =============================================================================
print("\n--- Section 6: Evaluating the ANN Model (with Granular Features) ---")

# Inference only: switch dropout off and skip gradient tracking.
model.eval()
with torch.no_grad():
    scaled_predictions = model(X_test_tensor).numpy()

# --- Inverse Transform Predictions to Original Scale ---
# Undo the two target transforms in reverse order: min-max scaling first,
# then log1p (via expm1).
log_predictions = scaler_y.inverse_transform(scaled_predictions)
final_predictions = np.expm1(log_predictions).flatten()
y_test_actual = np.expm1(y_test.values).flatten()

# --- Calculate and Display Performance Metrics ---
r2_ann = r2_score(y_test_actual, final_predictions)
mae_ann = mean_absolute_error(y_test_actual, final_predictions)
mse_ann = mean_squared_error(y_test_actual, final_predictions)
rmse_ann = np.sqrt(mse_ann)

print("\n--- Final Model Performance ---")
print(f"R-squared (R²): {r2_ann:.4f}")
print(f"Mean Absolute Error (MAE): ₱{mae_ann:,.2f}")
print(f"Root Mean Squared Error (RMSE): ₱{rmse_ann:,.2f}")

# --- Visualization: Actual vs. Predicted Budget ---
# Scatter of test-set predictions against the true budgets, with the
# y = x diagonal drawn across the range of actual values.
actual_vs_pred_path = os.path.join(output_dir, 'architectural_actual_vs_predicted_budget.png')
budget_range = [y_test_actual.min(), y_test_actual.max()]

plt.figure(figsize=(10, 7))
plt.scatter(y_test_actual, final_predictions, alpha=0.6, edgecolors='w', label='Predictions')
plt.plot(budget_range, budget_range, 'r--', lw=2, label='Perfect Fit')
plt.title('Actual vs. Predicted Project Budget (Architectural Model)', size=16)
plt.xlabel('Actual Budget (PHP)', size=12)
plt.ylabel('Predicted Budget (PHP)', size=12)
plt.ticklabel_format(style='plain', axis='both')
plt.legend()
plt.savefig(actual_vs_pred_path, bbox_inches='tight')
plt.close()
print(f"Saved: {actual_vs_pred_path}")

# --- Visualization: Residuals Plot ---
# Residual = actual - predicted, plotted against the prediction, with a
# dashed reference line at zero error.
residuals_path = os.path.join(output_dir, 'architectural_residuals_plot.png')
residuals = y_test_actual - final_predictions

plt.figure(figsize=(10, 6))
sns.scatterplot(x=final_predictions, y=residuals, alpha=0.7, edgecolors='k', c='green')
plt.axhline(y=0, color='r', linestyle='--')
plt.title('Residuals Plot (Architectural Model)')
plt.xlabel('Predicted Budget (PHP)')
plt.ylabel('Residuals (Actual - Predicted)')
plt.ticklabel_format(style='plain', axis='both')
plt.savefig(residuals_path, bbox_inches='tight')
plt.close()
print(f"Saved: {residuals_path}")

# =============================================================================
# Section 7: Save Final Assets
# =============================================================================
print("\n--- Section 7: Saving Final Model and Scalers ---")

# Persist only the learned weights (state_dict), not the whole module object.
model_weights_path = 'ann_granular_model.pth'
torch.save(model.state_dict(), model_weights_path)
print("ANN model saved as 'ann_granular_model.pth'")

# Both fitted scalers are stored as well, in the same order as before.
for fitted_scaler, scaler_path in (
    (scaler_X, 'scaler_X_granular.joblib'),
    (scaler_y, 'scaler_y_granular.joblib'),
):
    dump(fitted_scaler, scaler_path)
print("Feature and target scalers saved.")

print("\nProcess finished successfully.")

Libraries imported successfully.

--- Section 2: Loading, Cleaning & Merging Data ---
Architectural Quantity data loaded.
Architectural Unit Cost data loaded.
Dataframes merged. Working with 142 common projects.

--- Section 3: Engineering Granular Features & Analysis ---
Created new granular features for each architectural component:
['plaster_Est_Cost', 'glazed_tiles_Est_Cost', 'Painting_masonry_Est_Cost', 'painting_wood_Est_Cost', 'painting_metal_Est_Cost', 'CHB_100mm_Est_Cost', 'CHB_150mm_Est_Cost']

--- Generating and Saving Visualizations for Analysis ---


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df[col].fillna(median_val, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always beha

Saved: visualization images\architectural_log_transformation_effect.png
Saved: visualization images\architectural_features_correlation_matrix.png
Saved: visualization images\architectural_feature_scales_before_standardization.png

--- Section 4: Preparing Data for the ANN Model ---

Training model with 9 input features.
Data has been split, scaled, and converted to tensors.

--- Section 5: Building and Training the ANN ---
Model Architecture:
RegressionNet(
  (layer1): Linear(in_features=9, out_features=128, bias=True)
  (dropout1): Dropout(p=0.3, inplace=False)
  (layer2): Linear(in_features=128, out_features=64, bias=True)
  (dropout2): Dropout(p=0.2, inplace=False)
  (layer3): Linear(in_features=64, out_features=32, bias=True)
  (output_layer): Linear(in_features=32, out_features=1, bias=True)
)
Epoch [20/200], Loss: 0.014828
Epoch [40/200], Loss: 0.007794
Epoch [60/200], Loss: 0.007020
Epoch [80/200], Loss: 0.005404
Epoch [100/200], Loss: 0.004234
Epoch [120/200], Loss: 0.004380
Ep

Of course. Here is a detailed markdown explanation of the code's process and the insights derived from the visualizations, based on your `Arch_python_model_updated.ipynb` notebook and its outputs.

---

### **Analysis of the Architectural Budget Prediction Model**

This document provides a technical breakdown of the process and results from the `Arch_training.ipynb` notebook. The analysis covers the data processing pipeline, the training of the Artificial Neural Network (ANN), and an interpretation of the key visualizations generated.

### **Part 1: The Code - Process and Significance**

The notebook is structured into a logical sequence of data preparation, model training, and evaluation. Each section plays a critical role in developing a reliable prediction model.

#### **Section 1 & 2: Data Loading, Cleaning, and Merging**
*   **Process:** The script begins by loading two separate CSV files: `Thesis Data - Architectural Quantity Cost.csv` and `Thesis Data - Achitectural Unit Cost.csv`. It then performs several cleaning steps:
    1.  Renames columns for clarity.
    2.  Converts columns containing numerical data (like quantities and costs) from text to numeric types, handling potential formatting issues like commas.
    3.  Imputes (fills in) any missing numerical values using the median of that column. This is a robust way to handle missing data without being skewed by outliers.
    4.  Extracts the numerical `Budget` from a text column using regular expressions.
    5.  Merges the two dataframes into a single `df_merged` based on the common `Project_Name`.
*   **Significance:** This is the foundational step. By cleaning and merging the data, we create a single, unified dataset that contains both the quantities of materials for a project and their corresponding unit costs. This allows for the creation of meaningful cost-based features, which are essential for predicting the final budget.

#### **Section 3: Granular Feature Engineering & Visualization**
*   **Process:** This section transforms the raw data into more predictive features:
    1.  **Granular Cost Calculation:** It calculates the estimated cost for each individual architectural component (e.g., `plaster_Est_Cost`) by multiplying each project's quantity (the `_qty` column) by that same project's unit cost (the `_cost` column) in the merged dataframe.
    2.  **Contextual Features:** It extracts the `Num_Storeys` and `Num_Classrooms` from the project names to provide the model with context about the project's scale.
    3.  **Target Transformation:** The target variable, `Budget`, is log-transformed (`np.log1p`) to normalize its distribution.
    4.  **Visualization Generation & Saving:** It generates and saves key plots that are analyzed in Part 2 of this document.
*   **Significance:** This is arguably the most important section for model performance. Instead of forcing the model to learn the complex relationship between raw quantities and budget, we provide it with pre-calculated, highly relevant features (the estimated costs). Log-transforming the target variable prevents the model from being disproportionately influenced by a few extremely high-budget projects, leading to a more stable and accurate model.

#### **Section 4 & 5: ANN Data Preparation and Training**
*   **Process:**
    1.  The final set of input features (X) and the target variable (y) are defined.
    2.  The data is split into a training set (80%) and a testing set (20%).
    3.  The input features (X) are standardized using `StandardScaler`, which rescales them to have a mean of 0 and a standard deviation of 1.
    4.  The log-transformed target variable (y) is scaled to the range [0, 1] using `MinMaxScaler`.
    5.  All data is converted into PyTorch Tensors, the required format for the neural network.
    6.  An ANN architecture with three hidden layers and dropout for regularization is defined and trained for 200 epochs.
*   **Significance:** This section ensures the data is in the optimal format for the ANN. **Splitting** the data is crucial to evaluate the model's ability to generalize to new, unseen data. **Standardizing inputs** ensures that features with large numerical ranges (like costs) do not dominate the learning process. **Min-Max scaling the target** is necessary because the model's final sigmoid activation function outputs values between 0 and 1.

#### **Section 6 & 7: Model Evaluation and Asset Saving**
*   **Process:** After training, the model's performance is evaluated on the unseen test data. Predictions are made and then inverse-transformed back to their original PHP currency scale. Performance is measured with R-squared (R²), Mean Absolute Error (MAE), and Root Mean Squared Error (RMSE). Finally, the trained model and the scalers are saved to disk.
*   **Significance:** This is the core validation step. Evaluating the model on data it has never seen before gives an honest measure of its predictive power. The final metrics (R² of 0.9042, MAE of ~₱2.87M) quantify the model's success. Saving the model (`ann_granular_model.pth`) and scalers is crucial for deploying the model in a real-world application.

---

### **Part 2: Analysis of Visualizations and Model Performance**

#### **1. Effect of Log-Transformation on Target Variable**
*   **Technical Explanation:** The left histogram displays the distribution of the original project budgets, while the right histogram shows the same data after applying a log-transformation.
*   **Interpretation and Insights:**
    *   The original budget distribution on the left is heavily **right-skewed**. A large number of projects are clustered at lower budget values (under ₱20 million), with a long tail of fewer, high-cost projects. This skewness can cause a model to be biased and perform poorly.
    *   The log-transformed distribution on the right is much more **symmetric and bell-shaped**, resembling a normal distribution. This transformation stabilizes the target variable, making the underlying patterns easier for the neural network to learn and reducing the influence of extreme outliers. This plot visually justifies why log-transformation is a critical preprocessing step.

#### **2. Architectural Feature Scales Before Standardization**
*   **Technical Explanation:** This box plot displays the distribution and range of each raw input feature before scaling. The x-axis is on a logarithmic scale to accommodate the vast differences in magnitude between features.
*   **Interpretation and Insights:**
    *   The plot dramatically illustrates the problem of **varying scales**. Features like `plaster_Est_Cost` have values in the millions, whereas `Num_Storeys` is in single digits.
    *   Without standardization, the features with larger values would completely dominate the model's learning process, effectively ignoring the predictive information in smaller-scale but important features like `Num_Storeys` and `Num_Classrooms`. This visualization provides a clear justification for using `StandardScaler` to put all features on a comparable scale.

#### **3. Correlation Matrix of Granular Architectural Features**
*   **Technical Explanation:** This heatmap shows the Pearson correlation coefficient between the engineered architectural features and the final `Budget`. Bright yellow indicates a strong positive correlation (+1).
*   **Interpretation and Insights:**
    *   **Strong Positive Correlation with Budget:** Nearly all engineered cost features show a strong positive correlation with the final `Budget`. `CHB_150mm_Est_Cost` (**0.85**), `plaster_Est_Cost` (**0.79**), and `Painting_masonry_Est_Cost` (**0.74**) are the top predictors. This is a powerful validation of the feature engineering approach.
    *   **Highly Predictive Contextual Features:** `Num_Storeys` (**0.75**) and `Num_Classrooms` (**0.52**) are also highly correlated with the budget, which is logical as they are direct measures of project size.
    *   **High Multicollinearity:** There is extremely high correlation between some input features, such as `plaster_Est_Cost` and `Painting_masonry_Est_Cost` (**0.80**). While this can be an issue for simpler linear models, neural networks are generally robust enough to handle it.

#### **4. Architectural Model Training Loss Over Epochs**
*   **Technical Explanation:** This plot tracks the Mean Squared Error (MSE) loss on the training data as it learns over 200 epochs.
*   **Interpretation and Insights:**
    *   **Rapid Learning and Convergence:** The loss drops dramatically within the first 25 epochs, showing the model quickly learns the primary patterns. Afterward, the curve flattens out, indicating that the model has converged to a stable and optimal solution.
    *   **Stable Training:** The curve is relatively smooth, though with some minor spikes (e.g., around epoch 125), which is normal during training. The overall downward trend and low final loss value (around 0.003) indicate a successful training process.

#### **5. Actual vs. Predicted Project Budget (Architectural Model)**
*   **Technical Explanation:** This scatter plot compares the model's budget predictions (Y-axis) against the actual budgets (X-axis) for the unseen test data. The red dashed line represents a perfect prediction.
*   **Interpretation and Insights:**
    *   **Excellent Accuracy:** The data points are very tightly clustered around the "Perfect Fit" line. This is a strong visual confirmation of the outstanding **R-squared (R²) value of 0.9042**. This means the model can explain **90.4%** of the variability in project budgets, indicating a highly accurate and powerful model.
    *   **Low Prediction Error:** The **Mean Absolute Error (MAE) of ₱2,866,371.94** shows that, on average, the model's predictions are off by about ₱2.87 million. The **Root Mean Squared Error (RMSE) of ₱4,380,099.82**, which penalizes larger errors more, is still very reasonable given the multi-million peso scale of the projects.
    *   **Unbiased Predictions:** The points are evenly distributed around the red line, showing no systematic tendency to over- or under-predict. This indicates the model is well-calibrated.

#### **6. Residuals Plot (Architectural Model)**
*   **Technical Explanation:** This plot shows the prediction error (Residual = Actual - Predicted) on the Y-axis against the predicted budget on the X-axis. A good model should have its residuals randomly scattered around the horizontal line at y=0.
*   **Interpretation and Insights:**
    *   **No Obvious Bias:** The residuals are mostly scattered randomly around the zero line with no clear curve or U-shape. This is a good sign, indicating that the model's errors are not systematic.
    *   **Potential for Minor Heteroscedasticity:** There's a slight tendency for the errors to increase in variance as the predicted budget gets larger (the points spread out more towards the right). This is common in financial modeling and indicates the model is slightly less certain about very high-cost projects. However, the presence of only a few large outliers suggests this is not a major issue. Overall, the plot supports the model's validity.