In [1]:
# Upload your Excel file in Colab.
# files.upload() is interactive: it opens Colab's file picker in the cell
# output. The returned object is iterated later to obtain the uploaded file
# name (presumably a dict of file name -> file contents; confirm against the
# Colab documentation). Runs only inside a Google Colab runtime.
from google.colab import files
uploaded = files.upload()

# Import libraries
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

# Read the first uploaded workbook. The "Data Base" sheet is parsed with
# header=1 (the second row supplies the column titles), and the raw titles
# are mapped to identifier-style names in the same step.
file_name = next(iter(uploaded))
df = pd.read_excel(file_name, sheet_name="Data Base", header=1).rename(
    columns={
        'Size [m2]': 'Size_m2',
        'Rate per m2 [€]': 'Rate_per_m2',
        'Start date': 'Start_Date',
        'End date': 'End_Date',
        'No. of Civil Engineers': 'Num_Civil_Engineers',
        'No. of Const. workers': 'Num_Construction_Workers',
        'No. of Architects': 'Num_Architects',
        'k (arc) - value': 'k_architect',
        'k (ce) - value': 'k_engineer',
        'm2/hour per one worker': 'Productivity_worker',
    }
)

# Target variable: elapsed whole days between start and end (dates parsed
# day-first), divided by 30 as an approximation of months.
df = df.assign(
    Duration_Months=lambda frame: (
        pd.to_datetime(frame["End_Date"], dayfirst=True)
        - pd.to_datetime(frame["Start_Date"], dayfirst=True)
    ).dt.days / 30
)

# Predictor columns, in the order the regression model is fitted on.
features = [
    'Size_m2',
    'Rate_per_m2',
    'Num_Architects',
    'Num_Civil_Engineers',
    'Num_Construction_Workers',
    'k_architect',
    'k_engineer',
    'Productivity_worker',
]

# Keep only rows where every predictor and the target are present, then fit
# an ordinary least-squares model on them.
df_model = df.dropna(subset=[*features, 'Duration_Months'])
X, y = df_model[features], df_model["Duration_Months"]
reg_model = LinearRegression().fit(X, y)

# Unified predictor: analytical simulation plus the fitted regression model.

# Coefficients selected by complexity level: design-effort factors for
# architects and civil engineers, and the construction-worker productivity
# value. The same three values are passed to the regression as the
# 'k_architect', 'k_engineer' and 'Productivity_worker' features.
_COMPLEXITY_LEVELS = {
    "simple": {"k_architect": 0.15, "k_engineer": 0.10, "productivity": 6.0},
    "medium": {"k_architect": 0.20, "k_engineer": 0.15, "productivity": 4.0},
    "high": {"k_architect": 0.30, "k_engineer": 0.20, "productivity": 3.0},
}

# Divisor applied to each design-effort term.
# NOTE(review): presumably converts working hours into months (160 h per
# month?); the meaning of the extra factor 4 is not evident here -- confirm.
_DESIGN_TIME_DIVISOR = 160 * 4


def _team_efficiency(headcount):
    """Return the efficiency factor used for a design team of *headcount*."""
    if headcount <= 4:
        return 0.75
    if headcount <= 8:
        return 0.60
    return 0.55


def predict_duration(size_m2, num_architects, num_engineers, num_workers, complexity_level, rate_per_m2=1000):
    """Estimate project duration with both the simulation and the regression.

    Relies on the module-level ``reg_model`` (a fitted regressor exposing
    ``predict``) and on ``pd`` being imported in the enclosing module.

    Args:
        size_m2: Project size; must not be negative.
        num_architects: Number of architects; must be greater than zero.
        num_engineers: Number of civil engineers; must be greater than zero.
        num_workers: Number of construction workers; must be greater than
            zero.
        complexity_level: 'simple', 'medium' or 'high'. Letter case and
            surrounding whitespace are ignored.
        rate_per_m2: Value passed to the regression as 'Rate_per_m2'.

    Returns:
        dict with the keys "Simulation Duration (months)" and
        "Regression Duration (months)", each a plain Python float rounded to
        two decimals.

    Raises:
        ValueError: If the complexity level is unknown, a staff count is not
            positive, or the size is negative.
    """
    # Accept 'Medium', ' high ' etc.; non-string values fall through to the
    # membership test unchanged.
    level_key = complexity_level
    if isinstance(complexity_level, str):
        level_key = complexity_level.strip().lower()
    if level_key not in _COMPLEXITY_LEVELS:
        raise ValueError("Complexity must be 'simple', 'medium', or 'high'.")

    # Staff counts are used as divisors below: zero would raise an opaque
    # ZeroDivisionError and negative values would yield negative durations.
    staffing = {
        "num_architects": num_architects,
        "num_engineers": num_engineers,
        "num_workers": num_workers,
    }
    for label, count in staffing.items():
        if count <= 0:
            raise ValueError(f"{label} must be greater than zero, got {count!r}.")
    if size_m2 < 0:
        raise ValueError(f"size_m2 must not be negative, got {size_m2!r}.")

    coefficients = _COMPLEXITY_LEVELS[level_key]
    k_arch = coefficients["k_architect"]
    k_eng = coefficients["k_engineer"]
    prod_worker = coefficients["productivity"]

    eff_arch = _team_efficiency(num_architects)
    eff_eng = _team_efficiency(num_engineers)

    # SIMULATION MODEL
    design_arch = (k_arch * size_m2) / (num_architects * eff_arch) / _DESIGN_TIME_DIVISOR
    design_eng = (k_eng * size_m2) / (num_engineers * eff_eng) / _DESIGN_TIME_DIVISOR
    T_design = design_arch + design_eng
    # NOTE(review): this term is added to T_design and reported as months
    # without any time-unit conversion; if the productivity value is an
    # hourly rate the units differ -- confirm the intended calibration.
    T_construction = size_m2 / (num_workers * prod_worker)
    T_simulated = T_design + T_construction

    # REGRESSION MODEL: column names and their order follow the feature list
    # the model was trained on.
    reg_input = pd.DataFrame([{
        'Size_m2': size_m2,
        'Rate_per_m2': rate_per_m2,
        'Num_Architects': num_architects,
        'Num_Civil_Engineers': num_engineers,
        'Num_Construction_Workers': num_workers,
        'k_architect': k_arch,
        'k_engineer': k_eng,
        'Productivity_worker': prod_worker
    }])
    T_predicted = reg_model.predict(reg_input)[0]

    # float() strips the NumPy scalar wrapper so the result prints as
    # 26.48 rather than np.float64(26.48) and serializes cleanly.
    return {
        "Simulation Duration (months)": round(float(T_simulated), 2),
        "Regression Duration (months)": round(float(T_predicted), 2)
    }

# Example project: the inputs are collected in one mapping and unpacked into
# the predictor; rate_per_m2 is left at the function's default.
project_inputs = {
    "size_m2": 5000,
    "num_architects": 2,
    "num_engineers": 3,
    "num_workers": 45,
    "complexity_level": "medium",
}
result = predict_duration(**project_inputs)

# Show both duration estimates.
print(result)



Saving PCM_16052025.xlsx to PCM_16052025.xlsx
{'Simulation Duration (months)': 29.34, 'Regression Duration (months)': np.float64(26.48)}
