# TDCE Learning Model Basic Model Construction
This notebook displays the basic construction of the Time Driven Cost Estimation (TDCE) Learning Model step by step, without using the experiment script (`experiment_script.py`).

Import the library

In [3]:
import pandas as pd
import importlib
import sys
import os
import time
import numpy as np
import requests
import ipywidgets as widgets
from IPython.display import display
import git

If you run this notebook from an online source or Google Colab, set `run_from_online = True` in the cell below so that the model files are cloned first.

In [6]:
run_from_online = False

# Root of the project sources: the cloned repository when running online
# (e.g. Google Colab), otherwise the parent directory of this notebook.
if run_from_online:
    project_root = './tdce-basic'
    if os.path.isdir(project_root):
        # Cloning into an existing directory fails, so reuse the checkout
        # from a previous run; this keeps the cell re-runnable.
        repo = git.Repo(project_root)
    else:
        repo = git.Repo.clone_from('https://huggingface.co/iaecpsu-1/tdce-basic',
                                   project_root,
                                   branch='main')
else:
    project_root = '..'

# fmt:off
# Make the project modules importable. Skip entries already present so that
# re-running the cell does not keep growing sys.path.
for module_dir in (
    'model',
    'functions/matrix_generator',
    'functions',
    'functions/data_extractor',
):
    module_path = f'{project_root}/{module_dir}'
    if module_path not in sys.path:
        sys.path.append(module_path)

import tdce_model as tdce
import material_fc_layer as mfl
import employee_fc_layer as efl
import capital_fc_layer as cfl
import loss
import cost_matrix_class as cmc
import display_input_variation as diva
import viyacrab_augmentation as viya
import adjust_data as ajd
import result_display as rd

# Reload so that edits to the project modules are picked up without a kernel
# restart. `tdce` is reloaded a second time after the layer and loss modules,
# exactly as in the original sequence.
for module in (tdce, mfl, efl, cfl, loss, tdce, cmc, diva, viya, ajd, rd):
    importlib.reload(module)
# fmt:on


## Dataset 
We use our project dataset for this experiment. We pick the [extended-random-dataset](https://huggingface.co/datasets/theethawats98/tdce-example-extended-random), a dataset with high dimension but moderate variation, as the case study for our demonstration. The data is stored in the folder `datasets`, inside a sub-folder named `extended-random`. We create the folders if they do not exist and download the data files from our Hugging Face repository.

In [None]:
# Create the dataset folder together with its parent in one step.
# The original pair of os.mkdir calls skipped the sub-folder whenever
# `datasets` already existed; makedirs(exist_ok=True) covers that case and
# keeps the cell safe to re-run.
dataset_dir = "datasets/extended-random"
if os.path.isdir(dataset_dir):
    print("Folder is Exist")
else:
    os.makedirs(dataset_dir, exist_ok=True)

Download Files

In [None]:
capital_cost_link = "https://huggingface.co/datasets/theethawats98/tdce-example-extended-random/resolve/main/generated_capital_cost.csv"
capital_path = 'datasets/extended-random/generated_capital_cost.csv'
employee_usage_link = "https://huggingface.co/datasets/theethawats98/tdce-example-extended-random/resolve/main/generated_employee_usage.csv"
employee_path = 'datasets/extended-random/generated_employee_usage.csv'
material_usage_link = "https://huggingface.co/datasets/theethawats98/tdce-example-extended-random/resolve/main/generated_material_usage.csv"
material_path = 'datasets/extended-random/generated_material_usage.csv'
process_data_link = "https://huggingface.co/datasets/theethawats98/tdce-example-extended-random/resolve/main/generated_process_data.csv"
process_path = 'datasets/extended-random/generated_process_data.csv'


# Downloading the datasets
# Announce the work before it starts (the message used to appear only after
# every download had already finished).
print("Downloading datasets...")
for link, path in [
    (capital_cost_link, capital_path),
    (employee_usage_link, employee_path),
    (material_usage_link, material_path),
    (process_data_link, process_path)
]:
    # Files that are already on disk are kept; only missing ones are fetched.
    if os.path.exists(path):
        continue

    # Make sure the target folder exists so this cell does not depend on the
    # folder-creation cell having succeeded.
    os.makedirs(os.path.dirname(path), exist_ok=True)

    try:
        # A timeout prevents the cell from hanging forever on a stalled
        # connection.
        response = requests.get(link, timeout=60)
    except requests.RequestException as error:
        print(f'Failed to download file {path}: {error}')
        continue

    if response.status_code == 200:
        with open(path, 'wb') as file:
            file.write(response.content)
        print(f'File {path} downloaded successfully')
    else:
        print(f'Failed to download file {path}')

## Model Setting
Select the correct setting for your model.

In [None]:
# ipywidgets truncates descriptions that exceed the default label width.
# 'initial' lets the label take the width its text needs, so the long
# descriptions below stay fully readable.
wide_description_style = {'description_width': 'initial'}

# Working hours per day used for employee cost.
hour_day_employee_widget = widgets.BoundedIntText(
    value=8,
    min=1,
    max=24,
    step=1,
    description='Hours per Day for Employee:',
    disabled=False,
    style=wide_description_style,
)


# Operating hours per day used for utility / capital cost.
hour_day_capital_cost_widget = widgets.BoundedIntText(
    value=21,
    min=1,
    max=24,
    step=1,
    description='Hours per Day for Utility / Capital Cost:',
    disabled=False,
    style=wide_description_style,
)

use_outlier_removal_widget = widgets.Checkbox(
    value=False,
    description='Enable Outlier Removal',
    disabled=False,
    indent=False
)

# Index passed to the outlier-removal step when it is enabled.
outlier_index_widget = widgets.Dropdown(
    options=['1', '1.5', '2'],
    value='1.5',
    description='Removal Indication Index:',  # fixed typo ("Idication")
    disabled=False,
    style=wide_description_style,
)

use_augmentation_widget = widgets.Checkbox(
    value=False,
    description='Enable Data Augmentation',
    disabled=False,
    indent=False
)

element_level_lr_widget = widgets.Dropdown(
    options=['0.001','0.05', '0.01','0.1','0.5'],
    value='0.01',
    description='Element Level Learning Rate:',
    disabled=False,
    style=wide_description_style,
)

model_level_lr_widget = widgets.Dropdown(
    options=['0.0000001','0.00000001','0.000000001'],
    value='0.00000001',
    description='Model Level Learning Rate:',
    disabled=False,
    style=wide_description_style,
)


# Show the data-related settings here; the learning-rate widgets are
# displayed by a later cell.
display(hour_day_employee_widget)
display(hour_day_capital_cost_widget)
display(use_outlier_removal_widget)
display(outlier_index_widget)
display(use_augmentation_widget)

Select Learning Rate

In [None]:
# Render both learning-rate selectors, element level first.
display(element_level_lr_widget, model_level_lr_widget)

In [None]:
# Copy the current widget selections into plain variables. Re-run this cell
# after changing a widget, otherwise the old values stay in use.

# Hours-per-day settings.
hour_day_employee, hour_day_capital_cost = (
    hour_day_employee_widget.value,
    hour_day_capital_cost_widget.value,
)

# Feature switches.
use_outlier_removal, use_augmentation = (
    use_outlier_removal_widget.value,
    use_augmentation_widget.value,
)

# The dropdown stores its options as strings; convert to a number.
outlier_index = float(outlier_index_widget.value)

Generated Cost Matrix

In [None]:
folder_path = 'datasets/extended-random'
variation_csv_path = f"{folder_path}/data_variation.csv"

# Create the cost-matrix generator, point it at the dataset folder and load
# the data from there.
cost_generator = cmc.CostMatrixGenerator()
cost_generator.change_data_directory(folder_path)
cost_generator.load_data()

# Compute the input-variation table for the same folder and keep a CSV copy
# next to the data.
input_variation = diva.display_input_variation_by_directory(folder_path)
input_variation.to_csv(variation_csv_path)

## Pre-Processing
Adjust and filter the input datasets (Material, Employee, Capital Cost) to match the output dataset (Process Dataset), depending on the outlier-removal setting.

In [None]:
if not use_outlier_removal:
    # Outlier removal is off: just show the variation of the data as loaded.
    display(input_variation)
else:
    # Drop outliers inside the generator, then read back the filtered data.
    cost_generator.remove_outlier_iqr(outlier_index)
    new_process_df, new_employee_usage, new_material_usage, new_capital_cost_usage = (
        cost_generator.get_data()
    )

    # Align the three input datasets with the filtered process data.
    new_capital_cost_usage, new_employee_usage, new_material_usage = ajd.adjust_to_match_process(
        capital_cost_usage=new_capital_cost_usage,
        employee_usage=new_employee_usage,
        material_usage=new_material_usage,
        new_process_df=new_process_df,
    )

    # Recompute the variation table on the filtered data and save both the
    # table and the filtered process data.
    new_variation = diva.display_input_variation(
        new_process_df, new_material_usage, new_employee_usage, new_capital_cost_usage
    )
    new_variation.to_csv(f"{folder_path}/data_variation_after_outlier.csv")
    new_process_df.to_csv(f"{folder_path}/process_df_after_outlier.csv")

    # Rich display can fail outside a notebook front end; report it instead
    # of aborting the cell.
    try:
        print("Data Variation After Outlier Removed")
        display(new_variation)
    except Exception as e:
        print("This is not Jupyter Notebook", e)

### Data Splitting
Split training and validation dataset

In [None]:
# Split the loaded data into training and validation parts; 0.7 is the
# ratio argument passed to the generator's split method.
(
    train_process_df,
    train_employee_usage,
    train_material_usage,
    train_capital_cost,
    validate_process_df,
    validate_employee_usage,
    validate_material_usage,
    validate_capital_cost,
) = cost_generator.train_test_split_without_matrix(0.7)


# Generate Cost Matrix for Validation Set
validation_payload = cost_generator.get_validation_payload(
    validate_process_df
)


# Display Variation of Train Data
train_variation = diva.display_input_variation(
    train_process_df,
    train_material_usage,
    train_employee_usage,
    train_capital_cost,
)
train_variation.to_csv(
    f"{folder_path}/train_data_variation.csv")
train_process_df.to_csv(
    f"{folder_path}/train_process_df.csv")
# Display Variation of Validation Data
validate_variation = diva.display_input_variation(
    validate_process_df,
    validate_material_usage,
    validate_employee_usage,
    validate_capital_cost,
)
validate_variation.to_csv(
    f"{folder_path}/validate_data_variation.csv"
)
if use_augmentation:
    # TODO:  Increase the Generalization of the Model
    # Augmented the Imbalance Class of Training Data
    # Keep a copy of the training data before it is replaced below.
    train_process_df.to_csv(
        f"{folder_path}/train_process_df_before_augmented.csv"
    )
    train_process_df = viya.vy_training_augmentation(
        train_process_df)
    # Display Variation of Train Data After Augmented
    train_variation = diva.display_input_variation(
        train_process_df,
        train_material_usage,
        train_employee_usage,
        train_capital_cost,
    )
    train_variation.to_csv(
        f"{folder_path}/train_data_variation_after_augmented.csv"
    )
    # The file name used to interpolate `{round}`. No `round` variable exists
    # in this notebook, so the builtin function was formatted into the name
    # ("..._<built-in function round>.csv"), which is not a valid file name
    # on Windows. This step-by-step notebook has no experiment round, so the
    # suffix is dropped.
    train_process_df.to_csv(
        f"{folder_path}/train_process_df_after_augmented.csv"
    )

# Generate Matrix From Training Set
(
    material_cost_matrix,
    material_amount_matrix,
    employee_cost_matrix,
    employee_duration_matrix,
    employee_day_amount_matrix,
    capital_cost_matrix,
    day_amount_matrix,
    capital_cost_duration_matrix,  # New On Finetune
    result_matrix,
) = cost_generator.generate_data_from_input(
    train_process_df,
    train_material_usage,
    train_employee_usage,
    train_capital_cost,
)

## Initial Model