# Checking AWS Enviornment
Note: I had to spool an ubuntu instance because pulp wasnt working locally

In [5]:
import requests
import os
import io


def get_instance_type():
    """Get the EC2 instance type."""
    response = requests.get("http://169.254.169.254/latest/meta-data/instance-type")
    return response.text

def get_ram_details():
    """Get RAM details using a system command."""
    total_mib = os.popen("free -m | awk 'NR==2{print $2}'").read().strip()
    return f"{total_mib} MiB"

def get_processor_info():
    """Get processor information."""
    processor_info = os.popen("grep 'model name' /proc/cpuinfo | uniq").read().strip()
    return processor_info.split(":")[1].strip()

instance_type = get_instance_type()
ram_details = get_ram_details()
processor_info = get_processor_info()

print(f"Instance Type: {instance_type}")
print(f"RAM: {ram_details}")
print(f"Processor Info: {processor_info}")

Instance Type: m5a.4xlarge
RAM: 63160 MiB
Processor Info: AMD EPYC 7571


# Loading Provided Dataset. 
Note: This is cleaned up data from Lawler_Jensen_ProjectPlan 10_18.xlsx

In [14]:
import pandas as pd
import pulp

df = pd.read_excel("data.xlsx")
df['predecessorTaskIDs'].fillna("", inplace=True)
df = df[['taskID', 'task', 'predecessorTaskIDs', 'bestCaseHours', 'expectedHours', 'worstCaseHours']]
df

Unnamed: 0,taskID,task,predecessorTaskIDs,bestCaseHours,expectedHours,worstCaseHours
0,A,Describe product,,1,2,3
1,B,Develop marketing strategy,,1,2,4
2,C,Design brochure,A,2,4,6
3,D1,Requirements analysis,A,2,4,8
4,D2,Software design,D1,10,16,24
5,D3,System design,D1,10,16,24
6,D4,Coding,"D2, D3",10,16,24
7,D5,Write documentation,D4,2,4,8
8,D6,Unit testing,D4,16,24,32
9,D7,System testing,D6,16,24,32


# Defining Linear Programming Function

In [7]:
def solve_project_plan(df, case="expected"):
    # LP Model
    lp_problem = pulp.LpProblem("Project_Planning", pulp.LpMinimize)
    
    # Decision vars
    start_times = pulp.LpVariable.dicts("start_time", df["taskID"], 0, None)
    
    # Objective function
    project_end = pulp.LpVariable("project_end", 0, None)
    lp_problem += project_end, "Minimize_Project_Duration"
    
    # adding constraints
    for index, row in df.iterrows():
        task = row["taskID"]
        
        # duration of cases
        if case == "best":
            duration = row["bestCaseHours"]
        elif case == "worst":
            duration = row["worstCaseHours"]
        else:
            duration = row["expectedHours"]
        
        predecessors = [pred.strip() for pred in row["predecessorTaskIDs"].split(",") if pred.strip()]
        
        # task duration constraint
        lp_problem += start_times[task] + duration <= project_end
        
        # task dependencies constraints
        for predecessor in predecessors:
            lp_problem += start_times[task] >= start_times[predecessor] + duration

    # Solver
    lp_problem.solve()
    
    # output prep
    results = {
        "status": pulp.LpStatus[lp_problem.status],
        "optimal_duration": project_end.varValue,
        "task_start_times": {task: var.varValue for task, var in start_times.items()}
    }
    
    return results

# Running Linear Solver and Saving Results
Note: The output below is saved to results_lp_output.txt <br>
Note: The output dataframe is saved to results_lp.csv

In [15]:
simple_results_df = pd.DataFrame(columns=["Case", "Optimal_Duration", *df["taskID"].tolist()])
output_buffer = io.StringIO()

cases = ["best", "expected", "worst"]
for case in cases:
    result = solve_project_plan(df, case)
    
    print_str = f"Results for {case} case:\n"
    print_str += f"Optimal project duration: {result['optimal_duration']} hours\n"
    print_str += "Start times for tasks:\n"
    for task, start_time in result["task_start_times"].items():
        print_str += f"Task {task}: {start_time} hours\n"
    print_str += "\n"
    
    row_data = [case, result["optimal_duration"]] + list(result["task_start_times"].values())
    simple_results_df.loc[len(simple_results_df)] = row_data
    
    output_buffer.write(print_str)

with open("results_lp_output.txt", "w") as f:
    f.write(output_buffer.getvalue())

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/ubuntu/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/591536a756d64fe8bdbf6554b07e5608-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/591536a756d64fe8bdbf6554b07e5608-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 39 COLUMNS
At line 109 RHS
At line 144 BOUNDS
At line 145 ENDATA
Problem MODEL has 34 rows, 16 columns and 68 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 0 (-34) rows, 0 (-16) columns and 0 (-68) elements
Empty problem - 0 rows, 0 columns and 0 elements
Optimal - objective value 70
After Postsolve, objective 70, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 70 - 0 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Welcom

In [17]:
simple_results_df.to_csv("results_lp.csv")
simple_results_df

Unnamed: 0,Case,Optimal_Duration,A,B,C,D1,D2,D3,D4,D5,D6,D7,D8,E,F,G,H
0,best,70.0,0.0,0.0,2.0,2.0,12.0,12.0,22.0,24.0,38.0,54.0,56.0,8.0,57.0,57.0,68.0
1,expected,108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
2,worst,152.0,0.0,0.0,6.0,8.0,32.0,32.0,56.0,64.0,88.0,120.0,126.0,22.0,130.0,130.0,144.0


### Per Desire to have mixed cases based on task, with default set to optimal due to exponential nature.
Note: This really doenst make sense and I dont think we should use it, the above code seems right. <br>
Note: The output below is saved to results_output_complex.txt <br>
Note: The output dataframe is saved to results_df_complex.csv

In [10]:
results_df_complex = pd.DataFrame(columns=["Scenario", "Optimal_Duration", *df["taskID"].tolist()])
output_buffer = io.StringIO()

for task in df["taskID"]:
    for case in ["best", "expected", "worst"]:
        task_scenarios = {t: "expected" for t in df["taskID"]} 
        task_scenarios[task] = case
        
        result = solve_project_plan(df, task_scenarios)
        
        print_str = f"Results for Task {task} in {case} case:\n"
        print_str += f"Optimal project duration: {result['optimal_duration']} hours\n"
        print_str += "Start times for tasks:\n"
        for t, start_time in result["task_start_times"].items():
            print_str += f"Task {t}: {start_time} hours\n"
        print_str += "\n"
        
        row_data = [f"Task {task} ({case})", result["optimal_duration"]] + list(result["task_start_times"].values())
        results_df_complex.loc[len(results_df_complex)] = row_data
        
        output_buffer.write(print_str)

with open("results_output_complex.txt", "w") as f:
    f.write(output_buffer.getvalue())

Welcome to the CBC MILP Solver 
Version: 2.10.3 
Build Date: Dec 15 2019 

command line - /home/ubuntu/.local/lib/python3.10/site-packages/pulp/solverdir/cbc/linux/64/cbc /tmp/21df704ba0504f1499fd2856f30d9c08-pulp.mps timeMode elapsed branch printingOptions all solution /tmp/21df704ba0504f1499fd2856f30d9c08-pulp.sol (default strategy 1)
At line 2 NAME          MODEL
At line 3 ROWS
At line 39 COLUMNS
At line 109 RHS
At line 144 BOUNDS
At line 145 ENDATA
Problem MODEL has 34 rows, 16 columns and 68 elements
Coin0008I MODEL read with 0 errors
Option for timeMode changed from cpu to elapsed
Presolve 0 (-34) rows, 0 (-16) columns and 0 (-68) elements
Empty problem - 0 rows, 0 columns and 0 elements
Optimal - objective value 108
After Postsolve, objective 108, infeasibilities - dual 0 (0), primal 0 (0)
Optimal objective 108 - 0 iterations time 0.002, Presolve 0.00
Option for printingOptions changed from normal to all
Total time (CPU seconds):       0.00   (Wallclock seconds):       0.00

Wel

Note: This didnt work like I thought, but again its not what im thinking the prof is after. We can discuss and I can fix it if needed.

In [18]:
results_df_complex.to_csv("results_df_complex.csv")
results_df_complex

Unnamed: 0,Scenario,Optimal_Duration,A,B,C,D1,D2,D3,D4,D5,D6,D7,D8,E,F,G,H
0,Task A (best),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
1,Task A (expected),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
2,Task A (worst),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
3,Task B (best),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
4,Task B (expected),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
5,Task B (worst),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
6,Task C (best),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
7,Task C (expected),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
8,Task C (worst),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
9,Task D1 (best),108.0,0.0,0.0,4.0,4.0,20.0,20.0,36.0,40.0,60.0,84.0,88.0,14.0,90.0,90.0,104.0
