# 1. Notebook Setup
# Title, assignment info, and markdown overview.

In [75]:
import numpy as np
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import random as rd
import csv
from typing import Union
from typing import Callable
import time
from copy import deepcopy

In [45]:
mapping_file_path = "data/Mappings.csv"
employees_file_path = "data/Employee.csv"
tasks_file_path = "data/Tasks.csv"
rd.seed(20)

# Data Classes
class Task:
    time:int =0
    difficulty:int =0
    deadline:int = 0
    required_skill:str = ''
    def __str__(self):
        return f"T Time:{self.time} Difficulty:{self.difficulty} Deadline:{self.deadline} requiredSkill:{self.required_skill}"

class Employee:
    available_hours:int = 0
    skill_level:int = 4
    skills:list[str]=['']

    def __str__(self):
        return f"E AH:{self.available_hours} Skill-Level:{self.skill_level} Skills:{self.skills}"




In [46]:
# I/O
"""
Task csv:

'ID', 'Time (hrs)', 'Difficulty', 'Deadline (hrs)', 'Required Skill'

Employee csv:
'Employee ID', 'Available Hrs', 'Skill Level', 'Skills'


Using an adjancey list instead of an adjacney matrix!

Since the input vector is 10 mappings x 11 features. Including a unique assignment penalty is redundant since a task mapped to 2 employees will create (10+1) mappings which doesn't work for the network input layer!
"""




from typing import Any


class DataLoader:
    num_of_tasks =10
    num_of_employees = 5
    tasks:list[Task]=[]
    employees:list[Employee]=[]
    
    def load_tasks(self,fileName=tasks_file_path):
        tasks =[]
        with open(fileName,'r') as csvfile:
            taskReader = csv.reader(csvfile)
            next(taskReader)
            for taskArr in taskReader:
                newT = Task()
                newT.time=int(taskArr[1])
                newT.difficulty= int(taskArr[2])
                newT.deadline = int(taskArr[3])
                newT.required_skill= taskArr[4]
                tasks.append(newT)
        self.tasks= tasks

    def load_employees(self,fileName=employees_file_path):
        employees=[]
        with open(fileName,'r') as csvfile:
            employeeReader = csv.reader(csvfile)
            next(employeeReader)
            for employeeArr in employeeReader:
                newEmployee = Employee()
                newEmployee.available_hours=int(employeeArr[1])
                newEmployee.skill_level= int(employeeArr[2])
                skills = employeeArr[3].split(',')
                newEmployee.skills = skills
                employees.append(newEmployee)
        self.employees = employees
    def loadAll(self):
        self.load_employees()
        self.load_tasks()
    


class MappingHandler:
    def __init__(self,tasks:list[Task],employees:list[Employee],num_of_mappings=100):
        self.tasks:list[Task]= tasks
        self.employees:list[Employee] = employees
        self.num_of_mappings = num_of_mappings
        self.mappings:list[list[int]] = []
        self.costs:list[float]=[]


    def __costFunction(self,Mapping:list[int]):
        w=0.2
        overload = 0
        skill_mismatch = 0
        difficulty_violation = 0
        deadline_violation = 0
        unique_assignment = 0
        employee_task_adj_list:list[list[Task]]=[[] for _ in range(len(self.employees))]

        for i in range(len(Mapping)):
            taskId=i
            task = self.tasks[taskId]
            num_of_employees_assigned = 0
            employeeId = Mapping[i]
            num_of_employees_assigned+=1
            employee = self.employees[employeeId]
            employee_task_adj_list[employeeId].append(task)
            # skill mismatch violation
            if task.required_skill not in employee.skills:
                skill_mismatch+=1
            if task.difficulty> employee.skill_level:
                difficulty_violation+= 1
            unique_assignment+= max(0,num_of_employees_assigned-1)



        for employeeId in range(len(employee_task_adj_list)):
            employee = self.employees[employeeId]
            sortedEmployeeTasks = employee_task_adj_list[employeeId]
            sortedEmployeeTasks.sort(key= lambda x:x.time)
            sumHours=0
            finishTime=0
            for t in sortedEmployeeTasks:
                sumHours+=t.time
                finishTime+= t.time
                deadline_violation+= max(0,finishTime - t.deadline)
            overload+= max(0,sumHours- employee.available_hours) 
        total_penalty = overload+skill_mismatch+unique_assignment+deadline_violation+ difficulty_violation
        return round(total_penalty*w,3)
    def __reset(self):
        self.mappings=[]
        self.costs=[]


    def generateMappings(self):
        unique = set()
        self.__reset()
        while len(unique) < self.num_of_mappings:
            mapping:list[int] = [0 for _ in self.tasks]         # Create a mapping for each iteration. Using an adjlist format
            # possible_assignments = [i for i in range(len(employees))]  #list of possible assignments
            for taskId in range(len(self.tasks)):
                rand_employee =rd.randint(0,len(self.employees)-1)
                mapping[taskId]=rand_employee
            string = str(mapping)
            if string not in unique: # ensure no duplicates in data generation
                unique.add(string)
                self.mappings.append(mapping)
                self.costs.append(self.__costFunction(mapping))
    

    def readCSV(self,fileName=mapping_file_path):
        self.__reset()
        with open(fileName,'r') as csvfile:
            mappingReader = csv.reader(csvfile)
            next(mappingReader)
            for line in mappingReader:
                newMapping:list[int]=[]
                for v in line:
                    if v.isdigit():
                        newMapping.append(int(v))
                    else:
                        self.costs.append(float(v))

                self.mappings.append(newMapping)


    def writeCSV(self,filepath =mapping_file_path):
        with open(filepath,'w',newline='') as csvfile:
                mappingWriter = csv.writer(csvfile,delimiter=',')
                mappingWriter.writerow(["T1",'T2','T3','T4','T5','T6','T7','T8','T9','T10','Penalty'])
                for i in range(len(self.mappings)):
                    row=[]
                    row.extend(self.mappings[i])
                    row.append(f"{self.costs[i]}")
                    mappingWriter.writerow(row)





def testDataGen():
    dl = DataLoader()
    dl.loadAll()
    mappingloader = MappingHandler(dl.tasks,dl.employees)
    mappingloader.generateMappings()
    mappingloader.writeCSV('data/Mappings.csv')


testDataGen()


# def TestDataGen():

#     all_mappings = mappingGenerator(task,employees)
#     for mapping in all_mappings:
#         cost = costFunction(mapping,task,employees)
#         print(cost)

# def verifyCost():
#     mapping=[[2],[3],[1],[4],[2],[5],[1],[3],[5],[4]]
    
#     for i in range(len(mapping)):
#         mapping[i][0]-=1
#     employees,tasks = DataLoader().loadAll()
#     # for t in tasks:
#     #     print(t)
    
#     for e in employees:
#         print(e)
#     print(costFunction(mapping,tasks,employees))





In [None]:
# 4. Preprocessing
# returns a 3-element vector
def one_hot_encode_skill(skillsVector):
    skills= ['A','B','C']
    hot_encoding=[]
    j=0
    for i in range(len(skills)):
        if j< len(skillsVector)and  skills[i] == skillsVector[j]:
            j+=1
            hot_encoding.append(1)
        else:
            hot_encoding.append(0)
    return hot_encoding





def construct_input_vector(mappingloader:MappingHandler):
    featureVector = []

    for idx,mp in  enumerate(mappingloader.mappings):
        vector=[]
        for i in range(len(mp)):

            task = mappingloader.tasks[i]
            employee = mappingloader.employees[mp[i]]
            vector.extend([task.time,task.difficulty,task.deadline])
            vector.extend(one_hot_encode_skill(task.required_skill))
            vector.extend([employee.available_hours,employee.skill_level])
            vector.extend(one_hot_encode_skill( employee.skills))
        vector.append(mappingloader.costs[idx])
        featureVector.append(vector)
        
    df = DataFrame(featureVector)
    return df




def split_train_val_test(df:DataFrame):
    df_shuffled = df.sample(frac=1, random_state=42).reset_index(drop=True)

    train_size = int(0.7 * len(df_shuffled))
    val_size = int(0.15 * len(df_shuffled))

    train_df = df_shuffled[:train_size]
    val_df = df_shuffled[train_size:train_size + val_size]
    test_df = df_shuffled[train_size + val_size:]
    return  train_df,val_df,test_df

def split_x_y(df:DataFrame):

    x = df.iloc[:, :-1]

    y = df.iloc[:, -1]
    return x,y


def create_batches(data:DataFrame, batch_size:int):
    batches=[]
    for i in range(0, len(data), batch_size):
        batches.append(data[i:i + batch_size])
    return batches


def pre_process():
    dl = DataLoader()
    dl.loadAll()
    mappingloader = MappingHandler(dl.tasks,dl.employees)
    mappingloader.readCSV('data/Mappings.csv')
    df = construct_input_vector(mappingloader)

    return split_train_val_test(df)

pre_process()



(    0    1    2    3    4    5    6    7    8    9    ...  101  102  103  104  \
 0     4    3    8    1    0    0    8    3    1    0  ...   11    0    0    1   
 1     4    3    8    1    0    0    8    3    1    0  ...   11    0    0    1   
 2     4    3    8    1    0    0   10    4    1    0  ...   11    0    0    1   
 3     4    3    8    1    0    0   10    4    1    0  ...   11    0    0    1   
 4     4    3    8    1    0    0    9    5    1    0  ...   11    0    0    1   
 ..  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...  ...   
 65    4    3    8    1    0    0   15    7    0    1  ...   11    0    0    1   
 66    4    3    8    1    0    0    8    3    1    0  ...   11    0    0    1   
 67    4    3    8    1    0    0   10    4    1    0  ...   11    0    0    1   
 68    4    3    8    1    0    0   12    6    1    1  ...   11    0    0    1   
 69    4    3    8    1    0    0   15    7    0    1  ...   11    0    0    1   
 
     105  106 

In [110]:
# 5. Model Definitions




def sig(x):
    return 1/(1+np.exp(-np.clip(x, -500, 500)))  # Clip to prevent overflow

def sig_derivative(x):
    s = sig(x)
    return s * (1 - s)

def relu(x):
    return np.maximum(0, x)

def relu_derivative(x):
    return (x > 0).astype(float)

def linear(x):
    return x

def linear_derivative(x):
    return np.ones_like(x)

def mse(y_true, y_pred):
    y_pred = y_pred.reshape(-1)
    return np.mean((y_true - y_pred)**2)

def mse_derivative(y_true, y_pred):
    return 2 * (y_pred - y_true)

architecture={
    'A':[110,256,1],
    'B':[110,128,128,1]
}

class NeuralNetwrokArgs:

    def __init__(self,name):
        self.name = name
        self.layer_dims = architecture[self.name]
        self.activation:Callable[[np.ndarray],np.ndarray]=relu
        self.activation_derivative:Callable[[np.ndarray],np.ndarray] = relu_derivative
        self.lr=0.001
        self.output_activation:Callable[[np.ndarray],np.ndarray] = linear
        self.output_activation_derivative = linear_derivative
        self.epochs=100
        self.batch_size=16

class Best:
    def __init__(self,biases,weights):
        self.biases= deepcopy(biases)
        self.weights= deepcopy(weights)
        self.lowest_val_loss=float("inf")



class NeuralNetwork:
    
    def __init__(self, neuralArgs:NeuralNetwrokArgs):

        self.name = neuralArgs.name
        self.layer_dims= neuralArgs.layer_dims
        self.weights = []
        self.biases= []
        self.activation = neuralArgs.activation
        self.activation_derivative = neuralArgs.activation_derivative
        self.output_activation = neuralArgs.output_activation
        self.output_activation_derivative = neuralArgs.output_activation_derivative
        self.lr= neuralArgs.lr
        self.batch_size = neuralArgs.batch_size
        self.epochs = neuralArgs.epochs
        for i in range(1,len(self.layer_dims)):
            w = np.random.normal(loc=0.0, scale=0.01, size=(self.layer_dims[i], self.layer_dims[i-1]))
            self.weights.append(w)
            b= np.zeros((self.layer_dims[i],1))
            self.biases.append(b)
        self.best= Best(self.biases,self.weights)
        
        
    def snapshot(self,val_loss):
        if val_loss< self.best.lowest_val_loss:
            self.best.weights = deepcopy(self.weights)
            self.best.biases = deepcopy(self.biases)
            self.best.lowest_val_loss = val_loss
        


    def forward(self, x):
        
        cache_a=[x.T]
        cache_z=[]
        for i in range(len(self.layer_dims)-1):
            z=self.weights[i].dot(cache_a[i])+self.biases[i]
            if i == len(self.layer_dims) - 2:  # Output layer
                a = self.output_activation(z)
            else:
                a = self.activation(z)
            cache_a.append(a)
            cache_z.append(z)
        return cache_a,cache_z

    def backward(self,y_true,cache_a,cache_z):
        grads={}
        deltas=[]
        y_true = np.array(y_true)
        if y_true.ndim == 1:
            y_true = y_true.reshape(1, -1) 
        #Output layer
        delta_output = 2* (cache_a[-1]-y_true) * self.output_activation_derivative(cache_z[-1])
        deltas.append(delta_output)

        #hidden layers
        current_delta = delta_output

        for l in range(len(self.layer_dims)-2,0,-1):
            delta_hidden = self.weights[l].T.dot(current_delta) * self.activation_derivative(cache_z[l-1])
            deltas.insert(0, delta_hidden)
            current_delta = delta_hidden

        for l in range(len(self.weights)):
            # ∂L/∂W^(l) = δ^(l) (a^(l-1))^T
            dW = deltas[l].dot(cache_a[l].T)
            
            # ∂L/∂b^(l) = δ^(l)
            db = np.sum(deltas[l], axis=1, keepdims=True)
            
            grads[f'dW{l+1}'] = dW
            grads[f'db{l+1}'] = db
        
        return grads



    def update_params(self,grads):
        for l in range(len(self.weights)):
            grads[f'dW{l+1}'] = np.clip(grads[f'dW{l+1}'], -1, 1)
            grads[f'db{l+1}'] = np.clip(grads[f'db{l+1}'], -1, 1)
            self.weights[l] -= self.lr * grads[f'dW{l+1}']
            self.biases[l] -= self.lr * grads[f'db{l+1}']
    


    # 6. Training Loop
    def train(self, trainData:DataFrame,valData:DataFrame,useBest=True,silent=False):
        if useBest:
            self.weights = self.best.weights
            self.biases = self.best.biases

        for e in range(self.epochs):
            start = time.time()
            trainData = trainData.sample(frac=1).reset_index(drop=True)
            batches = create_batches(trainData,self.batch_size)
            train_loss =0
            for batch in batches:
                x_train,y_train =split_x_y(batch)
                cache_a,cache_z = self.forward(x_train)
                train_loss+=mse(y_train,cache_a[-1])
                # print()
                grads = self.backward(y_train,cache_a,cache_z)
                self.update_params(grads)
            end = time.time()
            val_pred, val_loss = self.evaluate(valData)
            self.snapshot(val_loss)
            if not silent:
                print(f"Epoch: {e}   time:  {round(end-start,3)}   train_loss: {train_loss}   val_loss: {val_loss}")
        


    def evaluate(self,evalData):
        x,y_true =split_x_y(evalData)
        cache_a,cache_z = self.forward(x)
        return cache_a[-1],mse(y_true,cache_a[-1])

    def predict(self,predData):
        return self.forward(predData)[0][-1]



trainData,valData,testData = pre_process()
args = NeuralNetwrokArgs('A')
nn = NeuralNetwork(args)
print(nn.best.lowest_val_loss)
nn.epochs=100
nn.train(trainData,valData)
test_pred,test_loss = nn.evaluate(testData)






# 7. Evaluation & Plots
#- Generate the eight required figures
#- Save each via plt.savefig()
# 8. Save & Export
#- Download figures
#- Optionally, pickle model parameters

inf
Epoch: 0   time:  0.017   train_loss: 110.64489348542367   val_loss: 9.60288786522363
Epoch: 1   time:  0.016   train_loss: 33.46845947409986   val_loss: 4.5264370799247295
Epoch: 2   time:  0.027   train_loss: 27.029836680949447   val_loss: 4.515132221969664
Epoch: 3   time:  0.016   train_loss: 26.26217221797925   val_loss: 4.84353076904525
Epoch: 4   time:  0.018   train_loss: 20.890979660346186   val_loss: 4.415237774665875
Epoch: 5   time:  0.015   train_loss: 20.213639464122885   val_loss: 4.424937820572406
Epoch: 6   time:  0.01   train_loss: 21.332874064703475   val_loss: 5.800841423859511
Epoch: 7   time:  0.017   train_loss: 22.709489624943593   val_loss: 4.41423223250965
Epoch: 8   time:  0.016   train_loss: 24.120329493221313   val_loss: 5.425352388575004
Epoch: 9   time:  0.016   train_loss: 25.334143377354177   val_loss: 4.411497943882785
Epoch: 10   time:  0.022   train_loss: 21.45140794573699   val_loss: 4.513031514767235
Epoch: 11   time:  0.016   train_loss: 23.12

In [109]:
# nn.epochs= 200
# nn.train(trainData,valData,useBest=True)
# print(nn.best.lowest_val_loss)


In [111]:


class HyperArgs:
    names=['A','B']
    lr=[0.01, 0.001, 0.0001]
    batch_size=[8,16,32]
    epochs=[100,150,200]
    activations=[[sig,sig_derivative],[relu,relu_derivative]]
    def totalRuns(self):
        return len(self.names) * len(self.lr) * len(self.batch_size) * len(self.epochs) * len(self.activations)



def hyper_grid_search(gridArgs:HyperArgs):
    trainData,valData,testData = pre_process()
    results=[]
    total_runs = gridArgs.totalRuns()
    run=0
    for name in gridArgs.names:
        for lr in gridArgs.lr:
            for batch in gridArgs.batch_size:
                for epoch in gridArgs.epochs:
                    for activation in gridArgs.activations:
                        print(f"Iteration: {run}/{total_runs}")
                        run+=1
                        args = NeuralNetwrokArgs(name)
                        args.lr = lr
                        args.batch_size = batch
                        args.epochs = epoch
                        args.activation = activation[0]
                        args.activation_derivative = activation[1]
                        args.layer_dims = architecture[name]
                        nn= NeuralNetwork(args)
                        nn.train(trainData,valData,silent=True)
                        results.append({
                            "model_name": name,
                            "lr": lr,
                            "batch_size": batch,
                            "epochs": epoch,
                            "activation": activation[0].__name__,
                            "val_loss": nn.best.lowest_val_loss,
                            "nn": nn  
                        })
    results.sort(key= lambda x: x['val_loss'])
    top_15 = results[:15]
    for i, res in enumerate(top_15, 1):
        print(f"#{i}: Model={res['model_name']}, LR={res['lr']}, Batch={res['batch_size']}, "
            f"Epochs={res['epochs']}, Act={res['activation']}, Val Loss={res['val_loss']:.4f}")
    return results


results = hyper_grid_search(HyperArgs())




Iteration: 0/108
Iteration: 1/108
Iteration: 2/108
Iteration: 3/108
Iteration: 4/108
Iteration: 5/108
Iteration: 6/108
Iteration: 7/108
Iteration: 8/108
Iteration: 9/108
Iteration: 10/108
Iteration: 11/108
Iteration: 12/108
Iteration: 13/108
Iteration: 14/108
Iteration: 15/108
Iteration: 16/108
Iteration: 17/108
Iteration: 18/108
Iteration: 19/108
Iteration: 20/108
Iteration: 21/108
Iteration: 22/108
Iteration: 23/108
Iteration: 24/108
Iteration: 25/108
Iteration: 26/108
Iteration: 27/108
Iteration: 28/108
Iteration: 29/108
Iteration: 30/108
Iteration: 31/108
Iteration: 32/108
Iteration: 33/108
Iteration: 34/108
Iteration: 35/108
Iteration: 36/108
Iteration: 37/108
Iteration: 38/108
Iteration: 39/108
Iteration: 40/108
Iteration: 41/108
Iteration: 42/108
Iteration: 43/108
Iteration: 44/108
Iteration: 45/108
Iteration: 46/108
Iteration: 47/108
Iteration: 48/108
Iteration: 49/108
Iteration: 50/108
Iteration: 51/108
Iteration: 52/108
Iteration: 53/108
Iteration: 54/108
Iteration: 55/108
It

In [112]:
for i, res in enumerate(results, 1):
        print(f"#{i}: Model={res['model_name']}, LR={res['lr']}, Batch={res['batch_size']}, "
            f"Epochs={res['epochs']}, Act={res['activation']}, Val Loss={res['val_loss']:.4f}")

#1: Model=A, LR=0.01, Batch=8, Epochs=200, Act=sig, Val Loss=3.6814
#2: Model=A, LR=0.001, Batch=8, Epochs=200, Act=relu, Val Loss=3.7902
#3: Model=B, LR=0.01, Batch=8, Epochs=200, Act=sig, Val Loss=3.8909
#4: Model=A, LR=0.01, Batch=8, Epochs=150, Act=sig, Val Loss=3.9633
#5: Model=A, LR=0.01, Batch=16, Epochs=200, Act=sig, Val Loss=3.9802
#6: Model=A, LR=0.001, Batch=16, Epochs=200, Act=relu, Val Loss=4.0502
#7: Model=B, LR=0.001, Batch=8, Epochs=200, Act=relu, Val Loss=4.0798
#8: Model=B, LR=0.001, Batch=16, Epochs=150, Act=relu, Val Loss=4.0928
#9: Model=B, LR=0.001, Batch=16, Epochs=200, Act=relu, Val Loss=4.0941
#10: Model=A, LR=0.001, Batch=8, Epochs=200, Act=sig, Val Loss=4.0971
#11: Model=A, LR=0.01, Batch=16, Epochs=100, Act=sig, Val Loss=4.0975
#12: Model=A, LR=0.01, Batch=32, Epochs=200, Act=sig, Val Loss=4.0976
#13: Model=A, LR=0.01, Batch=8, Epochs=100, Act=sig, Val Loss=4.1031
#14: Model=B, LR=0.001, Batch=8, Epochs=150, Act=relu, Val Loss=4.1062
#15: Model=B, LR=0.01, B