In [None]:
import numpy as np
import torch
from env import MultiAgentEnv
from sklearn.ensemble import RandomForestRegressor
from tqdm import tqdm
import pandas as pd
import json
import torch.nn as nn
import torch.optim as optim
import random
from collections import deque
from new_env import New_env
import gymnasium as gym
from sklearn.metrics import f1_score, accuracy_score, confusion_matrix
from imblearn.over_sampling import RandomOverSampler
from joblib import dump, load
from sklearn.metrics.pairwise import cosine_similarity
import matplotlib.pyplot as plt 
import seaborn as sns 
from sklearn.decomposition import PCA
from sklearn.utils import shuffle
from imblearn.under_sampling import NearMiss

In [None]:
# X_train (author's note: length 64)
# Each embedding column stores one JSON-encoded list per row; decode every
# entry into a float64 NumPy vector.
X_train = pd.read_csv('/Users/louisedurand-janin/Documents/GitHub/HrFlow_Data_Challenge/data/X_train.csv')
X_train = X_train.assign(**{
    column: X_train[column].apply(lambda raw: np.array(json.loads(raw), dtype=np.float64))
    for column in ('employee embedding', 'company embedding')
})

In [None]:
# Y TRAIN
# Read the position labels and encode each position name as an integer class.
y_train = pd.read_csv('/Users/louisedurand-janin/Documents/GitHub/HrFlow_Data_Challenge/data/y_train.csv', index_col=0)
encoding_map = dict(Assistant=0, Executive=1, Manager=2, Director=3)

# A label missing from encoding_map raises KeyError.
y_train = np.array(list(map(encoding_map.__getitem__, y_train['position'])))

In [None]:
# Full-width features (author's note: length 64).
# Stack the per-row embedding vectors into 2-D float64 tensors, one per column.
employee_embedding_tensor, company_embedding_tensor = (
    torch.tensor(np.vstack(X_train[column].values), dtype=torch.float64)
    for column in ('employee embedding', 'company embedding')
)

# Place employee and company features side by side (column-wise concatenation).
combined_tensor = torch.cat((employee_embedding_tensor, company_embedding_tensor), dim=1)

In [None]:
# Reduced features (author's note: length 38).
# The company embeddings are compressed to 6 principal components; the employee
# embeddings are kept as they are. This overwrites the tensors built from the
# full-width embeddings.
employee_embedding_tensor = torch.tensor(
    np.vstack(X_train['employee embedding'].values),
    dtype=torch.float64,
)

embeddings_company = np.vstack(X_train['company embedding'].values)

# PCA projecting onto 6 components; the fitted object stays in `pca` so the
# same projection can be applied to other data later.
pca = PCA(n_components=6)
reduced_embeddings = pca.fit_transform(embeddings_company)
print(reduced_embeddings.shape) 

company_embedding_tensor = torch.tensor(reduced_embeddings, dtype=torch.float64)

# Column-wise concatenation: employee features first, reduced company features last.
combined_tensor = torch.cat((employee_embedding_tensor, company_embedding_tensor), dim=1)
print("combined tensor shape", combined_tensor.shape)

In [None]:
# TEST
# Load the held-out test features. This must read X_test.csv (not X_train.csv)
# so that the rows correspond to the labels read from y_test.csv.
# Each embedding column stores one JSON-encoded list per row; decode every
# entry into a float64 NumPy vector.
X_test = pd.read_csv('/Users/louisedurand-janin/Documents/GitHub/HrFlow_Data_Challenge/data/X_test.csv')
X_test['employee embedding'] = X_test['employee embedding'].apply(lambda x: np.array(json.loads(x), dtype=np.float64))
X_test['company embedding'] = X_test['company embedding'].apply(lambda x: np.array(json.loads(x), dtype=np.float64))

In [None]:
# TEST, full-width features (author's note: length 64).
# Stack the per-row embedding vectors into 2-D float64 tensors, one per column.
employee_embedding_tensor_test, company_embedding_tensor_test = (
    torch.tensor(np.vstack(X_test[column].values), dtype=torch.float64)
    for column in ('employee embedding', 'company embedding')
)

# Concatenate along dim=1, i.e. append the company columns after the employee columns.
combined_tensor_test = torch.cat((employee_embedding_tensor_test, company_embedding_tensor_test), dim=1)

In [None]:
# TEST, reduced features (author's note: length 38).

embeddings_company_test = np.vstack(X_test['company embedding'].values)


# Project the test embeddings with the already-fitted `pca` (transform only,
# no refit), so train and test share the same projection.
reduced_embeddings_test = pca.transform(embeddings_company_test)

# Report the shape of the *test* projection (the train array was printed here before).
print(reduced_embeddings_test.shape) 
# Convert embeddings columns in PyTorch tensors 
employee_embedding_tensor_test = torch.tensor(np.vstack(X_test['employee embedding'].values), dtype=torch.float64)
company_embedding_tensor_test = torch.tensor(reduced_embeddings_test, dtype=torch.float64)

# Concatenate both
combined_tensor_test = torch.cat([employee_embedding_tensor_test, company_embedding_tensor_test], dim=1)
print("combined tensor shape", combined_tensor_test.shape)

In [None]:
# Y TEST
# Read the test labels and encode each position name as an integer class.
# NOTE(review): index_col=1 here, whereas y_train is read with index_col=0;
# confirm this matches the column layout of y_test.csv.
y_test = pd.read_csv("/Users/louisedurand-janin/Documents/GitHub/HrFlow_Data_Challenge/data/y_test.csv", index_col=1)
encoding_map = dict(Assistant=0, Executive=1, Manager=2, Director=3)
# A label missing from encoding_map raises KeyError.
y_test = np.array(list(map(encoding_map.__getitem__, y_test['position'])))

## OverSampling

In [None]:
# Resample the training set with a seeded RandomOverSampler, then shuffle
# features and labels together with a fixed seed.
ros = RandomOverSampler(random_state=0)
X_oversampled, y_oversampled = shuffle(
    *ros.fit_resample(combined_tensor, y_train),
    random_state=42,
)

## UnderSampling

In [None]:

# Resample the training set with NearMiss (version 1, 3 neighbours) on a NumPy
# copy of the feature tensor, then shuffle features and labels together with a
# fixed seed.
undersample = NearMiss(version=1, n_neighbors=3)
X_undersampled, y_undersampled = shuffle(
    *undersample.fit_resample(np.array(combined_tensor), y_train),
    random_state=42,
)

In [None]:
import random

class ReplayBuffer:
    """Fixed-capacity ring buffer of (s, a, r, s_, d) transitions.

    Once `capacity` transitions are stored, each new transition overwrites the
    oldest slot. `sample` draws transitions without replacement and returns
    them as tensors moved to `device`.
    """

    def __init__(self, capacity, device):
        self.capacity = capacity  # maximum number of stored transitions
        self.data = []            # stored transitions, in slot order
        self.index = 0            # slot that the next append writes to
        self.device = device      # target device for sampled tensors

    def append(self, s, a, r, s_, d):
        """Store one transition, overwriting the oldest once the buffer is full."""
        transition = (s, a, r, s_, d)
        if len(self.data) < self.capacity:
            # Still growing: the write slot is the end of the list.
            self.data.append(transition)
        else:
            self.data[self.index] = transition
        self.index = (self.index + 1) % self.capacity

    def sample(self, batch_size):
        """Return `batch_size` random transitions as a list of five tensors.

        The list holds, in order, the stacked s, a, r, s_ and d fields.
        `random.sample` raises ValueError if `batch_size` exceeds the number
        of stored transitions.
        """
        batch = random.sample(self.data, batch_size)
        return [
            torch.Tensor(np.array(field)).to(self.device)
            for field in zip(*batch)
        ]

    def __len__(self):
        """Number of transitions currently stored."""
        return len(self.data)

In [None]:
def greedy_action(network, state):
    """Return the index of the largest Q-value that `network` predicts for `state`.

    `state` is wrapped into a batch of one and moved to "cuda" when the
    network's first parameter lives on a CUDA device, otherwise to "cpu".
    The forward pass runs without gradient tracking.
    """
    on_gpu = next(network.parameters()).is_cuda
    target_device = "cuda" if on_gpu else "cpu"
    batch = torch.Tensor(state).unsqueeze(0).to(target_device)
    with torch.no_grad():
        q_values = network(batch)
    return torch.argmax(q_values).item()

In [None]:


class dqn_agent:
    """DQN agent: epsilon-greedy exploration, replay memory, MSE loss on one-step targets.

    Parameters
    ----------
    config : dict
        Must provide 'gamma', 'batch_size', 'nb_actions', 'epsilon_max',
        'epsilon_min', 'epsilon_decay_period', 'epsilon_delay_decay' and
        'learning_rate'.
    model : torch.nn.Module
        Q-network mapping a batch of states to one value per action. It is
        converted to float64 (``model.double()``).
    memory : object
        Replay memory exposing ``append(s, a, r, s_, d)``, ``sample(batch_size)``
        (returning the five stacked fields as tensors) and ``__len__``.
    """

    def __init__(self, config, model, memory):
        self.gamma = config['gamma']
        self.batch_size = config['batch_size']
        self.nb_actions = config['nb_actions']
        self.memory = memory
        self.epsilon_max = config['epsilon_max']
        self.epsilon_min = config['epsilon_min']
        self.epsilon_stop = config['epsilon_decay_period']
        self.epsilon_delay = config['epsilon_delay_decay']
        # Linear decay: epsilon goes from epsilon_max to epsilon_min in epsilon_stop steps.
        self.epsilon_step = (self.epsilon_max-self.epsilon_min)/self.epsilon_stop
        self.model = model.double()
        self.criterion = torch.nn.MSELoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr=config['learning_rate'])

    def gradient_step(self):
        """Run one optimisation step on a sampled mini-batch.

        Does nothing until the memory holds strictly more than `batch_size`
        transitions. The regression target for the taken action is
        ``r + gamma * (1 - done) * max_a' Q(s', a')``.
        """
        if len(self.memory) <= self.batch_size:
            return
        X, A, R, Y, D = self.memory.sample(self.batch_size)
        # The network is float64, so every sampled tensor is cast to match.
        X = X.double()
        Y = Y.double()
        R = R.double()
        D = D.double()
        # gather() needs an index with the same number of dimensions as the
        # Q-value matrix; actions stored as scalars come back as a 1-D tensor.
        A = A.to(torch.long)
        if A.dim() == 1:
            A = A.unsqueeze(1)
        # Bootstrap value of the next state, detached so it acts as a fixed target.
        QYmax = self.model(Y).max(1)[0].detach()
        update = torch.addcmul(R, 1-D, QYmax, value=self.gamma)
        QXA = self.model(X).gather(1, A)
        loss = self.criterion(QXA, update.unsqueeze(1))
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def train(self, env, max_episode):
        """Interact with `env` for `max_episode` episodes and learn from the transitions.

        `env` must provide ``reset(flag)`` returning ``(state, info)``,
        ``step(action)`` returning ``(position, next_state, reward, done, info)``,
        ``action_space.sample()`` and an ``Emb`` attribute with a ``shape``.
        An episode ends when the environment reports `done`, or right after the
        second greedy (non-random) action taken within that episode.

        Returns
        -------
        list
            Cumulative reward of each episode, in order.
        """
        episode_return = []
        episode = 0
        episode_cum_reward = 0
        state, _ = env.reset(True)
        epsilon = self.epsilon_max
        step = 0
        donebis = False          # forced end-of-episode flag (see greedy counter below)
        greed_action_stay = 0    # number of greedy actions taken in the current episode
        while episode < max_episode:
            # update epsilon (only once the initial delay has elapsed)
            if step > self.epsilon_delay:
                epsilon = max(self.epsilon_min, epsilon-self.epsilon_step)

            # select epsilon-greedy action
            if np.random.rand() < epsilon:
                action = env.action_space.sample()
            else:
                action = greedy_action(self.model, state)
                greed_action_stay += 1
                if greed_action_stay == 2:
                    donebis = True

            # step
            predicted_next_position, predicted_next_emb_state, reward, done, _ = env.step(action)
            # Store the transition: without this the replay memory stays empty
            # and gradient_step() never updates the network.
            self.memory.append(state, action, reward, predicted_next_emb_state, done)
            episode_cum_reward += reward

            # train
            self.gradient_step()

            # next transition
            step += 1
            if done or donebis:
                greed_action_stay = 0
                donebis = False

                episode += 1
                print("Episode ", '{:3d}'.format(episode), 
                      ", epsilon ", '{:6.2f}'.format(epsilon), 
                      ", batch size ", '{:5d}'.format(len(self.memory)), 
                      ", episode return ", '{:4.1f}'.format(episode_cum_reward),
                      sep='')
                # NOTE(review): once `episode` reaches the number of rows in env.Emb,
                # every later episode calls reset(True); presumably that restarts
                # from the first sample - confirm against New_env.
                if episode >= env.Emb.shape[0]:
                    state, _ = env.reset(True)
                else:
                    state, _ = env.reset(False)
                episode_return.append(episode_cum_reward)
                episode_cum_reward = 0
            else:
                state = predicted_next_emb_state

        return episode_return

In [None]:
import gymnasium as gym
#cartpole = gym.make('CartPole-v1', render_mode="rgb_array")
import torch
import torch.nn as nn
import matplotlib.pyplot as plt
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Declare network: two hidden ReLU layers, one output per action.

# NOTE(review): the feature tensors built above are annotated as length 38;
# presumably New_env adds one extra value to the state - confirm.
state_dim = 39
n_action = 2
nb_neurons=64
DQN = torch.nn.Sequential(nn.Linear(state_dim, nb_neurons),
                          nn.ReLU(),
                          nn.Linear(nb_neurons, nb_neurons),
                          nn.ReLU(), 
                          nn.Linear(nb_neurons, n_action)).to(device)

# DQN config
config = {'nb_actions': n_action,  # kept in sync with the network's output size
          'learning_rate': 0.001,
          'gamma': 0.95,
          'buffer_size': 10000,
          'epsilon_min': 0.01,
          'epsilon_max': 1,
          'epsilon_decay_period': 1000,
          'epsilon_delay_decay': 20,
          'batch_size': 20}

# Replay memory. It has to be created here: no other cell defines
# `replay_buffer`, so a fresh kernel would otherwise fail with NameError.
replay_buffer = ReplayBuffer(config['buffer_size'], device)

# Train agent
agent = dqn_agent(config, DQN.double(), replay_buffer)
env = New_env(torch.tensor(X_undersampled, dtype=torch.float64), torch.tensor(y_undersampled, dtype=torch.float64))
scores = agent.train(env, 50000)
plt.plot(scores)

In [None]:
# Evaluate on TRAIN
# Roll out the greedy policy for each undersampled training example, for at
# most 5 environment steps, and keep the last predicted position.
career_env_test = New_env(
    torch.tensor(X_undersampled, dtype=torch.float64),
    torch.tensor(y_undersampled, dtype=torch.float64),
)
pred, y_pred = [], []
s, _ = career_env_test.reset(True)
i = 0
for t in tqdm(range(len(y_undersampled))):
    for k in range(5):
        a = greedy_action(agent.model, s)
        predicted_next_position, predicted_next_emb_state, reward, d, _ = career_env_test.step(a)
        s = predicted_next_emb_state

        # Trace only the first four examples.
        if t < 4:
            print("action chosen", a)

            print("s", predicted_next_position)
            print("target", y_undersampled[i])

        # Stop the rollout as soon as the environment reports the end.
        if d:
            break
    i += 1
    pred.append(s)
    y_pred.append(predicted_next_position.item())
    s, _ = career_env_test.reset(False)

In [None]:
# Macro-averaged F1 on the undersampled training set.
# scikit-learn expects the ground truth first: f1_score(y_true, y_pred).
print(f1_score(y_undersampled, y_pred, average='macro'))

In [None]:
# Environment over the test set. The labels are passed as a float64 tensor,
# like in the other New_env constructions of this notebook.
career_env_test = New_env(combined_tensor_test, torch.tensor(y_test, dtype=torch.float64))

In [None]:
# Roll out the greedy policy for each test example, for at most 4 environment
# steps, and keep the last predicted position.
pred, y_pred = [], []
s, _ = career_env_test.reset(True)
i = 0
for t in tqdm(range(len(y_test))):
    for k in range(4):
        a = greedy_action(agent.model, s)
        predicted_next_position, predicted_next_emb_state, reward, d, _ = career_env_test.step(a)
        s = predicted_next_emb_state

        # Trace only the first four examples.
        if t < 4:
            print("action chosen", a)

            print("s", predicted_next_position)
            print("target", y_test[i])

        # Stop the rollout as soon as the environment reports the end.
        if d:
            break
    i += 1
    pred.append(s)
    y_pred.append(predicted_next_position.item())
    s, _ = career_env_test.reset(False)

In [None]:
# Macro-averaged F1 on the test set.
# scikit-learn expects the ground truth first: f1_score(y_true, y_pred).
print(f1_score(y_test, y_pred, average="macro"))