In [None]:
# import employee class
from DataGenerator import DataExtraction,TrainModel,Employee,DatasetBuilder

In [None]:
# Build the example dataset.
# NOTE(review): DatasetBuilder is presumably given the number of examples to
# generate and the file to write them to (the next cell loads from `filename`);
# confirm against DataGenerator.
NumExamples = 10000
filename = 'SocEngExampleDataset.txt'
DatasetBuilder(NumExamples, filename)

In [None]:
# Human-readable label for each integer sentiment score (0, 1, 2).
sent_def = dict(enumerate(['negative', 'neutral', 'positive']))
# Load the dataset: DataExtraction returns the phrases and their scores.
phrases, scores = DataExtraction(filename)

In [None]:
# Show the first phrase of the dataset, a separator line, and then the
# sentiment score stored for that phrase.
print(phrases[0], '-----------', sep='\n')
print('Sentiment score for the phrase {}'.format(scores[0]))

In [None]:
# Train the sentiment classifier on the loaded phrases and scores.
# TrainModel returns two objects, bound here as the model and (judging by the
# name) its text vectorizer — confirm against DataGenerator.TrainModel.
test_model,test_vctzr = TrainModel(phrases,scores)

In [None]:
# Create an employee.
# resp_prob holds one row per sentiment class (row 0 = negative, row 1 = neutral,
# row 2 = positive); each row is a pair of response probabilities.
# NOTE(review): the rows are described as [P(take action), P(no action)], but the
# decision labels used later in this notebook (dec_def) map 0 to
# "Don't take action" and 1 to "Take action". If makeDecision returns a column
# index, one of the two descriptions is reversed — confirm against Employee.
resp_prob = [
    [0.1, 0.9],  # negative sentiment: taking action / not taking action
    [0.5, 0.5],  # neutral sentiment:  taking action / not taking action
    [0.9, 0.1],  # positive sentiment: taking action / not taking action
]
test_employee = Employee(test_model, test_vctzr, resp_prob)

In [None]:
# Demonstrate the inherited method getMessageSentiment() on the first phrase.
message_sent = test_employee.getMessageSentiment(phrases[0])
# Translate the numeric score into its label before reporting it.
sentiment_label = sent_def[message_sent]
print(
    'The messages sentiment score is {} which corresponds to a {} sentiment'.format(
        message_sent, sentiment_label
    )
)

In [None]:
# Demonstrate makeDecision(), the method unique to the Employee class.
# dec_def maps the integer decision class to a readable description.
dec_def = dict(enumerate(["Don't take action", "Take action"]))
employee_decision = test_employee.makeDecision(phrases[0])
decision_label = dec_def[employee_decision]
print(
    "The employee's decision score class is {} which correspond to: {}".format(
        employee_decision, decision_label
    )
)

In [None]:
# SPACE

In [None]:
# Import company env class
from TrainSocEngAgent import CompanyEnv

In [None]:
# Create a company environment.
# The environment is constructed from the number of employees, the two objects
# returned by TrainModel (test_model, test_vctzr), and the full phrase/score
# dataset loaded earlier.
NumEmployees = 5
comp_env = CompanyEnv(NumEmployees,test_model,test_vctzr,phrases,scores)

In [None]:
# Show the different methods: print Python's built-in help text for the
# environment object (its class documentation and method list).
help(comp_env)

In [None]:
# Highlight important variables: status, resp_prob.
# Print the environment's `status` attribute
# (presumably the current per-employee state — confirm against CompanyEnv).
print(comp_env.status)

In [None]:
# Print the environment's `resp_prob` attribute
# (presumably the employees' response probabilities — confirm against CompanyEnv).
print(comp_env.resp_prob)

In [None]:
# SPACE 

In [None]:
# Import agent class 
from TrainSocEngAgent import Agent
# import pytorch related functions
import torch 
from torch import nn
import numpy as np

In [None]:
# Create an artificial agent.
# NumEmployees is whatever was last assigned in the notebook (5 when the cells
# are run top to bottom up to this point).
latent_dim = 100 # presumably the size of the agent's hidden representation — confirm against Agent
num_actions = 3 # (1) Send negative message, (2) Send neutral message, (3) Send positive message
test_agent = Agent(NumEmployees,latent_dim,num_actions)

In [None]:
# Print the architecture: show the agent's printable representation.
print(test_agent)

In [None]:
# Run the agent once on a hand-written input.
# The input is a single row of five 0/1 values as a float tensor of shape (1, 5)
# (presumably one entry per employee — confirm against Agent).
test_input = torch.tensor([[0, 0, 1, 0, 1]], dtype=torch.float32)
# The agent returns two outputs: a state value and a behavioral policy.
state_value, behav_policy = test_agent(test_input)
current_value = state_value[0, 0]
print('The value of the current state is {}'.format(current_value))
print('---------')
print('the agents behavioral policy \n')
print(behav_policy)

In [None]:
# Show one step of interaction between the agent and the environment.
# Reset the environment and get the initial state.
initial_state = comp_env.env_reset()
print('Initial state: {}'.format(initial_state))
# Provide this state to the agent as a single-row float tensor.
state_value,behav_policy = test_agent(torch.tensor(initial_state).reshape(1,-1).float())
# Detach from the autograd graph so the policy can be used as a NumPy array.
behav_policy = behav_policy.detach().numpy()
# Sample one action per employee. The policy is indexed as
# [batch, employee, action]; the set of candidate actions is derived from the
# size of the last axis instead of being hard-coded, so this cell keeps working
# if the agent is built with a different number of actions.
candidate_actions = np.arange(behav_policy.shape[2])
actions = [
    np.random.choice(candidate_actions, p=behav_policy[0, employee_idx, :])
    for employee_idx in range(behav_policy.shape[1])
]
# Provide the agent's actions to the environment. Note that `actions` is
# rebound to the value returned by take_action.
next_state,reward,previous_state,actions,done = comp_env.take_action(actions)
# Report the outcome of the step.
print('reward for the actions: {}'.format(reward))
print('Next state: {}'.format(next_state))

In [None]:
# SPACE

In [None]:
# Import necessary functions
from TrainSocEngAgent import EpisodeQueue,GatherData,train_Agent,GAMMA,STORAGE_SIZE,EPI_Q,policy_Q
import os

In [None]:
# Go through the training process for the agent.
# ---- Training configuration ----
NumIters = 600        # number of training iterations run by the training loop
NumEpisodes = 50      # passed to GatherData on every call
NumEmployees = 3 # Note: To get convergence, you will need to increase NumIters whenever you increase the number of employees
NumInstances = 1000   # upper bound on episodes dequeued for training per iteration
latent_dim = 100
num_actions = 3
learning_rate = 1e-3  # Adam learning rate
batch_size = 100      # passed to train_Agent
switch_flag = False   # when True, the training loop periodically calls env.reset_resp_probs()
data_filename = 'SocialEngText.txt'
# ---- Data ----
# Generate the text data only when the file is not already present in the
# working directory (checked directly rather than by listing the directory).
if not os.path.exists(data_filename):
    print('Generating the text data')
    DatasetBuilder(10000,data_filename)
# Load the data
phrases,scores = DataExtraction(data_filename)
print('Extracted the data')
# ---- Models ----
# Train the employee's sentiment classifier
model,vctzr = TrainModel(phrases,scores)
print('Trained the sentiment classifier')
# Initialize the agent and the optimizer that updates its parameters
agent = Agent(NumEmployees,latent_dim,num_actions)
opt = torch.optim.Adam(agent.parameters(),lr=learning_rate)
# Initialize the environment
env = CompanyEnv(NumEmployees,model,vctzr,phrases,scores)

In [None]:
# Start the training process.
# Each iteration: gather training data, drain the episode and policy queues,
# train the agent on the drained episodes, then gather data again with
# trainFlag=False (presumably an evaluation pass — confirm against GatherData).
PRINT_EVERY = 1        # print the iteration counter every N iterations
SWITCH_EVERY = 400     # iterations between response-probability resets (only if switch_flag)
SNAPSHOT_EVERY = 100   # iterations between stored policy/episode snapshots
trn_loss,trn_perfs,tst_perfs = [],[],[]
all_policy_data = []
all_episode_data = []
print('Starting the agent training process')
# The loop variable is deliberately not called `iter`, so the built-in iter()
# stays usable for the rest of the notebook session.
for iteration in range(NumIters):
    # Iteration 0 is never reported or used for switching/snapshots.
    if iteration != 0 and iteration % PRINT_EVERY == 0:
        print('Iteration {}'.format(iteration))
    if switch_flag and iteration != 0 and iteration % SWITCH_EVERY == 0:
        print('switching the resp probs')
        env.reset_resp_probs()
    # Gather data (presumably this fills EPI_Q and policy_Q — confirm against GatherData).
    avg_trn_rew = GatherData(env,agent,NumEpisodes,printFlag=True)
    trn_perfs.append(avg_trn_rew)
    # Extract at most NumInstances episodes for use in the agent's training step.
    episodes = [EPI_Q.dequeue() for _ in range(min(NumInstances,EPI_Q.queue_size()))]
    # The policy queue is drained completely on every iteration, but its
    # contents are only kept on snapshot iterations.
    policy_data1 = [policy_Q.dequeue() for _ in range(policy_Q.queue_size())]
    if iteration != 0 and iteration % SNAPSHOT_EVERY == 0:
        all_policy_data.append(policy_data1)
        all_episode_data.append(episodes)
    # Train the agent on the extracted episodes and record the loss.
    tr_loss = train_Agent(agent,episodes,opt,batch_size)
    trn_loss.append(tr_loss)
    # Second data-gathering pass with training disabled; record its result.
    avg_tst_rew = GatherData(env,agent,NumEpisodes,trainFlag = False)
    tst_perfs.append(avg_tst_rew)

In [None]:
# SPACE

In [None]:
# Import visualization related functions
from VizualizationFunctions import Feature_Scores_Plot,AgentPredictionPerEmployee,PerfOverTraining,CompanyViz,CreateConfusionMatrix
import pickle

In [None]:
# Load the previously saved results used by the visualization cells.
# SECURITY NOTE: pickle.load can execute arbitrary code while unpickling; only
# load pickle files that come from a trusted source.
def _load_pickle(path):
    """Return the object stored in the pickle file at `path`.

    The file is opened in binary mode inside a context manager so the handle is
    closed as soon as loading finishes (or fails).
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


perfdata = _load_pickle('PerfData_TestAgent.pickle')
epidata = _load_pickle('PolicyData_EpisodeData_overTraining.pickle')
clfdata = _load_pickle('SentimentClsfrTrnAndPreds.pickle')

In [None]:
# Create employee related visualizations.
# clfdata unpacks into five items; judging by the names these are the train/test
# features, the train/test labels and the classifier's predictions — confirm
# against the code that wrote the pickle.
trX,tsX,trY,tsY,ypred = clfdata
# Convert the training features with .toarray()
# (presumably a sparse matrix being made dense — TODO confirm).
trX2 = trX.toarray()

In [None]:
# Score and plot features against the training labels. Only the first 500
# feature columns are passed in.
# NOTE(review): the name `mi_scores` suggests mutual-information scores — confirm
# against VizualizationFunctions.Feature_Scores_Plot.
mi_scores = Feature_Scores_Plot(trX2[:,:500],trY)

In [None]:
# Draw the confusion matrix for the test labels vs. the predictions, labelled
# with the sentiment names from sent_def ('negative', 'neutral', 'positive').
CreateConfusionMatrix(tsY,ypred,labels=list(sent_def.values()))

In [None]:
# Create Agent related visualizations.
# Unpack the loaded results. Note that this rebinds all_policy_data and
# all_episode_data to the values loaded from disk.
trn_data, tst_data = perfdata
all_policy_data, all_episode_data = epidata
# First group of policy snapshots: entries 0, 1 and 2.
first_poli, sec_poli, third_poli = (all_policy_data[idx] for idx in (0, 1, 2))
# Second group of policy snapshots: entries 3, 5 and 6.
# NOTE(review): entry 4 is skipped here — confirm this is intentional.
first_poli2, sec_poli2, third_poli2 = (all_policy_data[idx] for idx in (3, 5, 6))

In [None]:
# Plot the training performance data.
# NOTE(review): the second argument (3) presumably is the number of employees —
# confirm against VizualizationFunctions.PerfOverTraining.
PerfOverTraining(trn_data,3)

In [None]:
# Plot the agent's prediction for the first element of each snapshot in the
# first group, in order (first, second, third).
print('Assigned Sentiments [2,2,2]')
for policy_snapshot in (first_poli, sec_poli, third_poli):
    # A fresh label list is passed on every call.
    AgentPredictionPerEmployee(policy_snapshot[0][0], ['Neg.','Neutral','Pos.'])

In [None]:
# Plot the agent's prediction for the first element of each snapshot in the
# second group, in order (first, second, third).
print('Assigned Sentiments [0,2,2]')
for policy_snapshot in (first_poli2, sec_poli2, third_poli2):
    # A fresh label list is passed on every call.
    AgentPredictionPerEmployee(policy_snapshot[0][0], ['Neg.','Neutral','Pos.'])

In [None]:
# Create the company visualizer for three employees.
# NumEmployees is reassigned here so this cell does not depend on the value left
# behind by earlier cells.
NumEmployees = 3
compviz = CompanyViz(NumEmployees)

In [None]:
# Note: The current data has 50 episodes to choose from at 7 different moments in the training process
# NOTE(review): the trailing arguments (3, 22) presumably select the training
# moment and the episode to package — confirm the order and valid ranges
# against CompanyViz.EpiSeqPackaging.
recombine_data = compviz.EpiSeqPackaging(all_policy_data,all_episode_data,3,22)

In [None]:
# Visualize the episode sequence packaged in the previous cell.
compviz.episode_sequence(recombine_data)