<a href="https://colab.research.google.com/github/DaSilva-JV/AIED2025/blob/main/Known_outcome_Allocating_Dynamic_and_Finite_Resources_to_a_set_of_Tasks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive inside the Colaboratory runtime so the CSV files below are reachable
drive.mount('/content/gdrive')

# Install PuLP (IPython shell escape: this line only works inside a notebook cell)
!pip install pulp

# Load the k-parameter of each item (column 'k_parameter')
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/calculated_k_mt.csv')
k = np.array(df['k_parameter'])

# Load the number of items solved by each volunteer (column 'n_solved'; the file covers all 10,000 volunteers)
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/n_correct_answers_mt.csv')
n_solved = np.array(df['n_solved'])

# Load the sampled volunteers (column 'Volunteer'); later cells use these values
# as positional indices into the per-volunteer arrays and the answers table
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Sampled_Volunteers.csv')
sampled_volunteers = np.array(df['Volunteer'])

# Load the estimated parameter of each volunteer (column 'Theta')
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/Estimated_Theta_mt.csv')
all_theta = np.array(df['Theta'])


Mounted at /content/gdrive
Collecting pulp
  Downloading PuLP-2.8.0-py3-none-any.whl (17.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m17.7/17.7 MB[0m [31m32.7 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pulp
Successfully installed pulp-2.8.0


In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import time
import pulp
import math
from scipy.stats import norm
import csv
from datetime import datetime
from google.colab import auth
import gspread
from oauth2client.client import GoogleCredentials
from google.auth import default



def linear_programming_mod(n_items, n_volunteers, n_tries, n_solutions, vezes_resolvidas, ep, df_res):
    """Solve the LP relaxation that assigns items to the volunteers of one episode.

    One continuous variable in [0, 1] is created for every (item, volunteer)
    pair. The objective maximises the sum of the recorded answers in
    ``df_res`` weighted by those variables, subject to:

    * for each volunteer ``j``, the sum of its variables is at most
      ``n_tries[j]``;
    * for each item ``i``, the answer-weighted sum of its variables is at most
      ``n_solutions[i] - vezes_resolvidas[i]`` when that amount is positive,
      and exactly 0 otherwise.

    The function reads the module-level ``sampled_volunteers`` sequence:
    volunteer ``j`` of episode ``ep`` corresponds to row
    ``sampled_volunteers[n_volunteers * ep + j]`` of ``df_res``.

    Side effects: writes ``ItemsVolunteers.lp`` to the working directory and
    prints the objective value, the solver status and the episode number.

    Args:
        n_items: Number of items (leading columns of ``df_res`` that are used).
        n_volunteers: Number of volunteers in the episode.
        n_tries: Per-volunteer cap, indexed by the volunteer position ``j``.
        n_solutions: Desired number of solutions per item.
        vezes_resolvidas: Number of times each item has already been solved.
        ep: Zero-based episode index.
        df_res: DataFrame of recorded answers, accessed positionally as
            [volunteer row, item column].

    Returns:
        pandas.DataFrame with columns ``item``, ``volunteer`` and ``action``,
        one row per variable whose solved value is strictly positive, ordered
        by variable name (the order ``prob.variables()`` reports them in).
    """
    # Create the 'prob' variable to contain the problem data
    prob = pulp.LpProblem("ItemsVolunteers", pulp.LpMaximize)

    # All possible (item, volunteer) combinations
    possible_item_volunteer = [(i, j) for i in range(n_items) for j in range(n_volunteers)]

    # Continuous relaxation: x[(i, j)] in [0, 1] is the weight of presenting item i to volunteer j
    x = pulp.LpVariable.dicts("item_volunteer", possible_item_volunteer, lowBound=0, upBound=1, cat=pulp.LpContinuous)

    # Fetch the recorded answers of this episode's volunteers once, instead of
    # one scalar .iloc lookup per (item, volunteer) pair in both the objective
    # and the constraints. answers[j, i] is the answer of volunteer j to item i.
    first_row = n_volunteers * ep
    volunteer_rows = [sampled_volunteers[first_row + j] for j in range(n_volunteers)]
    answers = df_res.iloc[volunteer_rows, :n_items].to_numpy()

    # The objective function is added to 'prob' first
    prob += pulp.lpSum([answers[j, i] * x[(i, j)] for (i, j) in possible_item_volunteer])

    # ----------- #
    # Constraints #
    # ----------- #
    # Limit the number of tries of each volunteer
    for j in range(n_volunteers):
        prob += pulp.lpSum([x[(i, j)] for i in range(n_items)]) <= n_tries[j]

    # Limit the number of solutions of each item to what is still missing
    for i in range(n_items):
        expected_solutions = pulp.lpSum([answers[j, i] * x[(i, j)] for j in range(n_volunteers)])
        remaining = n_solutions[i] - vezes_resolvidas[i]
        if remaining > 0:
            prob += expected_solutions <= remaining
        else:
            prob += expected_solutions == 0

    # The problem data is written to an .lp file
    prob.writeLP("ItemsVolunteers.lp")

    # The problem is solved using PuLP's choice of Solver
    prob.solve()
    # prob.solve(pulp.PULP_CBC_CMD(gapRel=0.1))
    # status = prob.solve(solver=pulp.GLPK(msg=False))

    # Collect the strictly positive assignments. The indices come straight
    # from the keys of `x` (no parsing of variable names), and the rows are
    # gathered in a list so the DataFrame is built once rather than being
    # re-concatenated for every variable.
    rows = []
    for (item, volunteer), var in sorted(x.items(), key=lambda pair: pair[1].name):
        value = var.varValue
        # varValue is None when the solver did not assign the variable
        if value is not None and value > 0:
            rows.append((item, volunteer, value))
    df = pd.DataFrame(rows, columns=['item', 'volunteer', 'action'])

    print('#------------------------------#')
    print("Total number of solutions = ", pulp.value(prob.objective))  # The optimised objective function value is printed to the screen
    print("Status:", pulp.LpStatus[prob.status])  # The status of the solution is printed to the screen
    print("Episódio {}".format(ep+1))  # The episode in progress / just finished
    #print('#------------------------------#')

    return df
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
def get_enem_data():
    """Load the recorded answers and the mathematics item parameters from Drive.

    Returns:
        tuple: ``(df_items, df_respostas)`` where ``df_items`` is the
        fixed-width parameter file with an added ``'Type'`` column set to
        ``'1'`` and without its ``'Unnamed: 0'`` column, and ``df_respostas``
        holds the first 175 columns of the space-separated answers file.
    """
    answers_path = '/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/file.txt'
    params_path = '/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/param_enem_mt.txt'

    # Answers file: space-separated, no header row; keep only the first 175 columns
    df_respostas = pd.read_csv(answers_path, sep=' ', header=None).iloc[:, :175]

    # Item parameters, tagged with the item type
    df_items = pd.read_fwf(params_path)
    df_items['Type'] = '1'

    # Discard the 'Unnamed: 0' column that comes with the parameter file
    df_items = df_items.drop(columns=['Unnamed: 0'])

    return df_items, df_respostas
# ------------------------------------------------------------------------------


# Policy for choosing which task to present to a volunteer
def pl_policy(vol_, df_sol, flag_task):
    """Pick the next task to present to volunteer ``vol_``.

    Candidate tasks are the rows of ``df_sol`` (the LP solution) belonging to
    the volunteer, restricted to tasks that were not yet presented to them and
    that have not yet reached the desired number of solutions. One candidate
    is drawn at random with probability proportional to its ``action`` value.

    Reads the module-level ``tarefas_apresentadas``, ``vezes_resolvidas``,
    ``n_solutions`` and (for the fallback only) ``n_questions``.

    Args:
        vol_: Volunteer position used in ``df_sol['volunteer']``.
        df_sol: DataFrame with columns ``item``, ``volunteer`` and ``action``.
        flag_task: When true and the LP offers no valid candidate, return the
            first task that is still available, even if the LP did not pick it.

    Returns:
        The chosen task index, or ``-1`` when there is nothing to present.
    """
    def _is_open(task):
        # Not yet shown to this volunteer and still below its solution target
        return (task not in tarefas_apresentadas[vol_]) and (vezes_resolvidas[task] < n_solutions[task])

    # Rows of the LP solution that belong to this volunteer
    assigned = df_sol[df_sol['volunteer'] == vol_]

    open_items = []
    open_weights = []
    for raw_item, weight in zip(assigned['item'], list(assigned['action'])):
        item = int(raw_item)
        if _is_open(item):
            open_items.append(item)
            open_weights.append(weight)

    if open_items:
        # Weighted random draw among the LP candidates
        draw_probs = np.array(open_weights) / sum(open_weights)
        return np.random.choice(open_items, p=draw_probs)

    if not flag_task:
        return -1

    # Fallback: first task still available, regardless of the LP solution
    return next((task for task in range(n_questions) if _is_open(task)), -1)



# Driver: for each episode, take the next block of n_volunteers sampled
# volunteers, solve the LP once, replay the recorded answers following the LP
# policy, and log the episode totals to a Google Sheet.
if __name__ == '__main__':
    level = 4  # Levels to define n(t) and m(v)
    n_episodes = 100   # Number of episodes
    task_not_in_lp_sol = False    # If True, fall back to the first available task even when the linear program (LP) did not select it

    # Title of the Google Sheet that receives the per-episode results
    sheet_name = 'non-stochastic_qtd_solucoes_kk_' + '_' + str(level) + '_mod_' + str(task_not_in_lp_sol)

    # Tries and solutions
    n_volunteers = 100
    n_questions = 45
    if level == 0 or level == 1 or level == 2:
        n_tries = np.array(10000 * [16])  # Levels 0, 1 and 2
    else:  # level == 3 or level == 4:
        n_tries = n_solved  # Levels 3 and 4

    # Getting the items parameters
    test_type = '1'  # Mathematics test
    df_param, df_respostas = get_enem_data()
    df_resp = df_respostas.iloc[:, 0:45]  # Filter only the mathematics items
    # NOTE: a, b and c are loaded here but not used again in this cell
    a = np.array(df_param[df_param['Type'] == test_type]['Dscrmn'])
    b = np.array(df_param[df_param['Type'] == test_type]['Dffclt'])
    c = np.array(df_param[df_param['Type'] == test_type]['Gussng'])

    # Simulate for many episodes
    qtd_total_solucoes = []

    for ep in range(0, n_episodes):
        # Start the timer for this episode
        start = time.time()

        # Authenticate to google spreadsheet

        # Done again on every episode; see the note on the disabled line below about the token lifetime
        auth.authenticate_user()
        #gc = gspread.authorize(GoogleCredentials.get_application_default())  # This token only lasts one hour (it would only write one episode at a time)

        creds, _ = default()
        gc = gspread.authorize(creds)

        #gc.login()  # Refreshes the token
        # Create the spreadsheet (first episode only)
        if ep == 0:
            sh = gc.create(sheet_name)

        # Open our new sheet and add some data.
        worksheet = gc.open(sheet_name).sheet1

        # Load the parameters to solve the LP
        # Episode ep uses entries [n_volunteers * ep, n_volunteers * (ep + 1)) of sampled_volunteers
        n_tries_100 = n_tries[sampled_volunteers[(n_volunteers * ep):(n_volunteers * (ep + 1))]]
        # theta_100 = np.random.normal(0, 1, n_volunteers)
        theta_100 = all_theta[sampled_volunteers[(n_volunteers * ep):(n_volunteers * (ep + 1))]]

        # Calculation of parameter k for each group of 100 sampled volunteers
        # (mean of each item's answer column over this episode's volunteers)
        perc_resp_correta = list()

        for i in range(45):
            perc_resp_correta.append(df_resp.iloc[sampled_volunteers[(n_volunteers * ep):(n_volunteers * (ep + 1))], i].mean())

        # Desired number of solutions per item, depending on the level
        if level == 0:
            n_solutions = 45 * [12]  # Level 0
        elif level == 1:
            n_solutions = 45 * [35]  # Level 1
        elif level == 2 or level == 3:
            n_solutions = np.array(perc_resp_correta) * n_volunteers  # Levels 2 and 3
        else:  # level == 4:
            n_solutions = np.array(perc_resp_correta) * n_volunteers * 0.5  # Level 4

        # Number of times each question has been solved
        vezes_resolvidas = np.zeros(n_questions)
        qtd_total_presented = 0

        # Tasks presented to each volunteer (these names are read as globals by pl_policy)
        tarefas_apresentadas = dict()
        tentativas = dict()
        respostas_apresentadas = dict()

        # Matrix with the probabilities and the answers for each item
        # NOTE: `probs` is allocated here but not used again in this cell
        num = 1000
        probs = np.zeros((n_questions, num))


        # The answers must also be passed to the LP:
        # Solving the LP (once per episode, before any volunteer is served)
        df_s = linear_programming_mod(n_questions, n_volunteers, n_tries_100, n_solutions, vezes_resolvidas, ep, df_resp)



        # For each volunteer, tasks are assigned and the recorded answer tells whether each one was solved
        for v_ in range(n_volunteers):

            # The LPPS process applies this reversal, so it is done here too for the comparison to be valid
            v = (n_volunteers - 1) - v_

            # Each volunteer receives up to m distinct tasks
            tentativas[v] = 0
            tarefas_apresentadas[v] = list()
            respostas_apresentadas[v] = list()

            # Get the vector of ENEM answers of volunteer v in episode ep
            resp = np.array(df_resp.iloc[sampled_volunteers[v + n_volunteers * ep]])


            while tentativas[v] < n_tries[sampled_volunteers[v + n_volunteers * ep]]:
                # Get the task to be presented to the volunteer
                t = pl_policy(v, df_s, task_not_in_lp_sol)


                # -1 means the policy has no task left for this volunteer
                if t == -1:
                    break
                else:
                    # Check in the ENEM data whether the student solved the task
                    # resp = np.array(df_resp.iloc[sampled_volunteers[v + n_volunteers * ep]])
                    resolvida = resp[t]

                    # Update the number of times the task was solved and increment the tries
                    vezes_resolvidas[t] += resolvida
                    tentativas[v] += 1
                    tarefas_apresentadas[v].append(t)
                    respostas_apresentadas[v].append(resp[t])

            qtd_total_presented += tentativas[v]

        qtd_total_solucoes.append(vezes_resolvidas.sum())

        # Calculate the time at the end of the episode
        end = time.time()

        # Save the number of solutions at the end of each episode
        # Row ep+1, columns A-D: episode index, total solutions, elapsed seconds, total tasks presented
        cells = worksheet.range('A' + str(ep+1) + ':D' + str(ep+1))
        cells[0].value = ep
        cells[1].value = vezes_resolvidas.sum()
        cells[2].value = end - start
        cells[3].value = qtd_total_presented
        worksheet.update_cells(cells)


    qtd_total_solucoes = np.array(qtd_total_solucoes)


    # Print the results
    print('\nLevel: {}'.format(level))
    print("Mean of solutions for simulating {0} times: {1}".format(n_episodes, np.mean(qtd_total_solucoes)))
    print("Standard deviation of solutions for simulating {0} times: {1}".format(n_episodes, np.std(qtd_total_solucoes)))
    #print("Mean of tries for simulating {0} times: {1}".format(n_episodes, qtd_total_presented / n_episodes))

    # Stop the timer and print the total elapsed time (disabled)
    #end = time.time()
    #hours, rem = divmod(end - start, 3600)
    #minutes, seconds = divmod(rem, 60)
    #print("Time elapsed: {:0>2}h:{:0>2}min:{:05.2f}s".format(int(hours), int(minutes), seconds))



#------------------------------#
Total number of solutions =  783.5
Status: Optimal
Episódio 1
#------------------------------#
Total number of solutions =  814.0
Status: Optimal
Episódio 2
#------------------------------#
Total number of solutions =  741.5
Status: Optimal
Episódio 3
#------------------------------#
Total number of solutions =  813.5
Status: Optimal
Episódio 4
#------------------------------#
Total number of solutions =  744.5
Status: Optimal
Episódio 5
#------------------------------#
Total number of solutions =  814.5
Status: Optimal
Episódio 6
#------------------------------#
Total number of solutions =  889.5
Status: Optimal
Episódio 7
#------------------------------#
Total number of solutions =  740.5
Status: Optimal
Episódio 8
#------------------------------#
Total number of solutions =  817.5
Status: Optimal
Episódio 9
#------------------------------#
Total number of solutions =  820.0
Status: Optimal
Episódio 10
#------------------------------#
Total number of 