<a href="https://colab.research.google.com/github/DaSilva-JV/AIED2025/blob/main/Known_outcome_Allocating_Dynamic_and_Finite_Resources_to_a_set_of_Tasks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from google.colab import drive

# Mount Google Drive to Collaboratory
drive.mount('/content/gdrive')

# Install pulp
!pip install pulp

# Getting the tasks k-parameter
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/calculated_k_mt.csv')
k = np.array(df['k_parameter'])

# Getting the number of tasks solved for each resource (for all the 10.000)
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/n_correct_answers_mt.csv')
n_solved = np.array(df['n_solved'])

# Getting the sampled resources
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Sampled_Volunteers.csv')
sampled_resources = np.array(df['Volunteer'])

# Getting the resources parameters
df = pd.read_csv('/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks/Matemática/Estimated_Theta_mt.csv')
all_theta = np.array(df['Theta'])


Mounted at /content/gdrive
Collecting pulp
  Downloading PuLP-2.8.0-py3-none-any.whl (17.7 MB)
     ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 17.7/17.7 MB 32.7 MB/s eta 0:00:00
Installing collected packages: pulp
Successfully installed pulp-2.8.0


In [None]:
from google.colab import files
import pandas as pd
import numpy as np
import time
import pulp
import math
from scipy.stats import norm
import csv
from datetime import datetime
from google.colab import auth
import gspread
from oauth2client.client import GoogleCredentials
from google.auth import default



def linear_programming_mod(n_tasks, n_resources, n_tries, n_solutions, vezes_resolvidas, ep, df_res):
    """Solve the task/resource allocation LP for one episode.

    One continuous variable in [0, 1] is created per (task, resource) pair
    (an LP relaxation, not a binary program). The objective maximises the sum
    of the known outcomes ``df_res.iloc[row, task]`` weighted by those
    variables, where ``row`` is the entry of the module-level array
    ``sampled_resources`` that belongs to the resource in episode ``ep``.

    Args:
        n_tasks: Number of tasks (columns of ``df_res`` that are used).
        n_resources: Number of resources handled in one episode.
        n_tries: Per-resource upper bound on the total amount assigned;
            indexed by the resource position ``0..n_resources-1``.
        n_solutions: Per-task desired number of solutions.
        vezes_resolvidas: Per-task number of solutions already obtained.
        ep: Zero-based episode index; selects the slice
            ``sampled_resources[n_resources * ep : n_resources * (ep + 1)]``.
        df_res: DataFrame accessed positionally as ``iloc[resource_row, task]``
            holding the known outcome of each resource on each task.

    Returns:
        A DataFrame with columns ``item``, ``resource`` and ``action`` that
        holds one row per variable whose optimal value is strictly positive,
        in the order returned by ``prob.variables()``. The frame is built in
        one step, so the columns get numeric dtypes.

    Side effects:
        Writes ``TasksResources.lp`` to the working directory and prints the
        objective value, solver status and episode number.
    """
    # Create the 'prob' variable to contain the problem data
    prob = pulp.LpProblem("TasksResources", pulp.LpMaximize)

    # Every (task, resource) combination
    possible_item_resource = [(i, j) for i in range(n_tasks) for j in range(n_resources)]

    # Fraction of task i given to resource j, relaxed to the interval [0, 1]
    x = pulp.LpVariable.dicts("item_resource", possible_item_resource, lowBound=0, upBound=1, cat=pulp.LpContinuous)

    # Look up each known outcome once; it is needed by both the objective and
    # the per-task constraints.
    first = n_resources * ep
    rows = [sampled_resources[first + j] for j in range(n_resources)]
    outcome = {(i, j): df_res.iloc[rows[j], i] for (i, j) in possible_item_resource}

    # The objective function is added to 'prob' first
    prob += pulp.lpSum([outcome[t_v] * x[t_v] for t_v in possible_item_resource])

    # ----------- #
    # Constraints #
    # ----------- #
    # Each resource can receive at most its number of tries
    for j in range(n_resources):
        prob += pulp.lpSum([x[(i, j)] for i in range(n_tasks)]) <= n_tries[j]

    # Each task may collect at most the solutions it is still missing
    for i in range(n_tasks):
        expected_solutions = pulp.lpSum([outcome[(i, j)] * x[(i, j)] for j in range(n_resources)])
        missing = n_solutions[i] - vezes_resolvidas[i]
        if missing > 0:
            prob += expected_solutions <= missing
        else:
            prob += expected_solutions == 0

    # The problem data is written to an .lp file
    prob.writeLP("TasksResources.lp")

    # The problem is solved using PuLP's choice of solver
    prob.solve()

    # Map each variable name back to its (task, resource) key instead of
    # parsing the name string, which depends on PuLP's naming scheme.
    key_by_name = {var.name: key for key, var in x.items()}

    # Collect the strictly positive assignments and build the frame once
    records = []
    for v in prob.variables():
        key = key_by_name.get(v.name)
        value = v.varValue
        # Skip variables that are not ours or that the solver left unset
        if key is None or value is None:
            continue
        if value > 0:
            records.append({'item': key[0], 'resource': key[1], 'action': value})
    df = pd.DataFrame(records, columns=['item', 'resource', 'action'])

    print('#------------------------------#')
    print("Total number of solutions = ", pulp.value(prob.objective))  # The optimised objective function value is printed to the screen
    print("Status:", pulp.LpStatus[prob.status])  # The status of the solution is printed to the screen
    print("Episódio {}".format(ep+1))  # The ongoing/finished episode

    return df
# ------------------------------------------------------------------------------


# ------------------------------------------------------------------------------
def get_enem_data():
    """Load the answer matrix and the task parameters from Google Drive.

    Returns:
        A tuple ``(df_tasks, df_respostas)``: the task-parameter table read
        from ``param_enem_mt.txt`` (with a ``Type`` column set to ``'1'`` and
        the ``Unnamed: 0`` column removed) and the first 175 columns of the
        space-separated answer file ``file.txt``.
    """
    base_dir = '/content/gdrive/MyDrive/Allocating Dynamic and Finite Resources to a set of known Tasks'

    # Answers: no header row, keep only the first 175 columns
    answers = pd.read_csv(base_dir + '/file.txt', sep=' ', header=None).iloc[:, 0:175]

    # Task parameters, stored as a fixed-width text table
    task_params = pd.read_fwf(base_dir + '/Matemática/param_enem_mt.txt')

    # Tag every task with its test type and drop the unnamed leading column
    task_params['Type'] = '1'
    del task_params['Unnamed: 0']

    return task_params, answers
# ------------------------------------------------------------------------------


# Policy for choosing the tasks to resources
def pl_policy(vol_, df_sol, flag_task):
    """Pick the next task to present to resource ``vol_``.

    Reads the module-level names ``tarefas_apresentadas``,
    ``vezes_resolvidas``, ``n_solutions`` and ``n_tasks``.

    Args:
        vol_: Resource identifier as used in the ``resource`` column of
            ``df_sol`` and as key of ``tarefas_apresentadas``.
        df_sol: LP solution with columns ``item``, ``resource``, ``action``.
        flag_task: When true and the LP solution offers no usable task, fall
            back to the lowest-numbered task that is still open.

    Returns:
        The chosen task index, or ``-1`` when nothing can be presented.
    """
    def still_open(task):
        # Not yet shown to this resource and still short of its target
        return (task not in tarefas_apresentadas[vol_]) and (vezes_resolvidas[task] < n_solutions[task])

    # Rows of the LP solution that belong to this resource
    own_rows = df_sol[df_sol['resource'] == vol_]

    candidates = []
    weights = []
    for item, weight in zip(own_rows['item'], own_rows['action']):
        item = int(item)
        if still_open(item):
            candidates.append(item)
            weights.append(weight)

    if candidates:
        # Draw one candidate with probability proportional to its LP value
        return np.random.choice(candidates, p=np.array(weights) / sum(weights))

    if flag_task:
        # The first available task is returned even if it was not selected by LP
        for q in range(n_tasks):
            if still_open(q):
                return q
    return -1



# Simulation driver: for every episode, solve the LP for the episode's group of
# sampled resources, present tasks to each resource following pl_policy, count
# the solved tasks and append one result row to a Google spreadsheet.
# The names assigned here are module globals; pl_policy reads n_tasks,
# n_solutions, vezes_resolvidas and tarefas_apresentadas by name, and
# linear_programming_mod reads sampled_resources.
if __name__ == '__main__':
    level = 4  # Levels to define n(t) and m(v)
    n_episodes = 100   # Number of episodes
    task_not_in_lp_sol = False    # If True returns the first available task, even if it was not selected by linear programming (LP)

    # Title of the spreadsheet that receives one row per episode
    sheet_name = 'non-stochastic_qtd_solucoes_kk_' + '_' + str(level) + '_mod_' + str(task_not_in_lp_sol)

    # Tries and solutions
    n_resources = 100
    n_tasks = 45
    if level == 0 or level == 1 or level == 2:
        n_tries = np.array(10000 * [16])  # Levels 0, 1 and 2
    else:  # level == 3 or level == 4:
        n_tries = n_solved  # Levels 3 and 4

    # Getting the tasks parameters
    test_type = '1'  # Mathematics test
    df_param, df_respostas = get_enem_data()
    df_resp = df_respostas.iloc[:, 0:45]  # Filter only the mathematics tasks
    # NOTE(review): a, b and c are not used anywhere else in this block.
    a = np.array(df_param[df_param['Type'] == test_type]['Dscrmn'])
    b = np.array(df_param[df_param['Type'] == test_type]['Dffclt'])
    c = np.array(df_param[df_param['Type'] == test_type]['Gussng'])

    # Simulate for many episodes
    qtd_total_solucoes = []

    for ep in range(0, n_episodes):
        # Initializes the time count
        start = time.time()

        # Authenticate to google spreadsheet (repeated at the start of every episode)

        auth.authenticate_user()
        #gc = gspread.authorize(GoogleCredentials.get_application_default())  # This token only lasts for one hour (you will only write one episode at a time)

        creds, _ = default()
        gc = gspread.authorize(creds)

        #gc.login()  # Refreshes the token
        # Creation of the spreadsheet
        if ep == 0:
            sh = gc.create(sheet_name)

        # Open our new sheet and add some data.
        # NOTE(review): the sheet is reopened by title rather than through `sh`;
        # confirm which one is returned if another sheet with this title exists.
        worksheet = gc.open(sheet_name).sheet1

        # Load the parameters to solve the LP: the slice of sampled_resources
        # that belongs to this episode selects the resources' tries and thetas
        n_tries_100 = n_tries[sampled_resources[(n_resources * ep):(n_resources * (ep + 1))]]
        # theta_100 = np.random.normal(0, 1, n_resources)
        # NOTE(review): theta_100 is not used anywhere else in this block.
        theta_100 = all_theta[sampled_resources[(n_resources * ep):(n_resources * (ep + 1))]]

        # Mean answer of the episode's resources for each task
        # (presumably answers are 0/1, making this the fraction correct; verify against the data)
        perc_resp_correta = list()

        # NOTE(review): 45 is hard-coded here and below; it equals n_tasks.
        for i in range(45):
            perc_resp_correta.append(df_resp.iloc[sampled_resources[(n_resources * ep):(n_resources * (ep + 1))], i].mean())

        # Desired number of solutions per task, depending on the level
        if level == 0:
            n_solutions = 45 * [12]  # Level 0
        elif level == 1:
            n_solutions = 45 * [35]  # Level 1
        elif level == 2 or level == 3:
            n_solutions = np.array(perc_resp_correta) * n_resources  # Levels 2 and 3
        else:  # level == 4:
            n_solutions = np.array(perc_resp_correta) * n_resources * 0.5  # Level 4

        # Number of times each task was solved
        vezes_resolvidas = np.zeros(n_tasks)
        qtd_total_presented = 0

        # Tasks presented for each volunteer
        tarefas_apresentadas = dict()
        tentativas = dict()
        respostas_apresentadas = dict()

        # Matrix with the probabilities and the answers for each item
        # NOTE(review): num and probs are not used anywhere else in this block.
        num = 1000
        probs = np.zeros((n_tasks, num))


        # You also have to send the answer to LP:
        # Solving the LP (once per episode, before any task is presented,
        # so vezes_resolvidas is still all zeros at this point)
        df_s = linear_programming_mod(n_tasks, n_resources, n_tries_100, n_solutions, vezes_resolvidas, ep, df_resp)



        # For each resource, tasks are assigned and the known answer is looked up
        for v_ in range(n_resources):

            # In the LPPS process, this adjustment is made, so I will do it here too so that the comparison is valid.
            # (resources are visited in reverse order: n_resources - 1 down to 0)
            v = (n_resources - 1) - v_

            # Each resource receives up to m different tasks
            tentativas[v] = 0
            tarefas_apresentadas[v] = list()
            respostas_apresentadas[v] = list()

            # Get the vector with the enem answers for resource v in the episode ep
            resp = np.array(df_resp.iloc[sampled_resources[v + n_resources * ep]])


            # Keep presenting tasks until the resource has used all its tries
            while tentativas[v] < n_tries[sampled_resources[v + n_resources * ep]]:
                # Obtained the task to be presented to the resource
                t = pl_policy(v, df_s, task_not_in_lp_sol)


                # -1 means the policy has no task left for this resource
                if t == -1:
                    break
                else:
                    # Verification by enem data if the student solved the task
                    # resp = np.array(df_resp.iloc[sampled_resources[v + n_resources * ep]])
                    resolvida = resp[t]

                    # Updates the number of times that task has been solved and increases attempts
                    vezes_resolvidas[t] += resolvida
                    tentativas[v] += 1
                    tarefas_apresentadas[v].append(t)
                    respostas_apresentadas[v].append(resp[t])

            qtd_total_presented += tentativas[v]

        qtd_total_solucoes.append(vezes_resolvidas.sum())

        # Calculate the time at the end of the episode
        end = time.time()

        # Save the results of the episode in row ep+1 of the worksheet:
        # A = episode index (zero-based), B = total solutions,
        # C = elapsed seconds, D = total tasks presented
        cells = worksheet.range('A' + str(ep+1) + ':D' + str(ep+1))
        cells[0].value = ep
        cells[1].value = vezes_resolvidas.sum()
        cells[2].value = end - start
        cells[3].value = qtd_total_presented
        worksheet.update_cells(cells)


    qtd_total_solucoes = np.array(qtd_total_solucoes)


    # Print the results
    print('\nLevel: {}'.format(level))
    print("Mean of solutions for simulating {0} times: {1}".format(n_episodes, np.mean(qtd_total_solucoes)))
    print("Standard deviation of solutions for simulating {0} times: {1}".format(n_episodes, np.std(qtd_total_solucoes)))
    #print("Mean of tries for simulating {0} times: {1}".format(n_episodes, qtd_total_presented / n_episodes))

    # Ends time counting and prints the total elapsed time
    #end = time.time()
    #hours, rem = divmod(end - start, 3600)
    #minutes, seconds = divmod(rem, 60)
    #print("Time elapsed: {:0>2}h:{:0>2}min:{:05.2f}s".format(int(hours), int(minutes), seconds))



#------------------------------#
Total number of solutions =  783.5
Status: Optimal
Episódio 1
#------------------------------#
Total number of solutions =  814.0
Status: Optimal
Episódio 2
#------------------------------#
Total number of solutions =  741.5
Status: Optimal
Episódio 3
#------------------------------#
Total number of solutions =  813.5
Status: Optimal
Episódio 4
#------------------------------#
Total number of solutions =  744.5
Status: Optimal
Episódio 5
#------------------------------#
Total number of solutions =  814.5
Status: Optimal
Episódio 6
#------------------------------#
Total number of solutions =  889.5
Status: Optimal
Episódio 7
#------------------------------#
Total number of solutions =  740.5
Status: Optimal
Episódio 8
#------------------------------#
Total number of solutions =  817.5
Status: Optimal
Episódio 9
#------------------------------#
Total number of solutions =  820.0
Status: Optimal
Episódio 10
#------------------------------#
Total number of 