In [3]:
import copy
import json
import pickle
from datetime import datetime
from pathlib import Path

In [4]:
# The pickle exports written further down go under this directory;
# create it (and any missing parents) before anything is dumped.
folder = 'data/comp1_2020_2'
output_dir = Path(folder)
output_dir.mkdir(parents=True, exist_ok=True)

In [5]:
# Inclusive date window; classes are selected by comparing their
# start_date against these two bounds.
DATE_FORMAT = '%d/%m/%Y %H:%M:%S'
data_inicio = datetime.strptime('22/03/2021 00:00:00', DATE_FORMAT)
data_fim = datetime.strptime('12/06/2021 23:59:59', DATE_FORMAT)

In [6]:
# Classes whose start_date lies inside [data_inicio, data_fim],
# ordered by primary key.
classes_in_window = OnlineClass.objects.filter(
    start_date__gte=data_inicio,
    start_date__lte=data_fim,
)
turmas_validas = classes_in_window.order_by('pk')
turmas_validas

<QuerySet [<OnlineClass: EM2 - Natanael>, <OnlineClass: EP1 - Natanael - COMP 2>, <OnlineClass: MAB114_EE1_11452_REM2020_2>, <OnlineClass: MAB121_CMT1_11474_REM2020_2>, <OnlineClass: Comp1_2020_2_EM_(Carla)>, <OnlineClass: EC1 - Anamaria (2020-2)>, <OnlineClass: Computação I - CMT2 - 11475 (Rafael / Bernardo)>, <OnlineClass: Computação I - CMT3 - 11477 (Rafael / Danilo)>, <OnlineClass: Comp1_2020-2_EP_EC2 [Danilo]>, <OnlineClass: Comp1_2020-2_SV_IQT1-IQQ [Danilo]>, <OnlineClass: CompI_2020.2_EP_ENU/ET1/ER1  [Jéssica]>, <OnlineClass: CompI_2020.2_NTA+MAA [Jéssica]>, <OnlineClass: Comp 1 IGA/IG1- 2020.2 - Daniel Alfaro>, <OnlineClass: Comp I - IFN/IQQ  (2020.2 - João Vitor)>]>

In [7]:
# Print the primary key and name of every selected class, one per line.
for turma in turmas_validas:
    print(turma.pk, turma.name)

55 EM2 - Natanael
56 EP1 - Natanael - COMP 2
57 MAB114_EE1_11452_REM2020_2
58 MAB121_CMT1_11474_REM2020_2
59 Comp1_2020_2_EM_(Carla)
60 EC1 - Anamaria (2020-2)
62 Computação I - CMT2 - 11475 (Rafael / Bernardo)
63 Computação I - CMT3 - 11477 (Rafael / Danilo)
64 Comp1_2020-2_EP_EC2 [Danilo]
65 Comp1_2020-2_SV_IQT1-IQQ [Danilo]
67 CompI_2020.2_EP_ENU/ET1/ER1  [Jéssica]
68 CompI_2020.2_NTA+MAA [Jéssica]
69 Comp 1 IGA/IG1- 2020.2 - Daniel Alfaro
70 Comp I - IFN/IQQ  (2020.2 - João Vitor)


In [8]:
# Hand-written list of class pks: every pk printed above except 56
# ("EP1 - Natanael - COMP 2") -- presumably dropped because it is not a
# Comp 1 class; TODO confirm.
# NOTE: this rebinds `turmas_validas` from a QuerySet to a plain list of ints.
turmas_validas = [55, 57, 58, 59, 60, 62, 63, 64, 65, 67, 68, 69, 70]

In [9]:
def get_chapters(chapters, weeks):
    """Look up each entry of `weeks` in the `chapters` mapping.

    Args:
        chapters: mapping (anything supporting ``chapters[key]``).
        weeks: iterable of keys to look up, in the desired output order.

    Returns:
        A new list with ``chapters[week]`` for every week, in the same
        order as `weeks` (duplicates are kept).

    Raises:
        KeyError: if a week is not present in `chapters`.
    """
    return [chapters[week] for week in weeks]

In [10]:
from django.db.models import Case, When

# Chapter pks in the order they must be numbered.
chapter_order = [20, 12, 13, 14, 15, 16, 17, 19, 10]

# Map every pk to its position in chapter_order and sort on that value,
# so the database returns the chapters in exactly this order.
ordering_cases = [
    When(pk=chapter_pk, then=position)
    for position, chapter_pk in enumerate(chapter_order)
]
preserved = Case(*ordering_cases)
chapters = Chapter.objects.filter(pk__in=chapter_order).order_by(preserved)

# Numbering starts at 2, so chapter_dict maps 2, 3, ... to chapter pks.
chapter_start = 2
chapter_dict = {}
for number, chapter in enumerate(chapters, start=chapter_start):
    chapter_dict[number] = chapter.pk
    print(f"{chapter.pk} - {chapter.label}")

20 - (REMOTO) 02 - Funcoes e Tipos de dados
12 - (REMOTO) 03 - Tipos de dados, Strings, Estrutura Condicional
13 - (REMOTO) 04 - Variáveis e atribuição, strings e tuplas
14 - (REMOTO) 05 - Manipulação de strings, tuplas e listas
15 - (REMOTO) 06 - Fatiamento e manipulação de listas
16 - (REMOTO) 07 - Estrutura de repetição com teste de parada: While
17 - (REMOTO) 08 - Estrutura de repetição iteradora: for
19 - (REMOTO) 09 - Laços aninhados e matrizes
10 - (REMOTO) 10 - Dicionário


In [11]:
# Users to leave out of the export: professors, and users of the selected
# classes who already have logs dated before the window opens (i.e. who
# had used the system earlier).
professores = Professor.objects.values_list('user')
logs_before_window = UserLog.objects.filter(
    user__userprofile__user_class__in=turmas_validas,
    timestamp__lt=data_inicio,
)
usuarios_tentativa_anterior = logs_before_window.values_list('user').distinct()



In [12]:
# What should be done with students who are repeating the course?
# For now they are kept separate.

### Get logs from classes with corrected assigned chapters

In [13]:
%%time
# Keys of chapter_dict to export, translated into chapter pks.
CHAPTERS = [2, 3, 4, 5, 6, 7, 8, 9, 10]
chapter_filter = get_chapters(chapter_dict, CHAPTERS)

# Logs of the selected classes and chapters, without professors or users
# who had earlier activity, oldest first, reduced to the listed columns.
userlog = (
    UserLog.objects
    .filter(
        user__userprofile__user_class__in=turmas_validas,
        problem__chapter__in=chapter_filter,
    )
    .exclude(user__in=usuarios_tentativa_anterior)
    .exclude(user__in=professores)
    .order_by('timestamp')
    .values(
        'user__id',
        'problem__id',
        'outcome',
        'timestamp',
        'user__userprofile__user_class',
        'problem__chapter',
    )
)
print(userlog.count())

68837
CPU times: user 2.93 ms, sys: 2 ms, total: 4.93 ms
Wall time: 427 ms


In [14]:
# Distinct ids found in the exported logs (set iteration order is arbitrary).
students = list({row['user__id'] for row in userlog})
problems = list({row['problem__id'] for row in userlog})
classes = list({row['user__userprofile__user_class'] for row in userlog})
# NOTE: rebinds `chapters` (previously a Chapter QuerySet) to a list of ids.
chapters = list({row['problem__chapter'] for row in userlog})

print(f"Total de alunos: {len(students)}")
print(f"Total de problemas: {len(problems)}")
print(f"Total de turmas: {len(classes)}")
print(f"Total de listas: {len(chapters)}")

Total de alunos: 294
Total de problemas: 45
Total de turmas: 12
Total de listas: 9


In [15]:
# Persist the logs as a plain list of dicts (the lazy queryset is
# materialised first, since the pickle should hold the rows themselves).
with open("%s/userlogs.pkl" % folder, "wb") as userlogs_file:
    userlog_rows = list(userlog)
    pickle.dump(userlog_rows, userlogs_file)

In [16]:
%%time
# Same selection as `userlog`, with the submitted solution text and the
# timing columns added to every row.
userlog_complete = (
    UserLog.objects
    .filter(
        user__userprofile__user_class__in=turmas_validas,
        problem__chapter__in=chapter_filter,
    )
    .exclude(user__in=usuarios_tentativa_anterior)
    .exclude(user__in=professores)
    .order_by('timestamp')
    .values(
        'user__id',
        'problem__id',
        'outcome',
        'timestamp',
        'user__userprofile__user_class',
        'solution',
        'seconds_in_page',
        'seconds_in_code',
        'seconds_to_begin',
        'problem__chapter',
    )
)
print(userlog_complete.count())

# Store the rows as a list of dicts next to the lighter export.
with open("%s/userlogs_complete.pkl" % folder, "wb") as complete_file:
    complete_rows = list(userlog_complete)
    pickle.dump(complete_rows, complete_file)

68837
CPU times: user 410 ms, sys: 207 ms, total: 617 ms
Wall time: 14.7 s


In [61]:
# One row per test case of each non-ignored solution's problem; ordering by
# the solution text keeps rows of the same solution next to each other.
solution_fields = ("problem_id", "content", "header", "problem__testcase__content")
solutions = (
    Solution.objects
    .filter(problem__in=problems, ignore=False)
    .values(*solution_fields)
    .order_by("content")
)

In [62]:
%%time
# Collapse the one-row-per-test-case result into one entry per solution:
#   {"problem_id", "solution", "function_call", "test_case": [parsed JSON, ...]}
data = []
# Entries already created, keyed by (problem id, solution text). Keying on
# both fields keeps two problems with identical solution text apart, and the
# lookup works regardless of row order (the old "same as previous content"
# check indexed data[-1] on an empty list when the first content was "").
entries_by_key = {}
for item in solutions:
    key = (item["problem_id"], item["content"])
    item_data = entries_by_key.get(key)
    # First row seen for this solution: start a new entry
    if item_data is None:
        item_data = {
            "problem_id": item["problem_id"],
            "solution": item["content"],
            "function_call": item["header"],
            "test_case": [],
        }
        entries_by_key[key] = item_data
        data.append(item_data)
    # The test-case column is None when the problem has no test case
    # (presumably because the relation is outer-joined -- verify); json.loads
    # would raise on it, so such rows contribute no test case.
    raw_test_case = item["problem__testcase__content"]
    if raw_test_case is not None:
        item_data["test_case"].append(json.loads(raw_test_case))

CPU times: user 3.96 ms, sys: 1.33 ms, total: 5.29 ms
Wall time: 167 ms


In [11]:
%%time
# Index the solution entries by problem id for direct lookup. When several
# entries share a problem id, the last one in `data` is the one kept.
data_dict = {entry["problem_id"]: entry for entry in data}

CPU times: user 28 µs, sys: 19 µs, total: 47 µs
Wall time: 49.6 µs


In [None]:
# Calculate test case percentage for each userlog
#
# NOTE(security): this cell exec()s student-submitted source inside the
# kernel. Each run gets its own namespace so it cannot overwrite notebook
# variables, but that is NOT a sandbox: the code can still touch the file
# system, and a submission that loops forever or waits on input() will hang
# the cell. Run it only in a disposable environment.

def _call_submission(code_obj, function_call, args):
    """Exec `code_obj` in a fresh namespace and call the function it defines.

    Args:
        code_obj: code object produced by ``compile(source, ..., 'exec')``.
        function_call: expression evaluated inside that namespace to obtain
            the callable (the solution's ``header`` value).
        args: positional arguments of one test case; a deep copy is passed so
            code that mutates its arguments cannot alter the stored test case.

    Returns:
        Whatever the called function returns.
    """
    namespace = {}
    exec(code_obj, namespace)
    return eval(function_call, namespace)(*copy.deepcopy(args))


# problem id -> reference answers (one per test case), computed only once
expected_by_problem = {}
userlog_testcase = []
# Iterate userlog_complete: it is the queryset that selects 'solution'.
for item in userlog_complete:
    problem_id = item["problem__id"]
    reference = data_dict.get(problem_id)
    tcs = reference["test_case"] if reference is not None else []

    # Stays None when there is no reference solution or no test case
    percentage = None
    if tcs:
        if problem_id not in expected_by_problem:
            # Errors in the reference solution are not caught on purpose:
            # they point at bad data and should stop the cell.
            function_obj = compile(reference["solution"], 'solution', 'exec')
            expected_by_problem[problem_id] = [
                _call_submission(function_obj, reference["function_call"], tc)
                for tc in tcs
            ]

        # Transform student solution into a code object; source that does
        # not compile (or is missing) fails every test case.
        try:
            function_obj = compile(item["solution"], 'student', 'exec')
        except Exception:
            function_obj = None

        correct = 0
        if function_obj is not None:
            for tc, answer_solution in zip(tcs, expected_by_problem[problem_id]):
                try:
                    answer_student = _call_submission(
                        function_obj, reference["function_call"], tc)
                    if answer_student == answer_solution:
                        correct += 1
                except (Exception, SystemExit):
                    # Any error raised by the submission (missing function,
                    # runtime error, exit() call) counts as a wrong answer.
                    continue
        percentage = 100 * correct / len(tcs)

    userlog_testcase.append({
        "user__id": item["user__id"],
        "problem__id": problem_id,
        "timestamp": item["timestamp"],
        "testcase_percentage": percentage,
    })

In [None]:
# Run every reference solution against each of its test cases.
for item in data:
    # The source is the same for all test cases of an entry: compile it once.
    function_obj = compile(item["solution"], 'solution', 'exec')
    for tc in item["test_case"]:
        print(tc)
        # Transform solution into python function. It is executed in a
        # throwaway namespace so that names defined by the solution cannot
        # overwrite notebook variables such as `data`, `item` or `tc`.
        namespace = {}
        exec(function_obj, namespace)
        answer = eval(item["function_call"], namespace)(*tc)
        