In [11]:
import copy
import json
import pickle
from datetime import datetime
from pathlib import Path

In [12]:
# Directory that receives every file exported by this notebook. It is created
# up front, parents included; an already existing directory is not an error.
folder = 'data/comp1_2020_1_v2'
output_dir = Path(folder)
output_dir.mkdir(parents=True, exist_ok=True)

In [3]:
# Inclusive bounds of the analysis window, written as day/month/year.
DATE_FORMAT = '%d/%m/%Y %H:%M:%S'
data_inicio = datetime.strptime('30/11/2020 00:00:00', DATE_FORMAT)
data_fim = datetime.strptime('06/03/2021 23:59:59', DATE_FORMAT)

In [4]:
# Primary keys of the classes whose start date lies inside the analysis
# window (both bounds inclusive).
start_date_window = {
    'start_date__gte': data_inicio,
    'start_date__lte': data_fim,
}
turmas_validas = (
    OnlineClass.objects
    .filter(**start_date_window)
    .values_list('pk', flat=True)
)
turmas_validas

<QuerySet [36, 38, 37, 54, 33, 34, 27, 28, 29, 25, 39, 26, 32, 31, 30]>

In [5]:
# Select the PLE chapters: those whose label contains 'REMOTO'.
# An earlier variant also OR-ed in chapters whose label contains '10';
# that part is disabled.
valid_chapters = Chapter.objects.filter(label__contains='REMOTO')
valid_chapters.count()

9

In [6]:
# Accounts to leave out of the study:
#   * every professor;
#   * users of the selected classes that already have log entries dated
#     before the analysis window starts (they used the system previously).
professores = Professor.objects.values_list('user')
usuarios_tentativa_anterior = (
    UserLog.objects
    .filter(
        user__userprofile__user_class__in=turmas_validas,
        timestamp__lt=data_inicio,
    )
    .values_list('user')
    .distinct()
)



In [7]:
# O que fazer com alunos repetentes?
# Vou separá-los por enquanto

### Get logs from classes with correctly assigned chapters

In [8]:
%%time
# Columns kept in the lightweight export.
userlog_fields = (
    'user__id', 'problem__id', 'outcome',
    'timestamp', 'user__userprofile__user_class',
    'problem__chapter',
)

# Logs of the selected classes on the selected chapters, oldest first,
# leaving out professors and users with activity before the window.
userlog = (
    UserLog.objects
    .filter(
        user__userprofile__user_class__in=turmas_validas,
        problem__chapter__in=valid_chapters,
    )
    .exclude(user__in=usuarios_tentativa_anterior)
    .exclude(user__in=professores)
    .order_by('timestamp')
    .values(*userlog_fields)
)
print(userlog.count())

106185
CPU times: user 0 ns, sys: 6.16 ms, total: 6.16 ms
Wall time: 947 ms


In [9]:
def _distinct(field):
    """Return the distinct values of `field` over all rows of `userlog`, as a list."""
    return list({row[field] for row in userlog})


students = _distinct('user__id')
problems = _distinct('problem__id')
classes = _distinct('user__userprofile__user_class')
chapters = _distinct('problem__chapter')

# Summary of how many distinct students, problems, classes and chapters
# appear in the selected logs.
print(
    f"Total de alunos: {len(students)}",
    f"Total de problemas: {len(problems)}",
    f"Total de turmas: {len(classes)}",
    f"Total de listas: {len(chapters)}",
    sep="\n",
)

Total de alunos: 421
Total de problemas: 50
Total de turmas: 15
Total de listas: 9


In [13]:
# Persist the lightweight log rows as a plain list of dicts.
userlogs_path = f"{folder}/userlogs.pkl"
with open(userlogs_path, "wb") as pklfile:
    userlog_rows = list(userlog)
    pickle.dump(userlog_rows, pklfile)

In [14]:
%%time
# Same selection as the lightweight `userlog` export, with the submitted
# code ('solution') and the timing columns added.
userlog_complete = (
    UserLog.objects
    .filter(
        user__userprofile__user_class__in=turmas_validas,
        problem__chapter__in=valid_chapters,
    )
    .exclude(user__in=usuarios_tentativa_anterior)
    .exclude(user__in=professores)
    .order_by('timestamp')
    .values('user__id', 'problem__id', 'outcome',
            'timestamp', 'user__userprofile__user_class',
            'solution', 'seconds_in_page', 'seconds_in_code',
            'seconds_to_begin', 'problem__chapter')
)

# Fetch the rows once and reuse them for both the printed count and the
# pickle: a separate count() would send a second query, and its result could
# differ from what ends up in the file. Materialising before opening the
# file also avoids leaving a truncated pickle behind if the query fails.
userlog_complete_rows = list(userlog_complete)
print(len(userlog_complete_rows))

with open("%s/userlogs_complete.pkl" % folder, "wb") as pklfile:
    pickle.dump(userlog_complete_rows, pklfile)

106185
CPU times: user 1.53 s, sys: 427 ms, total: 1.96 s
Wall time: 31.9 s


In [61]:
# Non-ignored solutions of the problems seen in the logs. The result has one
# row per test case; sorting by solution content keeps rows with identical
# content next to each other.
solution_fields = ("problem_id", "content", "header", "problem__testcase__content")
solutions = (
    Solution.objects
    .filter(problem__in=problems, ignore=False)
    .values(*solution_fields)
    .order_by("content")
)

In [62]:
%%time
# Collapse the one-row-per-test-case result in `solutions` into one record
# per solution, each carrying the list of decoded test cases.
#
# Records are keyed by (problem_id, content) instead of comparing only the
# content of consecutive rows: two different problems whose solutions have
# identical text must not be merged into a single record, and an empty
# solution text on the first row must not be mistaken for "same as before".
grouped = {}
for row in solutions:
    key = (row["problem_id"], row["content"])
    record = grouped.get(key)
    if record is None:
        # First row for this solution: start a new record.
        record = {
            "problem_id": row["problem_id"],
            "solution": row["content"],
            "function_call": row["header"],
            "test_case": [],
        }
        grouped[key] = record

    raw_test_case = row["problem__testcase__content"]
    # NOTE(review): presumably None when a problem has no test case rows;
    # json.loads(None) would raise, so such rows add nothing. TODO confirm.
    if raw_test_case is not None:
        record["test_case"].append(json.loads(raw_test_case))

# Dicts preserve insertion order, so records stay in first-seen order.
data = list(grouped.values())

CPU times: user 3.96 ms, sys: 1.33 ms, total: 5.29 ms
Wall time: 167 ms


In [11]:
%%time
# Index the solution records by problem id for direct lookup. When several
# records share a problem id, the last one in `data` is the one kept.
data_dict = {record["problem_id"]: record for record in data}

CPU times: user 28 µs, sys: 19 µs, total: 47 µs
Wall time: 49.6 µs


In [None]:
# Calculate, for each logged submission, the percentage of the problem's test
# cases on which the student's code returns the same value as the reference
# solution.
#
# NOTE(review): this executes student-submitted code with exec(). Run it only
# in an isolated environment. No timeout is applied, so a submission that
# never terminates will hang this cell.
#
# Rows come from `userlog_complete` because it is the queryset that selects
# the 'solution' column; reference data is looked up in `data_dict`.


def _load_callable(source_code, function_call):
    """Run `source_code` in a private namespace and return what `function_call` evaluates to there.

    Assumes `function_call` is an expression naming the function that
    `source_code` defines (TODO confirm against the stored headers).
    Raises whatever compiling, executing or evaluating the code raises.
    """
    namespace = {}
    exec(compile(source_code, 'solution', 'exec'), namespace)
    return eval(function_call, namespace)


expected_answers = {}  # problem id -> reference answers, one per test case
userlog_testcase = []
for item in userlog_complete:
    problem_id = item["problem__id"]
    reference = data_dict.get(problem_id)
    tcs = reference["test_case"] if reference is not None else []

    percentage = None  # stays None when there is nothing to grade against
    if tcs:
        # The reference answers depend only on the problem, so compute them
        # once per problem instead of once per log and per test case.
        if problem_id not in expected_answers:
            solution_fn = _load_callable(reference["solution"],
                                         reference["function_call"])
            expected_answers[problem_id] = [
                solution_fn(*copy.deepcopy(tc)) for tc in tcs
            ]

        correct = 0
        try:
            student_fn = _load_callable(item["solution"],
                                        reference["function_call"])
        except Exception:
            # Code that does not compile/run, or does not define the expected
            # function, passes no test case.
            student_fn = None

        if student_fn is not None:
            for tc, answer_solution in zip(tcs, expected_answers[problem_id]):
                try:
                    # Deep copy so a submission that mutates its arguments
                    # cannot corrupt the shared test case data.
                    answer_student = student_fn(*copy.deepcopy(tc))
                except Exception:
                    # A crash on a test case counts as a wrong answer.
                    continue
                if answer_student == answer_solution:
                    correct += 1

        percentage = 100 * correct / len(tcs)

    userlog_testcase.append({
        "user__id": item["user__id"],
        "problem__id": problem_id,
        "timestamp": item["timestamp"],
        "test_case_percentage": percentage,
    })

In [None]:
# Run every reference solution in `data` against each of its test cases,
# printing the test case before the call.
for item in data:
    for tc in item["test_case"]:
        print(tc)
        # Transform solution into python function.
        # exec() is called without an explicit namespace, so whatever the
        # solution source defines lands in the current namespace, which is
        # where the eval() below looks the function up.
        function_obj = compile(item["solution"], 'solution', 'exec')
        exec(function_obj)
        # The test case is unpacked into positional arguments. `answer` is
        # assigned but not used further in this cell.
        answer = eval(item["function_call"])(*tc)
        