In [2]:
import numpy as np
import os
from tqdm import tqdm
import csv
import ast
import antlr4
from antlr4 import *
from antlr4.tree.Trees import Trees
from antlr.JavaLexer import JavaLexer
from antlr.JavaParser import JavaParser

Create Classes for Attributes

In [3]:
class Student():
    def __init__(self, ID, score, assignments):
        self.ID = ID
        self.score = score
        self.assignments = assignments

class Assignment(): 
    def __init__(self, ID, problems):
        self.ID = ID
        self.problems = problems

class Problem(): 
    def __init__(self, ID, codeStates):
        self.ID = ID
        self.codeStates = codeStates

class CodeState():
    def __init__(self, ID, code, compileResult, compileMessageType, compileMessageData, score, time):
        self.ID = ID
        self.code = code
        self.compileResult = compileResult
        self.compileMessageType = compileMessageType
        self.compileMessageData = compileMessageData
        self.score = score
        self.time = time


Import Datasets

In [4]:
def load_data():

    codeStates = {}
    with open('./Datasets/CodeStates.csv', mode='r', encoding="utf8") as file:
        csv_reader = csv.reader(file)
        next(csv_reader)
        for line in csv_reader:
            codeStates[line[0]] = line[1] 
            
    mainTable = {}
    with open('./Datasets/MainTable.csv', mode='r', encoding="utf8") as file:
        csv_reader = csv.reader(file)
        next(csv_reader)
        for line in csv_reader:
            # Student
            if line[0] not in mainTable:
                mainTable[line[0]] = {}
            # Assignment
            if line[5] not in mainTable[line[0]]:
                mainTable[line[0]][line[5]] = {}
            # Problem
            if line[6] not in mainTable[line[0]][line[5]]:
                mainTable[line[0]][line[5]][line[6]] = {}
            # CodeState
            if line[7] not in mainTable[line[0]][line[5]][line[6]]:
                # mainTable[Student][Assignment][Problem][Codestate]
                mainTable[line[0]][line[5]][line[6]][line[7]] = {} 
            mainTable[line[0]][line[5]][line[6]][line[7]]['time'] = line[2]
            mainTable[line[0]][line[5]][line[6]][line[7]]['code'] = codeStates[line[7]]
            if line[10] != '':
                mainTable[line[0]][line[5]][line[6]][line[7]]['score'] = line[10] 
            if line[11] != '':
                mainTable[line[0]][line[5]][line[6]][line[7]]['compileResult'] = line[11]
            if line[12] != '':
                mainTable[line[0]][line[5]][line[6]][line[7]]['compileMessageType'] = line[12]
                mainTable[line[0]][line[5]][line[6]][line[7]]['compileMessageData'] = line[13]
    
    studentScore = {}
    with open('./Datasets/Subject.csv', mode='r', encoding="utf8") as file:
        csv_reader = csv.reader(file)
        next(csv_reader)
        for line in csv_reader:
            studentScore[line[0]] = line[1] 

    studentDataset = []
    for student in tqdm(mainTable.keys()):
        assignments = []
        for assignment in mainTable[student]:
            problems = []
            for problem in mainTable[student][assignment]:
                codeStates = []
                for codeState in mainTable[student][assignment][problem]:
                    compileMessageType = ''
                    compileMessageData = ''
                    if 'complieMessageType' in mainTable[student][assignment][problem][codeState]:
                        compileMessageType = mainTable[student][assignment][problem][codeState]['compileMessageType']
                        compileMessageData = mainTable[student][assignment][problem][codeState]['compileMessageData']
                    cS = CodeState(
                        codeState, 
                        mainTable[student][assignment][problem][codeState]['code'],
                        mainTable[student][assignment][problem][codeState]['compileResult'],
                        compileMessageType,
                        compileMessageData,
                        mainTable[student][assignment][problem][codeState]['score'],
                        mainTable[student][assignment][problem][codeState]['time']
                    )
                    codeStates.append(cS)
                pb = Problem(problem, codeStates)
                problems.append(pb)
            ass = Assignment(assignment, problems)
            assignments.append(ass)
        stu = Student(student, studentScore[student], assignments)
        studentDataset.append(stu)

    return np.array(studentDataset)

    



In [5]:
dataset = load_data()

100%|██████████| 506/506 [00:00<00:00, 1244.40it/s]


In [11]:
print(dataset[0].assignments[0].problems[0].codeStates[0].code)
print(dataset[0].assignments[0].problems[0].codeStates[0].score)

public int sortaSum(int a, int b)
{
    if (a + b >= 10 && a + b <= 19)
    {
     return 20;   
    }
    else
    {
     return a + b;   
    }
}

1.0


In each layer, [AST, score, other factors]. We need to concat AST + score + factors. The impact of score and factor maybe small because AST is higher dem.
1) use final state to predict the final 
2) use attension 
3) Pre train, predict next score between 0 and 1 

In [13]:
code = dataset[0].assignments[0].problems[0].codeStates[0].code
lexer = JavaLexer(antlr4.InputStream(code))
stream = antlr4.CommonTokenStream(lexer)
parser = JavaParser(stream)
tree = parser.compilationUnit()
elements = tree.toStringTree(recog=parser)
#with open("example.txt", mode="w") as file:
    #file.write(elements)
print(Trees.toStringTree(tree, None, parser))

<class 'str'>


line 1:7 extraneous input 'int' expecting {'abstract', 'class', 'enum', 'final', 'interface', 'private', 'protected', 'public', 'static', 'strictfp', 'module', 'open', 'requires', 'exports', 'opens', 'to', 'uses', 'provides', 'with', 'transitive', 'var', 'yield', 'record', 'sealed', 'permits', 'non-sealed', ';', '@', IDENTIFIER}
line 1:19 mismatched input '(' expecting '.'
line 1:25 no viable alternative at input 'int a,'
line 1:25 mismatched input ',' expecting '.'
line 1:32 mismatched input ')' expecting '.'
line 4:4 extraneous input '{' expecting {'abstract', 'class', 'enum', 'final', 'interface', 'private', 'protected', 'public', 'static', 'strictfp', 'module', 'open', 'requires', 'exports', 'opens', 'to', 'uses', 'provides', 'with', 'transitive', 'var', 'yield', 'record', 'sealed', 'permits', 'non-sealed', ';', '@', IDENTIFIER}
