# Automatic Lab Evaluator

## Assessment based on student-provided results

* Jerónimo Arenas García
* Jesús Cid Sueiro

Version History:

    Version 0.1 (Dec. 2016)
        - Firts python 2 version and python 3 adaptation
    Version 0.2 (Dec. 2017) 
        - All configurable parameters in the first and second code cell.
        - Managing multiple mat files in students' zip files.
        - Corrected bug in readdatafiles (new student variables were not properly added to the dataframe)
        - Managing multiple class lists in Spanish and English.
        - External evaluation functions
        - New format of students report.
        - Uses ExamProject class.
        - Integrated student groups

In [1]:
import numpy as np
import pandas as pd
import os
from os.path import isfile, join
import scipy.io as sio
import scipy
import zipfile as zp
import shutil
import difflib
import csv
import glob

# Evaluation libraries
from lib.dbEvaluatorB12 import *
from lib.dbSolverB12 import *

## 1. Configurable parameters:

In [2]:
# #########
# Libraries
# #########

# Use the inherited examProject class corresponding to the exam to be evaluated.
import lib.examProjectB3 as ex

# #################
# Files and folders
# #################

# Project path
# project_path = '../LabEvaluationProjects/ProjectB3_1718_GSCGT/'
# project_path = '../LabEvaluationProjects/ProjectB3_1718/'
project_path = 'prb12'

# Exam_name. An exam evaluation project main contain several exams. Write here which one of them you want to evaluate.
# exam_label = 'ExLabB12_0'
exam_label = 'ExLabB12_2'

# Expected name of the students' results file. 
# This is used to disambiguate situations where the student uploaded multiple mat files
# (e.g. the input data file provided with the exam statement, or .mat files in .DS_STORE folders)
results_fname = 'results.mat'

# Output file name with
finalnotes_fname = 'student_notes.xlsx'

# ####
# Exam
# ####

# Penalties:
p_nocode = 0.75
p_noresults = 0.75
p_delay = 0.25      # score reduction per minute.

exam = ex.ExamProjectB3(project_path)
exam.load()

# Paths to input and output files
class_list_path = exam.f_struct['class_list']
all_students_path = exam.f_struct['all_students']
data4st_path = exam.f_struct['data4students']
results_path = exam.f_struct['student_results'] + exam_label + '/'
output_path = exam.f_struct['eval_results'] + exam_label + '/'
csv_questions_list = exam.f_struct['exam_statement'] + exam_label + '/' + exam_label + '.csv'

In [3]:
# List of exam questions from the database
print(csv_questions_list)
with open(csv_questions_list, 'r') as f:
    reader = csv.reader(f)
    questions = list(reader)[0]

# If the fils os not available, you can write the list os questions by hand 
# questions = ['F0_estimate_06', 'F1_model_01', 'F2_predict_03', 'F4_lms_02']
print("Questions in the exam: {0}".format(questions))

prb12/exam_statement/ExLabB12_2/ExLabB12_2.csv
Questions in the exam: ['R0_preproc_02', 'R1_calculow_01', 'R1_calculow_06', 'R2_estimacion_01', 'C0_analisis_01b', 'C0_analisis_02', 'C1_clasx1_01']


## 2. Read datafiles for all students

Student datafiles can be in any of the following formats:

   * `'.zip'`: When uncompressed, the zip may contain one or several matlab files. All matlab files are read and incorporated to a pandas Dataframe where each student is a column, and each index is a variable available for the exam solution
   * `'.mat'`: All data variables for the students are given in a single matlab file

In [4]:
def getFileName(fpath):
    return fpath.split('/')[-1]

def readData4st(datafiles_path):
    '''
    This function is used for reading the matlab data files provided to students 
    '''

    # Read matlab files in the input directory tree
    datafiles = glob.glob(datafiles_path + '**/*.mat', recursive=True)

    df = pd.DataFrame()
    
    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    for dtfile in sorted(datafiles):
        
        # The tag can be the NIA, the student's name or just the begining of some other file
        tag = getFileName(dtfile).split('.')[0]

        # Load matlab data file
        data = sio.loadmat(dtfile, squeeze_me=True)
            
        # Read all variable names and the corresponding data values
        idx = []
        val = []
        for var in [el for el in data.keys() if not el.startswith('_')]:
            idx.append(var)
            val.append(data[var])
          
        # Add to dataframe
        df2 = pd.DataFrame()
        df2[tag] = pd.Series(val, index = idx)
        df = pd.concat([df, df2], axis=1)
        df.sort_index(axis=1, inplace=True)
    return df

In [5]:
# Read students' data.
print(data4st_path)
student_data = readData4st(data4st_path)

print('')
print('Number of students in dataframe:', str(student_data.shape[1]))
print('Number of variables read:', str(student_data.shape[0]))

print('Displaying data for first students ... ')
student_data[student_data.columns[:7]]

student_data['100318675']


prb12/data4students/
Processing 93 files in prb12/data4students/ ...

Number of students in dataframe: 93
Number of variables read: 6
Displaying data for first students ... 


Xtest     [[1.14320787739, 0.850436103975, 1.49963978283...
Xtrain    [[0.444210443191, 2.39799424856, 2.8880844494,...
sTrain    [0.591717558972, -0.901350988463, 0.9203210619...
xTest     [[1.34129788493, 3.56510520112, -1.7304584126,...
xTrain    [[-0.1082084353, 5.40573946316, -2.42965963338...
ytrain    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, ...
Name: 100318675, dtype: object

## 2. Read answers provided by students

In [6]:
def readdatafiles(datafiles_path, splitsymbol):
    '''
    This function is used for reading both the data files provided to students and the response
    files provided by students
    '''

    # Read file paths
    datafiles = glob.glob(datafiles_path + '**/*.*', recursive=True)
    # datafiles = [f for f in os.listdir(datafiles_path) if isfile(join(datafiles_path, f))]

    temporary_dir = './tmp'
    df = pd.DataFrame()
    
    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    for dtfile in sorted(datafiles):
        idx = []
        val = []
        makedf = True      # This is a default flag. If it remains True, a new column will be added to the df

        # The tag can be the NIA, the student's name or just the begining of some other file
        tag = getFileName(dtfile).split(splitsymbol)[0]
        # tag = dtfile.split(splitsymbol)[0]

        if dtfile.endswith('.zip'):
            
            # Read names of .mat files
            zpobj = zp.ZipFile(dtfile)            
            # zpobj = zp.ZipFile(join(datafiles_path, dtfile))            
            mat_fnames = [f for f in zpobj.namelist() if f.endswith('mat')]
            
            # mat file selection. This is to disambiguate cases with multiple files
            n = len(mat_fnames)
            if n == 0:
                print ('    WARNING: {} has not delivered any mat file'.format(tag))
                fname = None
            else:
                if n > 1:
                    print('    WARNING: {} has provided multiple mat files:'.format(tag))
                    print('        {0}'.format(mat_fnames))                  

                # Define a nested set of criteria to select a single mat file form multiple options:
                criteria = [mat_fnames]
                criteria.append([f for f in criteria[0] if '.ipynb_checkpoints' not in f])
                criteria.append([f for f in criteria[1] if f[0].isalnum()])
                criteria.append([f for f in criteria[2] if getFileName(f)[0].isalnum()])
                criteria.append([f for f in criteria[3] if getFileName(f)[0].isalpha()])
                criteria.append([f for f in criteria[4] if f.endswith(results_fname)])

                # Selecte the file according to the most restrictive criterium with non empty members.
                for c in reversed(criteria):
                    if len(c) > 0:
                        # We take the first file in the list (an arbitrary choice)
                        fname = c[0]
                        break
                if n > 1:
                    print('        Selected file: {}'.format(fname))

            # Read the selected mat file, if any
            if fname is not None:
                # Matlab files are extracted to a temporal subfolder
                zpobj.extract(fname, temporary_dir)
                data = sio.loadmat(join(temporary_dir, fname), squeeze_me=True)
                
                # Read all variable names and the corresponding data values
                for var in [el for el in data.keys() if not el.startswith('_')]:
                    idx.append(var)
                    val.append(data[var])

            # Remove temporary directory, if it has been created
            if os.path.exists(temporary_dir):
                shutil.rmtree(temporary_dir)

        elif dtfile.endswith('.mat'):

            # This block of code was removed from the original notebook.
            # I have rescued it from another notebook
            # data = sio.loadmat(join(datafiles_path, dtfile), squeeze_me=True)
            data = sio.loadmat(dtfile, squeeze_me=True)
            
            # Read all variable names and the corresponding data values
            for var in [el for el in data.keys() if not el.startswith('_')]:
                idx.append(var)
                val.append(data[var])

        elif dtfile.endswith('m') or dtfile.endswith('py') or dtfile.endswith('.ipynb'):
            print('    WARNING: {} has provided a code file only:'.format(tag))
            print('        {0}'.format(dtfile))
        else:
            makedf = False
            if os.path.isfile(dtfile):
                print('    File ignored: {0}'.format(dtfile))
            
        if makedf:
            df2 = pd.DataFrame()
            df2[tag] = pd.Series(val, index = idx)
            df = pd.concat([df, df2], axis=1)
            df.sort_index(axis=1, inplace=True)
    return df
        

### 2.1. Requested variable names.

In order to get the names of the requested variables, we solve the exam with an arbitrary set of variables.

In [7]:
data = student_data[student_data.columns[0]].to_dict()
print(questions)
solution, scoring_ex = solveExam(questions, data)
truenames = list(solution.keys())

print(truenames)

['R0_preproc_02', 'R1_calculow_01', 'R1_calculow_06', 'R2_estimacion_01', 'C0_analisis_01b', 'C0_analisis_02', 'C1_clasx1_01']
['xMin', 'xMax', 'xmTrain', 'xmVal', 'we', 'w3', 'sEst', 'm0', 'P0', 'PFAx1']


### 2.2. Read student results into panda dataframe

In [8]:
# Read student results
student_results = readdatafiles(results_path, splitsymbol='_')

# Build a set of indices containing the expected variable names and all other variables provided by students
newindex = truenames + [el for el in student_results.index.tolist() if el not in truenames]

student_results = student_results.reindex(newindex)

print('')
print('Number of students in dataframe:', str(student_results.shape[1]))
print('Number of variables read:', str(student_results.shape[0]))

print('Displaying data for first students ... ')
student_results[student_results.columns[0:7]]

Processing 19 files in prb12/student_results/ExLabB12_2/ ...
        ['Lab12/results.mat', 'Lab12/Classificationdata.mat', '__MACOSX/Lab12/._Classificationdata.mat', 'Lab12/Regressiondata.mat', '__MACOSX/Lab12/._Regressiondata.mat']
        Selected file: Lab12/results.mat
        ['ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/results.mat', '__MACOSX/ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/._results.mat', 'ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/Classificationdata.mat', '__MACOSX/ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/._Classificationdata.mat', 'ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/Regressiondata.mat', '__MACOSX/ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/._Regressiondata.mat']
        Selected file: ESTEFANIA FUENTES FERNANDEZ_1162390_assignsubmission_file_Lab12/results.mat
        ['100318329/results.mat', '100318329/Classificationdata.mat', '__MA

Unnamed: 0,ADRIAN CRUZ SACEDO,ALEJANDRO LOPEZ CARRILLO,ALEJANDRO RODRIGUEZ ORTIZ,ANDONI TAJUELO MUÑOZ,BOGDAN GHEORGHE PRESCORNITOIU DRAGOS,CARLOS DIAZ FERNANDEZ,CARLOS MARTIN-ROMO BARRILERO
xMin,"[-7.52349944319, -5.31364115368, -3.8738923601...","[0.102752254317, -2.73377320371, -6.2007943157...",,,"[-2.67253462164, -5.39406407814, -5.6785920915...","[-9.45899741483, -2.24412361211, -6.0130768345...","[-5.27681419983, -2.18999404246, -4.9785684109..."
xMax,"[5.72159526699, 8.00527911473, 4.86096975305, ...","[9.3030382132, 9.90038970652, 6.3248621666, 6....",,,"[6.58981170788, 12.0939746503, 2.77769130374, ...","[-0.214798283946, 7.02657986514, 1.73980187161...","[6.05949648904, 6.97970182443, 3.81825837288, ..."
xmTrain,,,,,,,
xmVal,,,,,,,
we,,,,,,,
w3,,,,,,,
sEst,6.63108,1.10386,0.5,,,1.18932,3.4484
m0,"[0.240829656198, -0.0515301221038, 0.308403337...",,0.504132,0.0812296,,3.55481,1.23982
P0,0.39,,0.2,,,0.435,
PFAx1,0.855794,,"[[0.639257682682, nan], [nan, 0.639257682682]]",0.46763,,,


### 2.3. Common Mistakes on variable names

In view of all variable names provided by all students, we may decide to allow alternative names for variables without any penalty

In [9]:
# print(student_results)

In [10]:
print('Number of students in dataframe:', str(student_results.shape[1]))

print('\nDisplaying number of missing data per variable name.')
print('Those with a large number are potential common mistakes for a variable name')

student_results.isnull().sum(axis=1)

Number of students in dataframe: 19

Displaying number of missing data per variable name.
Those with a large number are potential common mistakes for a variable name


xMin        5
xMax        5
xmTrain    19
xmVal      19
we         19
w3         19
sEst        7
m0          6
P0         11
PFAx1       9
etaNP      13
etaNp      18
m1         16
mTrain     18
mo         18
sTest      17
tm         13
tv         14
vm         18
we3         2
we4         5
xeTrain    18
xnTest      5
xnTrain     2
dtype: int64

In [11]:
###########################################
# EXAM DEPENDENT VARIABLE

#Dictionary with accepted mistakes in the following format
#  Expected variable name : Accepted mistake
if exam_label == 'ExLabB12_0':
    Mistakes = {'xnVal': 'xnTest', 'wp': 'w', 'EAPval':'EAP'}
elif exam_label == 'ExLabB12_1':
    Mistakes = {'xnTest': 'xnTest.mat', 'xnVal': 'xnTest',
                'we': 'w2', 'w5': 'we', 
                'w': 'w1',
                'PFAx1': 'PFAX1',
                'uNP': 'etaNPX1', 'uNP': 'etanNPx1', 'etaNPx1': 'uNP'}
elif exam_label == 'ExLabB12_2':
    Mistakes = {'xnTest': 'xnTest.mat', 'xmVal': 'xnTest',
                'xnTrain': 'xnTrain.mat', 'xmTrain': 'xnTest',
                'we': 'we3', 
                'w3': 'we4',
                'm0': 'mo'}
    
##########################################

# Fill and empty variable by the value of its accepted mistake.
for el in Mistakes:    
    # The following 'if is necessary because some of the mistakes in the dictionary may not happen.
    if Mistakes[el] in student_results.index.tolist():
        # print(student_results.loc[Mistakes[el]])
        student_results.loc[el] = student_results.loc[el].fillna(student_results.loc[Mistakes[el]])

# Remove rows with the wrong variables.
for el in student_results.index.tolist():
    if el not in truenames:
        student_results.drop(el, inplace=True)
        
student_results.head(40)

Unnamed: 0,ADRIAN CRUZ SACEDO,ALEJANDRO LOPEZ CARRILLO,ALEJANDRO RODRIGUEZ ORTIZ,ANDONI TAJUELO MUÑOZ,BOGDAN GHEORGHE PRESCORNITOIU DRAGOS,CARLOS DIAZ FERNANDEZ,CARLOS MARTIN-ROMO BARRILERO,DIEGO GUTIERREZ LOPEZ,ESTEFANIA FUENTES FERNANDEZ,GALAN YUDIE HARDJONO HARTINI,JAVIER GONZALEZ FUENTES,JAVIER NOGUES GARCIA,JORGE TORRES MARTINEZ DE BUJO,MICHAEL UMENDU RIOS,NURIA PORTAL CARRASQUILLA,PEDRO JAVIER MARTIN-DOIMEADIOS POZO,ROBERTO HERNANDEZ GONZALEZ,ROCIO BARTOLOME FERNANDEZ,SONIA PEREZ BLAZQUEZ
xMin,"[-7.52349944319, -5.31364115368, -3.8738923601...","[0.102752254317, -2.73377320371, -6.2007943157...",,,"[-2.67253462164, -5.39406407814, -5.6785920915...","[-9.45899741483, -2.24412361211, -6.0130768345...","[-5.27681419983, -2.18999404246, -4.9785684109...",,,"[[-3.96241065339, -0.183803487258, -1.92804529...","[-3.30723246772, -2.96605663314, -6.5062067209...","[-3.15862093999, -5.39765709337, -3.0977407340...",,1,-2.71995,"[-8.4310194725, -8.93002093304, -1.05155295343...","[57, 94, 71, 98, 9]","[0.527330912372, -4.85672217398, -0.6739582483...","[-5.00999166906, -5.45125459541, -4.6556559828..."
xMax,"[5.72159526699, 8.00527911473, 4.86096975305, ...","[9.3030382132, 9.90038970652, 6.3248621666, 6....",,,"[6.58981170788, 12.0939746503, 2.77769130374, ...","[-0.214798283946, 7.02657986514, 1.73980187161...","[6.05949648904, 6.97970182443, 3.81825837288, ...",,,"[[-3.96241065339, -0.183803487258, -1.92804529...","[3.39496332764, 5.89710520227, 1.9998701713, 1...","[5.72779807488, 3.56547475077, 5.834826272, 5....",,5,"[1.59794252444, 0.381372018679, 1.0640598707, ...","[4.37835312453, 2.72989870544, 8.53031313453, ...","[47, 23, 84, 43, 47]","[2.72957460734, -2.67473791837, 2.11570675741,...","[3.29993704078, 5.8797946741, 5.86313095701, 8..."
xmTrain,"[[0.606826142938, 0.719524517448, 0.1119593180...","[[-0.476894231036, -0.480153413212, -0.7916405...","[[-0.781701387785, 0.966947995755, -0.26397575...",,"[[0.751535723015, 0.514720324616, 0.3203622667...","[[0.566832112764, -0.0744900623736, 0.66534623...","[[0.609710567179, 0.368112243021, 0.4877494649...",,"[[0.318105655424, 0.239926236214, 0.3862742491...",,"[[0.373361684862, 0.820592396866, 0.5650271563...","[[0.554355246626, 0.657332639288, 0.5401008591...","[[1.48256210689, -1.38289121491, 0.84358615583...",,"[[8.39065477508, 6.00949877776, 8.31424582137,...",,"[[5.36615609939, 1.3334669278, -4.86777584389,...","[[1.01783114337, 0.476285508484, 0.90291266759...","[[0.721987454443, 0.481834236596, 0.8512214770..."
xmVal,"[[0.606826142938, 0.719524517448, 0.1119593180...","[[-0.476894231036, -0.480153413212, -0.7916405...","[[-0.781701387785, 0.966947995755, -0.26397575...",,"[[0.751535723015, 0.514720324616, 0.3203622667...","[[0.566832112764, -0.0744900623736, 0.66534623...","[[0.609710567179, 0.368112243021, 0.4877494649...",,"[[0.318105655424, 0.239926236214, 0.3862742491...",,"[[0.373361684862, 0.820592396866, 0.5650271563...","[[0.554355246626, 0.657332639288, 0.5401008591...","[[1.48256210689, -1.38289121491, 0.84358615583...",,"[[8.39065477508, 6.00949877776, 8.31424582137,...",,"[[5.36615609939, 1.3334669278, -4.86777584389,...","[[1.01783114337, 0.476285508484, 0.90291266759...","[[0.721987454443, 0.481834236596, 0.8512214770..."
we,"[-6.94518532888, 6.00382712571, 0.506681797082...","[0.203554175446, 0.858909069228, 0.11645754566...","[0.127458071863, 0.855501259987, 0.13654176997...","[0.436967247599, 0.222193426388, 0.22142850742...","[-5.30856250793, 3.76018755338, 1.33721339809,...","[-5.3585867704, 3.08659282106, 1.46292402408, ...","[-6.21393847088, 4.72384868527, -0.18340927389...","[0.00629560707493, 0.419966141698, 0.658004295...",,"[-0.183803487258, -0.804458434548, 1.810622880...","[-4.79510814958, 2.18867694594, 2.2967850813, ...","[-5.33900904471, 3.56261991425, 1.36395591504,...","[-0.348861397014, 0.348091648997, 0.1130277736...","[1, 2, 3, 4, 5, 6]","[-0.481209168303, 0.165753782019, -0.058516796...","[-4.19151448546, 34.5890633193, -152.043980873...",,"[-0.037854277291, 0.489324072327, 0.4831535169...","[-6.4288072689, 4.60653072613, 0.998794988313,..."
w3,"[-2.57508894419, 6.71659291543, -2.14070017242...",,"[0.12745807186304284, 164.73717041446116, 1, 1...","[0.177718875346, 0.23505183761, -0.01771865053...",,"[-5.38430805715, 3.17843204676, 2.31944862603,...","[-4.17830809728, 4.34374474654, 1.20812013007,...","[1.02952194926, 0.484268252312, -0.90060661526...","[7.22689742052, 0.975570668084, -10.0914140079...",,"[1.86469286187, 2.32827464826, -5.63642468983,...","[0.448809122171, 1.58078262883, -3.36841356132...",,"[1, 2, 3, 4, 5, 6]","[[-0.37082404183, -1.60491656484, 0.2599322294...","[0.433375439274, 0.328999960902, -0.0899090846...",,"[-0.037854277291011464, 95.85968781645066, 1, ...","[0.743606798569, 0.521124579528, -0.0451147857..."
sEst,6.63108,1.10386,0.5,,,1.18932,3.4484,,"[-5.21521679319, -1.73221098699, 0.29263431464...","[6.67321520579, -0.241231875747, -0.2604510637...",0.582088,"[-0.546727860444, -0.591999665962, 0.826894578...",,,,0.528992,,"[[-0.037854277291, 0.489324072327, 0.483153516...",1.27117
m0,"[0.240829656198, -0.0515301221038, 0.308403337...",,0.504132,0.0812296,,3.55481,1.23982,0.445,0.917978,0.58,,1.43648,0.555,,"[1.80272345271, 1.40586548026, 1.33941435204, ...",,"[0.91185462356, 0.963438150335, 0.695393067823...",0.508478,-0.686423
P0,0.39,,0.2,,,0.435,,1.2531,,,,0.47,"[[0.848235510251, nan, nan, nan, nan, nan, nan...",,0.5,,,0,
PFAx1,0.855794,,"[[0.639257682682, nan], [nan, 0.639257682682]]",0.46763,,,,0.28386,0.179315,0.659142,,0.845124,"[[0.588037663366, nan, nan, nan, nan, nan, nan...",,0,,,0,


### 2.4. Name to NIA dictionary

Finally, since datafiles are created by NIA and results are available per student name, we need to create a dictionary connecting them.

Student names are taken from one or several student lists. Using multiple list is useful when the same exam is stated to multiple groups, or in the frequent situation where students from one group carry out the exam of another group.

In [12]:
# Select xls file names in the class list folder
print("Reading class lists...")
xls_files = [f for f in os.listdir(class_list_path) if f.endswith('.xls') or f.endswith('.xlsx')]
if len(xls_files) > 1:
    print("    There are {} excel files in the class_list folder.".format(len(xls_files)))
    print("    All students will be merged in a single list.")

# Load all xls files into dataframes
groups = []
for g in xls_files:
    df = pd.read_excel(class_list_path + g)
    # Translate column names form Spanish to English.
    # This is required to concatenate student lists in different languages.
    df.rename(columns={'Dirección de correo': 'Email address',
                       'Apellido(s)': 'Surname', 
                       'Nombre': 'First name'}, inplace=True)
    groups.append(df)

# Concatenate class lists (we do not expect duplicated NIU's in different lists)
student_NIA_names = pd.concat(groups)
print("Done. {0} students in the lists".format(len(student_NIA_names)))
student_NIA_names.sort_values('Surname')     #.head()

Reading class lists...
    There are 2 excel files in the class_list folder.
    All students will be merged in a single list.
Done. 93 students in the lists


Unnamed: 0,NIU,Surname,First name,Email address
63,100346720,AGREDA JIMENEZ,PEDRO,100346720@alumnos.uc3m.es
10,100293005,ALONSO CALVO,ENRIQUE,100293005@alumnos.uc3m.es
56,100330672,ARAQUE MUNICIO,GERSHON,100330672@alumnos.uc3m.es
74,100346888,ASTILLEROS APARICIO,CARLOS,100346888@alumnos.uc3m.es
70,100346814,BADIA NUÑEZ,DAVID,100346814@alumnos.uc3m.es
1,100277378,BARBERO HERRANZ,ANGEL,100277378@alumnos.uc3m.es
0,100315121,BARBOSA MARTIN,FELIPE,100315121@alumnos.uc3m.es
33,100317556,BARTOLOME FERNANDEZ,ROCIO,100317556@alumnos.uc3m.es
7,100291362,BELLIDO CASTILLO,DANIEL,100291362@alumnos.uc3m.es
52,100330499,CARMONA LOPEZ,RAQUEL,100330499@alumnos.uc3m.es


In [13]:
# UTF-8 encoding of everything
# AFAIK, this is no longer needed in Python 3, but I left it just in case...
for fld in student_NIA_names.keys():
    if fld != 'NIU':
        student_NIA_names[fld] = student_NIA_names[fld].str.encode('utf8')

# Build dictionary NIA: name
NIA_name = {}
for el in student_results.columns.tolist():

    # Find the student name in student_NIA_names that is most similar to el
    sim_list = []
    for idx, NIA in enumerate(student_NIA_names['NIU'].values):
        std_name = str(student_NIA_names['First name'].values.tolist()[idx]) + ' ' + \
                   str(student_NIA_names['Surname'].values.tolist()[idx])
        sim_list.append(difflib.SequenceMatcher(a=el.lower(), b=std_name.lower()).ratio())

    max_sim = max(sim_list)
    max_idx = sim_list.index(max_sim)
    NIA_name[student_NIA_names['NIU'].values.tolist()[max_idx]] = el

# Build reverse dictionary name: NIA
name_NIA = {NIA_name[el]: el for el in NIA_name}

### 2.5. Group of each student

We will include the information about the group in the final dataframe of results so as to make the separation of evaluation reports easier.

In [14]:
NIA_group = pd.read_csv(all_students_path)[['NIA', 'group']]
NIA_group.sort_values(['NIA']).head()

Unnamed: 0,NIA,group
0,100073324,[201718] M2.217.15939-61
1,100277378,[201718] M2.217.15939-61
2,100277493,[201718] M2.217.15939-61
3,100282920,[201718] M2.217.15939-61
4,100283839,[201718] M2.217.15939-61


At this point we have:

   * student_data: dataframe with data given to the students. Each index is a variable, and each column a NIA
   * student_results: dataframe with student results. Each index is a variable, and each column a name
   * NIA_name: NIA to name dictionary
   * name_NIA: name to NIA dictionary
   * NIA_group: dataframe

## 3. Exam evaluation

To carry out the evaluation of the exam, we use the external evaluation libraries.

Function evaluateExam computes the correct solutions for the given data and compares them with the responses provided by the students.

In [15]:
df = pd.DataFrame()

print('Evaluating all students... ')
for NIA in NIA_name:

    name = NIA_name[NIA]
    print('Evaluating {0} {1} ...'.format(NIA, name))

    # Evaluate the exam from the data provided to the student and the student response
    dataex = student_data[str(NIA)].to_dict()
    response = student_results[name].to_dict()
    exam_report = evaluateExam(questions, dataex, response)

    # Convert exam_report, which is a nested dictionary, into a pandas dataframe
    # Note that all this conversion to and from dictionaries can be avoided if evaluateExam 
    # worked with dataframes. This is a pending task.
    ex = {}
    # Note that we take the last 2 characters of the group name only.
    ex[('', 'Group')] = NIA_group[NIA_group['NIA'] == NIA]['group'].tolist()[0][-2:]   
    for v  in exam_report:
        for w in exam_report[v]:
            ex[(v,w)] = exam_report[v][w]
    
    df[NIA_name[NIA]] = pd.Series(ex)

# Take the transpose to place students in rows, and restate the original variable ordering
# This is because pd.Series does not preserve the order.
cols = list(ex.keys())
df = df.T[cols]

# Pretty print results
df[df.columns[:]].head(100)

Evaluating all students... 
Evaluating 100306582 ADRIAN CRUZ SACEDO ...
Evaluating 100329964 ALEJANDRO LOPEZ CARRILLO ...
         Removing older response to w_est
Evaluating 100291909 ALEJANDRO RODRIGUEZ ORTIZ ...
Evaluating 100316764 ANDONI TAJUELO MUÑOZ ...
Evaluating 100283839 BOGDAN GHEORGHE PRESCORNITOIU DRAGOS ...
         Removing older response to w_est
Evaluating 100303510 CARLOS DIAZ FERNANDEZ ...
Evaluating 100305285 CARLOS MARTIN-ROMO BARRILERO ...
Evaluating 100317070 DIEGO GUTIERREZ LOPEZ ...
Evaluating 100305941 ESTEFANIA FUENTES FERNANDEZ ...
Evaluating 100073324 GALAN YUDIE HARDJONO HARTINI ...
Evaluating 100318329 JAVIER GONZALEZ FUENTES ...
Evaluating 100277493 JAVIER NOGUES GARCIA ...
Evaluating 100346684 JORGE TORRES MARTINEZ DE BUJO ...
         Removing older response to w_est
Evaluating 100315702 MICHAEL UMENDU RIOS ...
         Removing older response to w_est
Evaluating 100307094 NURIA PORTAL CARRASQUILLA ...
Evaluating 100307156 PEDRO JAVIER MARTIN-DOIMEADI

Unnamed: 0_level_0,Unnamed: 1_level_0,xMin,xMin,xMin,xMin,xMax,xMax,xMax,xMax,xmTrain,...,m0,P0,P0,P0,P0,PFAx1,PFAx1,PFAx1,PFAx1,Exam
Unnamed: 0_level_1,Group,Dim,w,s,w·s,Dim,w,s,w·s,Dim,...,w·s,Dim,w,s,w·s,Dim,w,s,w·s,Score
ADRIAN CRUZ SACEDO,61,OK,1,1,1,OK,1,1,1,Error,...,0,OK,1,1,1,OK,1.0,0,0.0,6.0
ALEJANDRO LOPEZ CARRILLO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,No data,1.0,0,0.0,2.0
ALEJANDRO RODRIGUEZ ORTIZ,61,No data,1,0,0,No data,1,0,0,Error,...,0,OK,1,0,0,No data,1.0,0,0.0,0.0
ANDONI TAJUELO MUÑOZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,No data,1,0,0,OK,1.0,0,0.0,0.0
BOGDAN GHEORGHE PRESCORNITOIU DRAGOS,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,No data,1.0,0,0.0,4.0
CARLOS DIAZ FERNANDEZ,61,OK,1,1,1,OK,1,1,1,Error,...,0,OK,1,1,1,No data,1.0,0,0.0,5.0
CARLOS MARTIN-ROMO BARRILERO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,No data,1.0,0,0.0,5.0
DIEGO GUTIERREZ LOPEZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,OK,1,0,0,OK,1.0,0,0.0,0.0
ESTEFANIA FUENTES FERNANDEZ,61,No data,1,0,0,No data,1,0,0,Error,...,0,No data,1,0,0,OK,1.0,0,0.0,2.0
GALAN YUDIE HARDJONO HARTINI,61,Error,1,0,0,Error,1,0,0,No data,...,0,No data,1,0,0,OK,0.9,1,0.9,0.9


### 3.1. Penalties

In addition to the evaluation of the results file provided by the student, the final mark depends on other factors:

1. If the student uploaded the code files
2. Delays in delivering the files during the exam.
3. Errors in the delivering process (use of e-mail, incorrect file types, etc).

The following function is used to identify the code uploaded by the student.

In [16]:
def detectCode(datafiles_path, splitsymbol):
    '''
    This function is used to check if the student has uploaded a python or a matlab code file
    '''

    # Read file paths
    # datafiles = [f for f in os.listdir(datafiles_path) if isfile(join(datafiles_path, f))]
    datafiles = glob.glob(datafiles_path + '**/*.*', recursive=True)
    
    # Read files
    df = pd.DataFrame()
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    for dtfile in datafiles:
        
        # This is a flag. If it remains True, a new column will be added to the df
        makedf = True      

        # The tag can be the NIA, the student's name or just the begining of some other file
        # tag = dtfile.split(splitsymbol)[0]
        tag = getFileName(dtfile).split(splitsymbol)[0]

        if tag in name_NIA:
        
            if dtfile.endswith('.zip'):
             
                # Read names of .mat files
                # files_in_zip = zp.ZipFile(join(datafiles_path, dtfile)).namelist()            
                files_in_zip = zp.ZipFile(dtfile).namelist()            

                # mat file selection. This is to disambiguate cases with multiple files
                n_mat = len([f for f in files_in_zip if f.endswith('.m')])
                n_py = len([f for f in files_in_zip if f.endswith('.py') or f.endswith('.ipynb')])

                if n_py * n_mat > 0:
                    print('WARNING: {} has delivered both matlab and python code'.format(name))

                if n_py > 0:
                    code = 'Py'
                elif n_mat > 0:
                    code = 'Mat'
                else:
                    code = 'None'

            elif dtfile.endswith('.py') or  dtfile.endswith('.ipynb'):
                code = 'Py'            
            elif dtfile.endswith('.m'):  
                code = 'Mat'
            else:
                code = 'None'

            df2 = pd.DataFrame()
            df2[tag] = pd.Series(code, index = ['Code'])
            df = pd.concat([df, df2], axis=1)
        elif os.path.isfile(dtfile):
            print('    File ignored: {0}'.format(dtfile))
    return df.T

In [17]:
# Identify the code delivered by the students
code_data = detectCode(results_path, splitsymbol='_')
code_data[code_data.columns][:].head()

# Add the code data to the evaluation dataframe
df['Delivery', 'Code'] = code_data
df['Delivery', 'Delay'] = 0.0
df['Delivery', 'Factor'] = 1.0

# Penalties for students that did not delivered any code.
df.loc[df['Delivery', 'Code'] == 'None', ('Delivery', 'Factor')] = 0.5 

Processing 19 files in prb12/student_results/ExLabB12_2/ ...


In [18]:
# This cell contains project specific instructions.

# PENALTIES:
if project_path == '../LabEvaluationProjects/ProjectB3_1718/':

    # STUDENTS THAT DID NOT DELIVER ANY RESULTS.
    #     ALEJANDRO GOMEZ RODENAS: (no e-mail) Delivers code only.
    #         Results generated with penalty
    df.at['ALEJANDRO GOMEZ RODENAS', ('Delivery', 'Factor')] = p_noresults

    #     ANDONI TAJUELO MUÑOZ: (no e-mail) Does not deliver results file. However, code computes some variables.
    #         Results generated with penalty
    df.at['ANDONI TAJUELO MUÑOZ', ('Delivery', 'Factor')] = p_noresults

    #     HAMZA EL HAMDAOUI ABOUEL ABBES: (e-mail) His computer get blocked and could not generate results file 
    #         savemat command incorrect. Code generated without penalty.
    df.at['HAMZA EL HAMDAOUI ABOUEL ABBES', ('Delivery', 'Factor')] = 1.0

    #     ROCIO BARTOLOME FERNANDEZ: (no e-mail) entrega un fichero Lab12.7z, pero cambia el nombre por Lab12zip
    #         Results generated with penalty.
    df.at['ROCIO BARTOLOME FERNANDEZ', ('Delivery', 'Factor')] = p_noresults

    #     CRISTINA GARCIA GARCIA: (e-mail) Does not deliver results file. Code does not compute any of the variables 
    #     NEREA MERIDA QUERO: (no e-mail) Delivers multiple code versions.
    #     RAQUEL CARMONA LOPEZ (no e-mail) No results file. The code is completely wrong.
    #     MICHAEL UMENDU RIOS: compressed files with .7z. Changed without penalty.
    
elif project_path == '../LabEvaluationProjects/ProjectB3_1718_Gbil/':
    # NO INCIDENTS IN THIS GROUP
    pass


if project_path == 'prb12':
    
    # ADRIAN LOPEZ RUIZ: 
    #      (1) python does not recognize the delivered file as zip. However, I could decompress with
    #          the unarchiver. zip file re-generated without penalty
    #      (2) the mat file is actualy a .ipynb with the extension changed.
    # FRANCISCO JAVIER VICENTE LASO: the .zip file cannot be read in any way. I have changed the extension to
    #          .unk.
    # MIGUEL RODRIGUEZ TALAVERON: delivers a .7z file. File .zip generated without penalty
    # ESTEFANIA FUENTES FERNANDEZ delivers a .7z file. File .zip generated without penalty
    pass

# if exam_label == 'ExLabB12_0':
#    df.drop('mTrain', axis=1, inplace=True)

Now we are ready to compute the final score

In [19]:
df['Final', 'Score'] = (df['Exam', 'Score'] - p_delay * df['Delivery', 'Delay']) * df['Delivery', 'Factor']
df[df.columns]    # .head()

Unnamed: 0_level_0,Unnamed: 1_level_0,xMin,xMin,xMin,xMin,xMax,xMax,xMax,xMax,xmTrain,...,P0,PFAx1,PFAx1,PFAx1,PFAx1,Exam,Delivery,Delivery,Delivery,Final
Unnamed: 0_level_1,Group,Dim,w,s,w·s,Dim,w,s,w·s,Dim,...,w·s,Dim,w,s,w·s,Score,Code,Delay,Factor,Score
ADRIAN CRUZ SACEDO,61,OK,1,1,1,OK,1,1,1,Error,...,1,OK,1.0,0,0.0,6.0,Py,0.0,1.0,6.0
ALEJANDRO LOPEZ CARRILLO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1.0,0,0.0,2.0,Py,0.0,1.0,2.0
ALEJANDRO RODRIGUEZ ORTIZ,61,No data,1,0,0,No data,1,0,0,Error,...,0,No data,1.0,0,0.0,0.0,Py,0.0,1.0,0.0
ANDONI TAJUELO MUÑOZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,OK,1.0,0,0.0,0.0,Py,0.0,1.0,0.0
BOGDAN GHEORGHE PRESCORNITOIU DRAGOS,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1.0,0,0.0,4.0,Py,0.0,1.0,4.0
CARLOS DIAZ FERNANDEZ,61,OK,1,1,1,OK,1,1,1,Error,...,1,No data,1.0,0,0.0,5.0,Py,0.0,1.0,5.0
CARLOS MARTIN-ROMO BARRILERO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1.0,0,0.0,5.0,Py,0.0,1.0,5.0
DIEGO GUTIERREZ LOPEZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,OK,1.0,0,0.0,0.0,,0.0,0.5,0.0
ESTEFANIA FUENTES FERNANDEZ,61,No data,1,0,0,No data,1,0,0,Error,...,0,OK,1.0,0,0.0,2.0,Py,0.0,1.0,2.0
GALAN YUDIE HARDJONO HARTINI,61,Error,1,0,0,Error,1,0,0,No data,...,0,OK,0.9,1,0.9,0.9,Py,0.0,1.0,0.9


## 4. Save results

In [20]:
# Save to excel file.
if not os.path.exists(output_path):
    os.makedirs(output_path)
df.to_excel(output_path + finalnotes_fname, columns=df.columns)