# Automatic Lab Evaluator

## Assessment based on student-provided results

* Jerónimo Arenas García
* Jesús Cid Sueiro

Version History:

    Version 0.1 (Dec. 2016)
        - First Python 2 version and Python 3 adaptation
    Version 0.2 (Dec. 2017) 
        - All configurable parameters in the first and second code cell.
        - Managing multiple mat files in students' zip files.
        - Corrected bug in readdatafiles (new student variables were not properly added to the dataframe)
        - Managing multiple class lists in Spanish and English.
        - External evaluation functions
        - New format of students report.
        - Uses ExamProject class.
        - Integrated student groups

In [1]:
import numpy as np
import pandas as pd
import os
from os.path import isfile, join
import scipy.io as sio
import scipy
import zipfile as zp
import shutil
import difflib
import csv
import glob

# Evaluation libraries
from lib.dbEvaluatorB12 import *
from lib.dbSolverB12 import *

## 1. Configurable parameters:

In [2]:
# #########
# Libraries
# #########

# Use the inherited examProject class corresponding to the exam to be evaluated.
# (The original cell contained this import twice; a single import is enough.)
import lib.examProjectB3 as ex

# #################
# Files and folders
# #################

# Project path
# project_path = '../LabEvaluationProjects/ProjectB3_1718_GSCGT/'
# project_path = '../LabEvaluationProjects/ProjectB3_1718/'
project_path = 'prb12'

# Exam name. An exam evaluation project may contain several exams.
# Write here which one of them you want to evaluate.
# exam_label = 'ExLabB12_0'
exam_label = 'ExLabB12_1'

# Expected name of the students' results file.
# This is used to disambiguate situations where the student uploaded multiple mat files
# (e.g. the input data file provided with the exam statement, or .mat files in .DS_STORE folders)
results_fname = 'results.mat'

# Name of the output file.
# NOTE(review): presumably the spreadsheet where the final marks are written; it is not used
# in this part of the notebook -- confirm against the cells that write the results.
finalnotes_fname = 'student_notes.xlsx'

# ####
# Exam
# ####

# Penalties:
# NOTE(review): p_nocode and p_noresults look like the penalties for missing code files and
# missing result files; their exact use is defined where the penalties are applied.
p_nocode = 0.75
p_noresults = 0.75
p_delay = 0.25      # score reduction per minute.

# Load the exam project. Its f_struct dictionary provides the location of every file or
# folder used below.
exam = ex.ExamProjectB3(project_path)
exam.load()

# Paths to input and output files
class_list_path = exam.f_struct['class_list']
all_students_path = exam.f_struct['all_students']
data4st_path = exam.f_struct['data4students']
# Results, evaluation outputs and the list of questions are stored per exam, in a subfolder
# named after exam_label.
results_path = exam.f_struct['student_results'] + exam_label + '/'
output_path = exam.f_struct['eval_results'] + exam_label + '/'
csv_questions_list = exam.f_struct['exam_statement'] + exam_label + '/' + exam_label + '.csv'

In [3]:
# List of exam questions from the database
print(csv_questions_list)
# The csv module asks for files to be opened with newline='', so that line endings and
# newlines embedded in quoted fields are interpreted by the csv reader itself.
with open(csv_questions_list, 'r', newline='') as f:
    reader = csv.reader(f)
    # The questions of the exam are the fields of the first row of the file
    questions = list(reader)[0]

# If the file is not available, you can write the list of questions by hand
# questions = ['F0_estimate_06', 'F1_model_01', 'F2_predict_03', 'F4_lms_02']
print("Questions in the exam: {0}".format(questions))

prb12/exam_statement/ExLabB12_1/ExLabB12_1.csv
Questions in the exam: ['R0_preproc_01m', 'R1_calculow_02', 'R1_calculow_05', 'R2_estimacion_01', 'C1_clasx1_01', 'C1_clasx1_03']


## 2. Read datafiles for all students

Student datafiles can be in any of the following formats:

   * `'.zip'`: When uncompressed, the zip may contain one or several matlab files. All matlab files are read and incorporated to a pandas Dataframe where each student is a column, and each index is a variable available for the exam solution
   * `'.mat'`: All data variables for the students are given in a single matlab file

In [4]:
def getFileName(fpath):
    '''
    Return the last component of a path, i.e. the file name without its folders.

    os.path.basename is used instead of splitting by '/', so that the paths returned by glob
    are also handled on platforms whose path separator is not '/'. For '/'-separated paths
    (including the names of the members of a zip file) the result is the same as taking the
    text after the last '/'.

    Args:
        fpath: Path to a file (a string).

    Returns:
        The text after the last path separator. It is the whole string if there is no
        separator, and an empty string if the path ends with a separator.
    '''
    return os.path.basename(fpath)

def readData4st(datafiles_path):
    '''
    Read the matlab data files provided to students.

    Every '.mat' file in the directory tree under datafiles_path is loaded with
    scipy.io.loadmat (squeeze_me=True) and stored as one column of the output dataframe.

    Args:
        datafiles_path: Root folder of the data files. It may or may not end with a path
            separator.

    Returns:
        A pandas DataFrame with one column per data file and one row per variable name.
        Each column is named after the text before the first '.' in the file name (the tag).
        Columns are sorted by name. The dataframe is empty if no '.mat' file is found.
    '''

    # Read matlab files in the input directory tree.
    # The pattern is built with join, so that it is correct no matter if datafiles_path ends
    # with a path separator or not.
    datafiles = sorted(glob.glob(join(datafiles_path, '**', '*.mat'), recursive=True))

    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    columns = []
    for dtfile in datafiles:

        # The tag can be the NIA, the student's name or just the beginning of some other file
        tag = getFileName(dtfile).split('.')[0]

        # Load matlab data file
        data = sio.loadmat(dtfile, squeeze_me=True)

        # Read all variable names and the corresponding data values.
        # Keys starting with '_' are skipped (they are not taken as variables).
        idx = [var for var in data.keys() if not var.startswith('_')]
        val = [data[var] for var in idx]

        # Single-column dataframe for this file
        df2 = pd.DataFrame()
        df2[tag] = pd.Series(val, index=idx)
        columns.append(df2)

    if not columns:
        return pd.DataFrame()

    # All columns are merged and sorted only once, instead of once per file
    df = pd.concat(columns, axis=1)
    df.sort_index(axis=1, inplace=True)
    return df

In [5]:
# Load the data files given to the students into a dataframe
# (one column per data file, one row per variable).
print(data4st_path)
student_data = readData4st(data4st_path)

n_variables, n_students = student_data.shape

print('')
print('Number of students in dataframe:', str(n_students))
print('Number of variables read:', str(n_variables))

print('Displaying data for first students ... ')
first_columns = student_data.columns[:7]
student_data[first_columns]

# Variables given to one particular student, shown as the output of the cell
student_data['100318675']


prb12/data4students/
Processing 93 files in prb12/data4students/ ...

Number of students in dataframe: 93
Number of variables read: 6
Displaying data for first students ... 


Xtest     [[1.14320787739, 0.850436103975, 1.49963978283...
Xtrain    [[0.444210443191, 2.39799424856, 2.8880844494,...
sTrain    [0.591717558972, -0.901350988463, 0.9203210619...
xTest     [[1.34129788493, 3.56510520112, -1.7304584126,...
xTrain    [[-0.1082084353, 5.40573946316, -2.42965963338...
ytrain    [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, ...
Name: 100318675, dtype: object

## 2. Read answers provided by students

In [6]:
def _matVariables(data):
    '''
    Return the variable names and values of a dictionary loaded with scipy.io.loadmat,
    as two parallel lists. Keys starting with '_' are skipped.
    '''
    names = [var for var in data.keys() if not var.startswith('_')]
    values = [data[var] for var in names]
    return names, values


def _selectMatFile(mat_fnames, tag):
    '''
    Select a single mat file among those found in the zip file of a student.

    Args:
        mat_fnames: Names of the mat files inside the zip file.
        tag: Identifier of the student, used in the warning messages only.

    Returns:
        The name of the selected file, or None if mat_fnames is empty.
    '''
    n = len(mat_fnames)
    if n == 0:
        print('    WARNING: {} has not delivered any mat file'.format(tag))
        return None
    if n > 1:
        print('    WARNING: {} has provided multiple mat files:'.format(tag))
        print('        {0}'.format(mat_fnames))

    # Nested set of criteria to select a single mat file from multiple options.
    # Each criterion is applied to the files that passed all the previous ones.
    criteria = [
        lambda f: '.ipynb_checkpoints' not in f,
        lambda f: f[0].isalnum(),
        lambda f: getFileName(f)[0].isalnum(),
        lambda f: getFileName(f)[0].isalpha(),
        lambda f: f.endswith(results_fname),
    ]

    # Keep the files passing the most restrictive criterion with non-empty members.
    selected = mat_fnames
    for criterion in criteria:
        filtered = [f for f in selected if criterion(f)]
        if not filtered:
            break
        selected = filtered

    # We take the first file in the list (an arbitrary choice)
    fname = selected[0]
    if n > 1:
        print('        Selected file: {}'.format(fname))
    return fname


def readdatafiles(datafiles_path, splitsymbol):
    '''
    Read the files found in the directory tree under datafiles_path into a dataframe.

    According to its original description, this function is used for reading both the data
    files provided to students and the response files provided by students.

    Each file is processed according to its extension:
        '.zip': a single mat file is selected among those in the zip file (the global
                results_fname is the preferred name) and its variables are read.
        '.mat': all variables in the file are read.
        '.m', '.py', '.ipynb': a warning is printed and an empty column is added.
        Any other file is ignored.

    Args:
        datafiles_path: Root folder of the files. It may or may not end with a path separator.
        splitsymbol: The tag of each file (the name of its column) is the text of the file
            name before the first occurrence of this symbol.

    Returns:
        A pandas DataFrame with one column per processed file and one row per variable name,
        with columns sorted by name. It is empty if no file has been processed.
        NOTE(review): several files with the same tag produce several columns with the same
        name -- confirm that each student delivers a single file.
    '''
    # Local import: io is only needed here, to read mat files without extracting them.
    import io

    # Read file paths.
    # The pattern is built with join, so that it is correct no matter if datafiles_path ends
    # with a path separator or not.
    datafiles = glob.glob(join(datafiles_path, '**', '*.*'), recursive=True)

    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    frames = []
    for dtfile in sorted(datafiles):
        idx = []
        val = []

        # The tag can be the NIA, the student's name or just the beginning of some other file
        tag = getFileName(dtfile).split(splitsymbol)[0]

        if dtfile.endswith('.zip'):

            # The context manager closes the zip file even if reading fails
            with zp.ZipFile(dtfile) as zpobj:
                # Read names of .mat files
                mat_fnames = [f for f in zpobj.namelist() if f.endswith('.mat')]

                # mat file selection. This is to disambiguate cases with multiple files
                fname = _selectMatFile(mat_fnames, tag)

                # Read the selected mat file, if any.
                # The file is read from memory. Nothing is extracted to disk, so no
                # temporary folder has to be created (and removed) in the working directory.
                if fname is not None:
                    data = sio.loadmat(io.BytesIO(zpobj.read(fname)), squeeze_me=True)
                    idx, val = _matVariables(data)

        elif dtfile.endswith('.mat'):

            data = sio.loadmat(dtfile, squeeze_me=True)
            idx, val = _matVariables(data)

        elif dtfile.endswith(('.m', '.py', '.ipynb')):
            # Extensions are tested with the leading dot, to avoid taking as code any file
            # whose name just ends with 'm' or 'py'.
            print('    WARNING: {} has provided a code file only:'.format(tag))
            print('        {0}'.format(dtfile))

        else:
            # No column is added to the dataframe for any other file
            if os.path.isfile(dtfile):
                print('    File ignored: {0}'.format(dtfile))
            continue

        df2 = pd.DataFrame()
        df2[tag] = pd.Series(val, index=idx)
        frames.append(df2)

    if not frames:
        return pd.DataFrame()

    # All columns are merged and sorted only once, instead of once per file
    df = pd.concat(frames, axis=1)
    df.sort_index(axis=1, inplace=True)
    return df
        

### 2.1. Requested variable names.

In order to get the names of the requested variables, we solve the exam with an arbitrary set of variables.

In [7]:
# Solve the exam with the data in the first column of student_data.
# The keys of the solution are taken as the names of the requested variables.
first_student = student_data.columns[0]
data = student_data[first_student].to_dict()
print(questions)
solution, scoring_ex = solveExam(questions, data)
truenames = [varname for varname in solution.keys()]

print(truenames)

['R0_preproc_01m', 'R1_calculow_02', 'R1_calculow_05', 'R2_estimacion_01', 'C1_clasx1_01', 'C1_clasx1_03']
['mTrain', 'xnTrain', 'xnVal', 'w', 'w5', 'sEst', 'PFAx1', 'etaNPx1']


### 2.2. Read student results into pandas dataframe

In [8]:
# Load the results delivered by the students.
# The column names are the text of each file name before the first '_'.
student_results = readdatafiles(results_path, splitsymbol='_')

# Row ordering: the expected variable names go first, followed by any other variable
# provided by the students.
extra_names = []
for varname in student_results.index.tolist():
    if varname not in truenames:
        extra_names.append(varname)
newindex = truenames + extra_names

student_results = student_results.reindex(newindex)

n_variables, n_students = student_results.shape

print('')
print('Number of students in dataframe:', str(n_students))
print('Number of variables read:', str(n_variables))

print('Displaying data for first students ... ')
first_columns = student_results.columns[0:7]
student_results[first_columns]

Processing 43 files in prb12/student_results/ExLabB12_1/ ...
        ['ADRIAN LOPEZ RUIZ_1159891_assignsubmission_file_Lab12/Classificationdata.mat', '__MACOSX/ADRIAN LOPEZ RUIZ_1159891_assignsubmission_file_Lab12/._Classificationdata.mat', 'ADRIAN LOPEZ RUIZ_1159891_assignsubmission_file_Lab12/Regressiondata.mat', '__MACOSX/ADRIAN LOPEZ RUIZ_1159891_assignsubmission_file_Lab12/._Regressiondata.mat']
        Selected file: ADRIAN LOPEZ RUIZ_1159891_assignsubmission_file_Lab12/Classificationdata.mat
        ['Lab12/Classificationdata.mat', '__MACOSX/Lab12/._Classificationdata.mat', 'Lab12/Regressiondata.mat', '__MACOSX/Lab12/._Regressiondata.mat']
        Selected file: Lab12/Classificationdata.mat
        ['EXAMEN/100333500/results.mat', 'EXAMEN/100333500/Classificationdata.mat', '__MACOSX/EXAMEN/100333500/._Classificationdata.mat', 'EXAMEN/100333500/Regressiondata.mat', '__MACOSX/EXAMEN/100333500/._Regressiondata.mat']
        Selected file: EXAMEN/100333500/results.mat
        ['Regr

Unnamed: 0,ABRAHAM HUELAMO IZQUIERDO,ADRIAN GARCIA MOÑINO,ADRIAN LOPEZ RUIZ,ALBERTO GUILLERMO HERNANDEZ DOMINGUEZ,ALEJANDRO GOMEZ RODENAS,ALEJANDRO RODRIGUEZ GARCIA,ALVARO VARELA CACHARRO
mTrain,"[1.39330437016, 0.802102662416, -1.27094047415...","[-3.37880620606, 1.16978899476, -1.16381226894...",,"[-1.1768364061e-16, -3.88578058619e-18, -1.415...",,"[0.0186033205228, 4.99952065325, -3.0060649470...","[-0.367983254081, -2.91421607009, 0.5373338361..."
xnTrain,"[[-1.31950520255, 0.679953355066, -0.030948540...","[[-1.34278185003, 0.175185121108, 0.6099123003...",,"[[-0.646773102183, 0.74269421408, 1.3990535874...","[[-1.09528821379, -0.0387199352965, -0.4552817...","[[0.136356441226, 1.11160713856, 1.02089534199...","[[1.20087203852, 1.53197941543, -0.80120350733..."
xnVal,,,,,,,
w,"[[-0.0798631824607, 0.465364378584, 0.48620141...","[0.0, 0.191760189893, 0.307699698076, 0.097728...",,"[0.003837167678, 0.311896604028, 0.60885449684...","[0.957697821237, 0.274276525984, 0.48946719944...","[0.0330115395257, 0.770473902532, 0.1066390746...","[0.189610996318, 0.734859096068, 0.08620408746..."
w5,,,,,,,
sEst,,,,"[2.28843121842, 2.70582123999, 3.0753643722, 2...",,0.776309,"[-0.150886731665, -0.150886731665, -0.15088673..."
PFAx1,,0.999968,,0.23975,0.591982,0.708132,1
etaNPx1,,,,0.812388,3.428,,1.81239
D,,,,,,,
ECP,,,,,,,


### 2.3. Common Mistakes on variable names

In view of all variable names provided by all students, we may decide to allow alternative names for variables without any penalty

In [9]:
# print(student_results)

In [10]:
# Each column of student_results is a student
n_students = student_results.shape[1]
print('Number of students in dataframe:', str(n_students))

print('\nDisplaying number of missing data per variable name.')
print('Those with a large number are potential common mistakes for a variable name')

# Count, for each variable name (row), the students with no value for it
missing = student_results.isnull()
missing.sum(axis='columns')

Number of students in dataframe: 42

Displaying number of missing data per variable name.
Those with a large number are potential common mistakes for a variable name


mTrain         12
xnTrain         9
xnVal          42
w              11
w5             42
sEst           27
PFAx1          21
etaNPx1        24
D              41
ECP            41
ECP2           41
PFAX1          41
Pe             41
Pfa            41
Xtest          38
Xtrain         38
etaNPX1        41
etanNPx1       41
mTrain.mat     41
sTest          40
sTrain         40
sVal           41
tm             35
tv             35
uNP            41
w1             41
w2             41
we             15
xTest          39
xTrain         39
xVal           41
xnTest          9
xnTest.mat     41
xnTrain.mat    41
ytest          38
ytrain         38
dtype: int64

In [11]:
###########################################
# EXAM DEPENDENT VARIABLE

# Accepted mistakes, as an ordered list of pairs
#   (expected variable name, accepted mistake)
# A list of pairs is used instead of a dictionary literal because the same expected name can
# accept several mistakes: a key repeated in a dictionary literal keeps its last value only,
# so that in the original dictionary ('uNP': 'etaNPX1') was silently discarded.
# Pairs are applied in the given order, which allows chained corrections
# (e.g. 'xnTest.mat' -> 'xnTest' -> 'xnVal').
if exam_label == 'ExLabB12_0':
    accepted_mistakes = [('xnVal', 'xnTest'), ('wp', 'w'), ('EAPval', 'EAP')]
elif exam_label == 'ExLabB12_1':
    accepted_mistakes = [('sEst', 'sTest'),
                         ('xnTest', 'xnTest.mat'), ('xnVal', 'xnTest'),
                         ('we', 'w2'), ('w5', 'we'),
                         ('w', 'w1'),
                         ('PFAx1', 'PFAX1'),
                         ('uNP', 'etaNPX1'), ('uNP', 'etanNPx1'), ('etaNPx1', 'uNP')]
elif exam_label == 'ExLabB12_2':
    # NOTE(review): 'xmTrain' is filled with 'xnTest' (not 'xnTrain') -- confirm this is intended.
    accepted_mistakes = [('xnTest', 'xnTest.mat'), ('xmVal', 'xnTest'),
                         ('xnTrain', 'xnTrain.mat'), ('xmTrain', 'xnTest'),
                         ('we', 'we3'),
                         ('w3', 'we4'),
                         ('m0', 'mo')]
else:
    print('WARNING: no accepted mistakes have been defined for exam {}'.format(exam_label))
    accepted_mistakes = []

# Dictionary  Expected variable name : Accepted mistake
# It has the same content as the dictionary literal of former versions (for a repeated
# expected name, the last accepted mistake). It is kept for compatibility only.
Mistakes = dict(accepted_mistakes)

##########################################

# Fill an empty variable with the value of its accepted mistake.
for expected, mistake in accepted_mistakes:
    print(expected)
    # The following 'if' is necessary because some of the mistakes may not happen.
    if mistake not in student_results.index:
        continue
    if expected in student_results.index:
        # Only the students with no value for the expected name take the value of the mistake
        student_results.loc[expected] = student_results.loc[expected].fillna(
            student_results.loc[mistake])
    else:
        # No student used the expected name: the row is created from the accepted mistake
        student_results.loc[expected] = student_results.loc[mistake]

# Remove rows with the wrong variables.
wrong_names = [el for el in student_results.index.tolist() if el not in truenames]
student_results.drop(wrong_names, inplace=True)

student_results.head(40)

sEst
xnTest
xnVal
we
w5
w
PFAx1
uNP
etaNPx1


Unnamed: 0,ABRAHAM HUELAMO IZQUIERDO,ADRIAN GARCIA MOÑINO,ADRIAN LOPEZ RUIZ,ALBERTO GUILLERMO HERNANDEZ DOMINGUEZ,ALEJANDRO GOMEZ RODENAS,ALEJANDRO RODRIGUEZ GARCIA,ALVARO VARELA CACHARRO,ANA FERNANDEZ SANTOS,ANDRES ESCALANTE ARIZA,ANTONIO HONORATO DE LA PEÑA,...,MARTA MOURE GARRIDO,MIGUEL RODRIGUEZ TALAVERON,NEREA MERIDA QUERO,OSCAR RODRIGUEZ CORPS,PAULA ENCINAR SANZ,PEDRO AGREDA JIMENEZ,PEDRO JOSE ESCOLAR SANCHEZ,RAQUEL CARMONA LOPEZ,SEBASTIAN ROSALES MAGALLARES,SERGIO MARTINEZ YEPES
mTrain,"[1.39330437016, 0.802102662416, -1.27094047415...","[-3.37880620606, 1.16978899476, -1.16381226894...",,"[-1.1768364061e-16, -3.88578058619e-18, -1.415...",,"[0.0186033205228, 4.99952065325, -3.0060649470...","[-0.367983254081, -2.91421607009, 0.5373338361...","[0.074892910817, 1.03740736142, -1.13253892346...","[1.80016902565, -0.864931010958, -1.0537127427...","[0.150475109005, -1.77215670302, -4.2944888407...",...,"[-0.235250292498, 1.94532300319, 2.77149242107...",,,"[2.24962468718, -2.32659841762, -2.51123734786...","[0.406583299035, -3.65064577737, 0.88617154762...","[2.80426303197, -0.276576064329, 1.12544802424...","[0.074892910817, 1.03740736142, -1.13253892346...",,"[-0.891461594607, 1.74860481782, 0.58481900919...","[3.61831236442, -1.81920112865, -0.09413542820..."
xnTrain,"[[-1.31950520255, 0.679953355066, -0.030948540...","[[-1.34278185003, 0.175185121108, 0.6099123003...",,"[[-0.646773102183, 0.74269421408, 1.3990535874...","[[-1.09528821379, -0.0387199352965, -0.4552817...","[[0.136356441226, 1.11160713856, 1.02089534199...","[[1.20087203852, 1.53197941543, -0.80120350733...","[[-1.0917839265, -0.0620775870229, -0.08550047...","[[0.200332463774, 0.59702306055, -0.3704775629...","[[-0.345804878947, -0.679425736353, 1.78163885...",...,"[[0.565027716776, -0.799690629293, -0.32165080...","[[-0.887444374284, 0.116816930367, 0.582218049...",,"[[-0.507776779381, 1.38488190624, -1.366346079...","[[-0.695587865144, 0.101643491246, 1.229815731...","[[1.27205495404, -0.480987440013, 0.7499711019...","[[-1.0917839265, -0.0620775870229, -0.08550047...",,"[[0.153031980052, -2.98188181231, 1.0612454055...","[[0.412028899181, -1.55801959904, 1.9182973349..."
xnVal,"[[1.0, 0.359998945167, 1.25410438664, 0.863282...","[[0.115352540502, -0.0830241435138, -0.5657565...",,"[[-1.68346887126, -0.328004942263, -0.34303887...","[[-0.246852350374, 0.75896019385, 0.7140750163...","[[-0.389433527791, -2.98384258653, 1.552405083...","[[-0.92516205203, -1.34772515249, 0.7996314813...","[[0.0975434937348, -0.758988428617, 0.39221067...","[[-0.227519102972, -1.97462351195, 0.403704310...","[[0.189457673722, 0.745310867094, -1.707538237...",...,"[[-0.514710739307, 1.23339207471, 1.2170822572...","[[-1.81099173422, -0.468102546589, 0.371559607...",,"[[-0.940985833888, -0.188922681921, -0.8478725...","[[0.355640305098, 0.144392425713, -1.060136776...","[[-0.586016849638, 0.0612895642138, 1.76480112...","[[0.0975434937348, -0.758988428617, 0.39221067...",,"[[-0.481753158897, -0.334939432042, 0.13689241...","[[-0.000935592933056, -1.19474562567, -0.42541..."
w,"[[-0.0798631824607, 0.465364378584, 0.48620141...","[0.0, 0.191760189893, 0.307699698076, 0.097728...",,"[0.003837167678, 0.311896604028, 0.60885449684...","[0.957697821237, 0.274276525984, 0.48946719944...","[0.0330115395257, 0.770473902532, 0.1066390746...","[0.189610996318, 0.734859096068, 0.08620408746...","[0.122607056115, 0.993522051479, 0.07712295098...","[-0.0300687853637, 0.477063672444, 0.514797881...","[0.506474090689, 0.343724378978, 0.34285880356...",...,"[-0.0548350424569, 0.756527719819, 0.179666234...",-0.103356,,"[0.0637310691518, 0.5754441314, 0.570901227055...","[-0.0998629685598, 0.132157065231, 0.676116497...","[[0.296422588589, 0.791471512513, 0.2361527035...",,,"[-0.0587910510111, 0.0956007465111, 0.85738146...","[0.276055676996, 0.115793089035, 0.27760309890..."
w5,"[[-0.0798631824607, 0.465364378584, 0.38896112...",,,"[0.003837167678, 0.314568557103, 1.24402426613]","[1.12472543573, 0.256930781293]","[0.0330115395257, 0.243297264339, 0.5]","[-0.150886731665, 0.189610996318]","[-0.294562973084, 0.137893944956, 2.05284235291]","[-0.0300687853637, 0.633417618453]","[-0.0488921429804, 0.410156522734, 1.20798428334]",...,"[-0.0548350424569, 0.758266346363]","[-0.103356484286, 0.598177693923, 0.0661894229...",,"[0.0637310691518, 0.611830116927]","[-0.0998629685598, 0.131950087911]","[[0.296422588589, 0.791471512513, 0.2361527035...","[-0.294562973084, 0.137893944956, 2.05284235291]",,"[-0.0587910510111, 0.833797189788, 0.5]",
sEst,,,,"[2.28843121842, 2.70582123999, 3.0753643722, 2...",,0.776309,"[-0.150886731665, -0.150886731665, -0.15088673...",1.89617,1.10335,,...,"[0.303585989259, 0.79215883607, 0.576442455503...",,"[52.125, 47.5, 38.75, 51.625, 45.125, 40.5, 53...",1.17556,,"[[0.296422588589, 0.791471512513, 0.2361527035...",,,1.27501,
PFAx1,,0.999968,,0.23975,0.591982,0.708132,1,0.56629,0.688794,0.689144,...,0.921114,,,0.5,,,,,0.236301,0.487165
etaNPx1,,,,0.812388,3.428,,1.81239,2.04847,3.49031,2.70081,...,2.20144,,,,2.21897,,,,1.28155,3.24445


### 2.4. Name to NIA dictionary

Finally, since datafiles are created by NIA and results are available per student name, we need to create a dictionary connecting them.

Student names are taken from one or several student lists. Using multiple lists is useful when the same exam is given to multiple groups, or in the frequent situation where students from one group take the exam of another group.

In [12]:
# Collect the names of the excel files in the class list folder
print("Reading class lists...")
xls_files = [fname for fname in os.listdir(class_list_path)
             if fname.endswith(('.xls', '.xlsx'))]
n_lists = len(xls_files)
if n_lists > 1:
    print("    There are {} excel files in the class_list folder.".format(n_lists))
    print("    All students will be merged in a single list.")

# Translation of column names from Spanish to English.
# This is required to concatenate student lists in different languages.
spanish_to_english = {'Dirección de correo': 'Email address',
                      'Apellido(s)': 'Surname',
                      'Nombre': 'First name'}

# Load every excel file into a dataframe with English column names
groups = []
for g in xls_files:
    df = pd.read_excel(class_list_path + g).rename(columns=spanish_to_english)
    groups.append(df)

# Merge all class lists (we do not expect duplicated NIU's in different lists)
student_NIA_names = pd.concat(groups)
print("Done. {0} students in the lists".format(len(student_NIA_names)))
student_NIA_names.sort_values(by='Surname')     #.head()

Reading class lists...
    There are 2 excel files in the class_list folder.
    All students will be merged in a single list.
Done. 93 students in the lists


Unnamed: 0,NIU,Surname,First name,Email address
63,100346720,AGREDA JIMENEZ,PEDRO,100346720@alumnos.uc3m.es
10,100293005,ALONSO CALVO,ENRIQUE,100293005@alumnos.uc3m.es
56,100330672,ARAQUE MUNICIO,GERSHON,100330672@alumnos.uc3m.es
74,100346888,ASTILLEROS APARICIO,CARLOS,100346888@alumnos.uc3m.es
70,100346814,BADIA NUÑEZ,DAVID,100346814@alumnos.uc3m.es
1,100277378,BARBERO HERRANZ,ANGEL,100277378@alumnos.uc3m.es
0,100315121,BARBOSA MARTIN,FELIPE,100315121@alumnos.uc3m.es
33,100317556,BARTOLOME FERNANDEZ,ROCIO,100317556@alumnos.uc3m.es
7,100291362,BELLIDO CASTILLO,DANIEL,100291362@alumnos.uc3m.es
52,100330499,CARMONA LOPEZ,RAQUEL,100330499@alumnos.uc3m.es


In [13]:
# UTF-8 encoding of everything
# AFAIK, this is no longer needed in Python 3, but I left it just in case...
# NOTE(review): in Python 3 this turns every text column into bytes objects. It is kept so
# that the content of student_NIA_names does not change for the cells that use it later,
# but consider removing it if no other cell needs encoded values.
for fld in student_NIA_names.keys():
    if fld != 'NIU':
        student_NIA_names[fld] = student_NIA_names[fld].str.encode('utf8')


def _asText(value):
    '''
    Return value as text. Byte strings are decoded from UTF-8: applying str() to them would
    give their representation (b'...', with non-ASCII characters escaped) and not the text.
    '''
    if isinstance(value, bytes):
        return value.decode('utf8')
    return str(value)


# Full names (first name + surname, in lower case) and NIUs in the class lists.
# They are computed only once, and not once per student and candidate.
class_NIUs = student_NIA_names['NIU'].values.tolist()
class_names = [(_asText(first) + ' ' + _asText(surname)).lower()
               for first, surname in zip(student_NIA_names['First name'].values.tolist(),
                                         student_NIA_names['Surname'].values.tolist())]

# Build dictionary NIA: name
NIA_name = {}
for el in student_results.columns.tolist():

    # Find the student name in the class lists that is most similar to el
    el_lower = el.lower()
    sim_list = [difflib.SequenceMatcher(a=el_lower, b=std_name).ratio()
                for std_name in class_names]

    max_sim = max(sim_list)
    max_idx = sim_list.index(max_sim)
    NIA = class_NIUs[max_idx]

    # Two different result columns matched to the same student are likely a matching error.
    # The last one is kept (as before), but the user is now warned about it.
    if NIA in NIA_name:
        print('WARNING: {0} and {1} have been matched to the same NIU ({2})'.format(
            NIA_name[NIA], el, NIA))
    NIA_name[NIA] = el

# Build reverse dictionary name: NIA
name_NIA = {NIA_name[el]: el for el in NIA_name}

### 2.5. Group of each student

We will include the information about the group in the final dataframe of results so as to make the separation of evaluation reports easier.

In [14]:
# Dataframe with the columns 'NIA' and 'group' of the file of all students
all_students = pd.read_csv(all_students_path)
NIA_group = all_students.loc[:, ['NIA', 'group']]

# Show the first rows, sorted by NIA
NIA_group.sort_values(by='NIA').head()

Unnamed: 0,NIA,group
0,100073324,[201718] M2.217.15939-61
1,100277378,[201718] M2.217.15939-61
2,100277493,[201718] M2.217.15939-61
3,100282920,[201718] M2.217.15939-61
4,100283839,[201718] M2.217.15939-61


At this point we have:

   * student_data: dataframe with data given to the students. Each index is a variable, and each column a NIA
   * student_results: dataframe with student results. Each index is a variable, and each column a name
   * NIA_name: NIA to name dictionary
   * name_NIA: name to NIA dictionary
   * NIA_group: dataframe

## 3. Exam evaluation

To carry out the evaluation of the exam, we use the external evaluation libraries.

Function evaluateExam computes the correct solutions for the given data and compares them with the responses provided by the students.

In [15]:
df = pd.DataFrame()

# Evaluation report of the last evaluated student, as a flat dictionary.
# It is not named 'ex', because that name is the alias of the exam project module imported
# in the configuration cell: reusing it here would make the module unreachable afterwards.
# It is initialized here so that the code after the loop also works if there are no students.
report_row = {}

print('Evaluating all students... ')
for NIA in NIA_name:

    name = NIA_name[NIA]
    print('Evaluating {0} {1} ...'.format(NIA, name))

    # Evaluate the exam from the data provided to the student and the student response
    dataex = student_data[str(NIA)].to_dict()
    response = student_results[name].to_dict()
    exam_report = evaluateExam(questions, dataex, response)

    # Convert exam_report, which is a nested dictionary, into a pandas dataframe
    # Note that all this conversion to and from dictionaries can be avoided if evaluateExam
    # worked with dataframes. This is a pending task.
    report_row = {}
    # Note that we take the last 2 characters of the group name only.
    report_row[('', 'Group')] = NIA_group[NIA_group['NIA'] == NIA]['group'].tolist()[0][-2:]
    # The keys of the flat dictionary are the pairs (outer key, inner key) of exam_report
    for v in exam_report:
        for w in exam_report[v]:
            report_row[(v, w)] = exam_report[v][w]

    df[name] = pd.Series(report_row)

# Take the transpose to place students in rows, and restate the original variable ordering
# This is because pd.Series does not preserve the order.
# The ordering is taken from the report of the last evaluated student.
cols = list(report_row.keys())
df = df.T[cols]

# Pretty print results
df[df.columns[:]].head(100)

Evaluating all students... 
Evaluating 100346821 ABRAHAM HUELAMO IZQUIERDO ...
         It is likely an array containing mixed type elements
         It is likely an array containing mixed type elements
         It is likely an array containing mixed type elements
         It is likely an array containing mixed type elements
Evaluating 100330405 ADRIAN GARCIA MOÑINO ...
Evaluating 100330518 ADRIAN LOPEZ RUIZ ...
Evaluating 100346809 ALBERTO GUILLERMO HERNANDEZ DOMINGUEZ ...
Evaluating 100330163 ALEJANDRO GOMEZ RODENAS ...
Evaluating 100346904 ALEJANDRO RODRIGUEZ GARCIA ...
Evaluating 100330166 ALVARO VARELA CACHARRO ...
Evaluating 100316810 ANA FERNANDEZ SANTOS ...
         Removing older response to w_est
Evaluating 100330515 ANDRES ESCALANTE ARIZA ...
Evaluating 100330408 ANTONIO HONORATO DE LA PEÑA ...
         Removing older response to w_est
Evaluating 100339679 BORJA MONDEDEU SANZ ...
Evaluating 100316592 BRUNO VERDUGO VILLANUEVA ...
Evaluating 100346888 CARLOS ASTILLEROS APARI

Unnamed: 0_level_0,Unnamed: 1_level_0,mTrain,mTrain,mTrain,mTrain,xnTrain,xnTrain,xnTrain,xnTrain,xnVal,...,sEst,PFAx1,PFAx1,PFAx1,PFAx1,etaNPx1,etaNPx1,etaNPx1,etaNPx1,Exam
Unnamed: 0_level_1,Group,Dim,w,s,w·s,Dim,w,s,w·s,Dim,...,w·s,Dim,w,s,w·s,Dim,w,s,w·s,Score
ABRAHAM HUELAMO IZQUIERDO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,No data,1,0,0,2.5
ADRIAN GARCIA MOÑINO,61,OK,1,1,1,OK,1,1,1,Error,...,0,OK,1,0,0,No data,1,0,0,2.5
ADRIAN LOPEZ RUIZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,No data,1,0,0,No data,1,0,0,0.0
ALBERTO GUILLERMO HERNANDEZ DOMINGUEZ,61,OK,1,0,0,OK,1,1,1,OK,...,0,OK,1,0,0,OK,1,0,0,1.25
ALEJANDRO GOMEZ RODENAS,61,No data,1,0,0,OK,1,0,0,OK,...,0,OK,1,0,0,OK,1,0,0,1.25
ALEJANDRO RODRIGUEZ GARCIA,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,No data,1,0,0,2.5
ALVARO VARELA CACHARRO,61,OK,1,1,1,OK,1,0,0,OK,...,0,OK,1,0,0,OK,1,0,0,2.25
ANA FERNANDEZ SANTOS,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,OK,1,0,0,5.0
ANDRES ESCALANTE ARIZA,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,OK,1,0,0,5.0
ANTONIO HONORATO DE LA PEÑA,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,OK,1,0,0,5.0


### 3.1. Penalties

In addition to the evaluation of the results file provided by the student, the final mark depends on other factors:

1. If the student uploaded the code files
2. Delays in delivering the files during the exam.
3. Errors in the delivering process (use of e-mail, incorrect file types, etc).

The following function is used to identify the code uploaded by the student.

In [16]:
def detectCode(datafiles_path, splitsymbol):
    '''
    Check whether each student has uploaded python or matlab code files.

    All files matching '**/*.*' under datafiles_path are scanned. A file is
    processed only if the first splitsymbol-separated token of its name (the
    "tag") is found in the notebook-level collection name_NIA; other regular
    files are reported as ignored.

    NOTE(review): getFileName and name_NIA are defined elsewhere in the
    notebook. getFileName presumably returns the file name without the
    folder part -- confirm against its definition.

    Args:
        datafiles_path: Folder with the students' deliveries. It is
            concatenated directly with the glob pattern, so it is expected
            to end with a path separator.
        splitsymbol: Separator between the tag and the rest of the file name.

    Returns:
        A DataFrame with one row per processed file, indexed by tag, and a
        single column 'Code' taking values 'Py', 'Mat' or 'None'. A zip file
        is classified by the extensions of its members, python taking
        precedence over matlab.
    '''

    python_ext = ('.py', '.ipynb')
    matlab_ext = '.m'

    # Read file paths
    datafiles = glob.glob(datafiles_path + '**/*.*', recursive=True)

    # Read files
    df = pd.DataFrame()
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    for dtfile in datafiles:

        # The tag can be the NIA, the student's name or just the begining of some other file
        tag = getFileName(dtfile).split(splitsymbol)[0]

        if tag in name_NIA:

            if dtfile.endswith('.zip'):

                # Read the names of the files in the zip. The context manager
                # guarantees that the file handle is released.
                with zp.ZipFile(dtfile) as zipped:
                    files_in_zip = zipped.namelist()

                # Count code files of each type
                n_mat = sum(f.endswith(matlab_ext) for f in files_in_zip)
                n_py = sum(f.endswith(python_ext) for f in files_in_zip)

                if n_py > 0 and n_mat > 0:
                    # The student is identified by the tag (the original code
                    # used an undefined variable here and raised a NameError).
                    print('WARNING: {} has delivered both matlab and python code'.format(tag))

                if n_py > 0:
                    code = 'Py'
                elif n_mat > 0:
                    code = 'Mat'
                else:
                    code = 'None'

            elif dtfile.endswith(python_ext):
                code = 'Py'
            elif dtfile.endswith(matlab_ext):
                code = 'Mat'
            else:
                code = 'None'

            # Each processed file adds a new column named after the tag
            df2 = pd.DataFrame()
            df2[tag] = pd.Series(code, index = ['Code'])
            df = pd.concat([df, df2], axis=1)
        elif os.path.isfile(dtfile):
            print('    File ignored: {0}'.format(dtfile))
    return df.T

In [17]:
# Find out which kind of code (if any) each student has delivered
code_data = detectCode(results_path, splitsymbol='_')
code_data.head()    # Not displayed: only the last expression of a cell is shown

# Delivery information in the evaluation dataframe: kind of code, delay and
# multiplicative factor applied to the exam score
df['Delivery', 'Code'] = code_data
df['Delivery', 'Delay'] = 0.0
df['Delivery', 'Factor'] = 1.0

# Penalty for students that did not deliver any code.
# NOTE(review): the factor is hard-coded to 0.5, while the configuration cell
# defines p_nocode = 0.75, which is not used here -- confirm the intended value.
no_code = df['Delivery', 'Code'] == 'None'
df.loc[no_code, ('Delivery', 'Factor')] = 0.5 

Processing 43 files in prb12/student_results/ExLabB12_1/ ...
    File ignored: prb12/student_results/ExLabB12_1/FRANCISCO JAVIER VICENTE LASO_1159851_assignsubmission_file_Lab12.unk


In [18]:
# This cell contains project specific instructions.

# PENALTIES:
# Manual corrections of the delivery factor, decided case by case after
# reviewing the incidents of each project.
if project_path == '../LabEvaluationProjects/ProjectB3_1718/':

    # STUDENTS THAT DID NOT DELIVER ANY RESULTS (student name -> factor).
    incident_factors = {
        # (no e-mail) Delivers code only.
        #     Results generated with penalty
        'ALEJANDRO GOMEZ RODENAS': p_noresults,
        # (no e-mail) Does not deliver results file. However, code computes some variables.
        #     Results generated with penalty
        'ANDONI TAJUELO MUÑOZ': p_noresults,
        # (e-mail) His computer got blocked and could not generate results file.
        #     savemat command incorrect. Code generated without penalty.
        'HAMZA EL HAMDAOUI ABOUEL ABBES': 1.0,
        # (no e-mail) Delivers a file Lab12.7z, but renames it to Lab12zip.
        #     Results generated with penalty.
        'ROCIO BARTOLOME FERNANDEZ': p_noresults,
    }
    for student_name, delivery_factor in incident_factors.items():
        df.at[student_name, ('Delivery', 'Factor')] = delivery_factor

    # Other incidents, with no change in the factor:
    #     CRISTINA GARCIA GARCIA: (e-mail) Does not deliver results file. Code does not compute any of the variables 
    #     NEREA MERIDA QUERO: (no e-mail) Delivers multiple code versions.
    #     RAQUEL CARMONA LOPEZ (no e-mail) No results file. The code is completely wrong.
    #     MICHAEL UMENDU RIOS: compressed files with .7z. Changed without penalty.

elif project_path == '../LabEvaluationProjects/ProjectB3_1718_Gbil/':
    # NO INCIDENTS IN THIS GROUP
    pass


if project_path == 'prb12':

    # Incidents solved by hand, none of them changes the factor:
    # ADRIAN LOPEZ RUIZ: 
    #      (1) python does not recognize the delivered file as zip. However, I could decompress with
    #          the unarchiver. zip file re-generated without penalty
    #      (2) the mat file is actually a .ipynb with the extension changed.
    # FRANCISCO JAVIER VICENTE LASO: the .zip file cannot be read in any way. I have changed the extension to
    #          .unk.
    # MIGUEL RODRIGUEZ TALAVERON: delivers a .7z file. File .zip generated without penalty
    # ESTEFANIA FUENTES FERNANDEZ delivers a .7z file. File .zip generated without penalty
    pass

# if exam_label == 'ExLabB12_0':
#    df.drop('mTrain', axis=1, inplace=True)

Now we are ready to compute the final score.

In [19]:
# Final score: the exam score minus the delay penalty (p_delay per unit of
# delay), scaled by the delivery factor.
exam_score = df['Exam', 'Score']
delay_penalty = p_delay * df['Delivery', 'Delay']
df['Final', 'Score'] = (exam_score - delay_penalty) * df['Delivery', 'Factor']

# Show the whole dataframe as the output of the cell
df[df.columns]    # .head()

Unnamed: 0_level_0,Unnamed: 1_level_0,mTrain,mTrain,mTrain,mTrain,xnTrain,xnTrain,xnTrain,xnTrain,xnVal,...,PFAx1,etaNPx1,etaNPx1,etaNPx1,etaNPx1,Exam,Delivery,Delivery,Delivery,Final
Unnamed: 0_level_1,Group,Dim,w,s,w·s,Dim,w,s,w·s,Dim,...,w·s,Dim,w,s,w·s,Score,Code,Delay,Factor,Score
ABRAHAM HUELAMO IZQUIERDO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,2.5,Py,0.0,1.0,2.5
ADRIAN GARCIA MOÑINO,61,OK,1,1,1,OK,1,1,1,Error,...,0,No data,1,0,0,2.5,Py,0.0,1.0,2.5
ADRIAN LOPEZ RUIZ,61,No data,1,0,0,No data,1,0,0,No data,...,0,No data,1,0,0,0.0,Py,0.0,1.0,0.0
ALBERTO GUILLERMO HERNANDEZ DOMINGUEZ,61,OK,1,0,0,OK,1,1,1,OK,...,0,OK,1,0,0,1.25,Py,0.0,1.0,1.25
ALEJANDRO GOMEZ RODENAS,61,No data,1,0,0,OK,1,0,0,OK,...,0,OK,1,0,0,1.25,Py,0.0,1.0,1.25
ALEJANDRO RODRIGUEZ GARCIA,61,OK,1,1,1,OK,1,1,1,OK,...,0,No data,1,0,0,2.5,,0.0,0.5,1.25
ALVARO VARELA CACHARRO,61,OK,1,1,1,OK,1,0,0,OK,...,0,OK,1,0,0,2.25,Mat,0.0,1.0,2.25
ANA FERNANDEZ SANTOS,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,5.0,Py,0.0,1.0,5.0
ANDRES ESCALANTE ARIZA,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,5.0,,0.0,0.5,2.5
ANTONIO HONORATO DE LA PEÑA,61,OK,1,1,1,OK,1,1,1,OK,...,0,OK,1,0,0,5.0,,0.0,0.5,2.5


## 4. Save results

In [20]:
# Save to excel file.
# exist_ok=True replaces the check-then-create sequence, and os.path.join
# builds a valid file path whether or not output_path ends with a path
# separator (plain concatenation would write next to the folder otherwise).
os.makedirs(output_path, exist_ok=True)
df.to_excel(os.path.join(output_path, finalnotes_fname), columns=df.columns)