# Automatic Lab Evaluator

## Assessment based on student-provided results

* Jerónimo Arenas García
* Jesús Cid Sueiro

Version History:

    Version 0.1 (Dec. 2016)
        - First Python 2 version and Python 3 adaptation
    Version 0.2 (Dec. 2017) 
        - All configurable parameters in the first and second code cell.
        - Managing multiple mat files in students' zip files.
        - Corrected bug in readdatafiles (new student variables were not properly added to the dataframe)
        - Managing multiple class lists in Spanish and English.
        - External evaluation functions
        - New format of students report.

Pending issues:
    - Taking the target variable names from the list of questions from the database.

In [1]:
import numpy as np
import pandas as pd
import os
from os.path import isfile, join
import scipy.io as sio
import scipy
import zipfile as zp
import shutil
import difflib

# Evaluation libraries
from lib.dbEvaluator import *
from lib.dbSolver import *

## 1. Configurable parameters:

In [2]:
# -----------------
# Files and folders
# -----------------

# Root folder of the evaluation project (uncomment the line of the group to evaluate)
# project_path = '../LabEvaluationProjects/ProjectB3_1718_GSCGT/'
project_path = '../LabEvaluationProjects/ProjectB3_1718_Gbil/'

# Input and output folders, all of them located under the project root
class_list_path = join(project_path, 'listadeclase/')
data4st_path = join(project_path, 'data4students/')
results_path = join(project_path, 'student_results/Regulares/')
output_path = join(project_path, 'output/')
statements_path = join(project_path, 'exam_statement/')

# Name that the students' results file is expected to have.
# It is the preferred choice when a student uploads several mat files
# (e.g. the input data file provided with the exam statement, or .mat files
# in .DS_STORE folders)
results_fname = 'results.mat'

# Name of the output (Excel) file where the evaluation results are saved
finalnotes_fname = 'student_notes.xlsx'

# ----
# Exam
# ----

# Exam questions, identified by their names in the database
questions = ['F0_estimate_06', 'F1_model_01', 'F2_predict_03', 'F4_lms_02']

# Penalties
p_nocode = 0.75
p_noresults = 0.75
p_delay = 0.25      # score reduction per minute.

## 2. Read datafiles for all students

Student datafiles can be in any of the following formats:

   * `'.zip'`: When uncompressed, the zip may contain one or several matlab files. A single mat file is selected (the one named as the expected results file is preferred) and its variables are incorporated into a pandas DataFrame where each student is a column, and each index is a variable available for the exam solution
   * `'.mat'`: All data variables for the students are given in a single matlab file

In [3]:
def getFileName(fpath):
    """Return the part of `fpath` that follows its last '/' (all of it if there is none)."""
    _, _, tail = fpath.rpartition('/')
    return tail

def _read_mat_variables(mat_path):
    """Load a mat file and return (names, values) of its variables.

    Keys starting with '_' (the metadata entries added by scipy) are skipped.
    """
    data = sio.loadmat(mat_path, squeeze_me=True)
    names = [key for key in data if not key.startswith('_')]
    return names, [data[key] for key in names]


def _select_mat_file(mat_fnames, expected_name):
    """Pick a single mat file among the (non-empty) list of zip member names.

    A nested set of increasingly restrictive criteria is built, and the first
    file satisfying the most restrictive non-empty criterion is returned.
    """
    criteria = [mat_fnames]
    criteria.append([f for f in criteria[0] if '.ipynb_checkpoints' not in f])
    criteria.append([f for f in criteria[1] if f[0].isalnum()])
    # Zip member names use '/' as separator, so basename gives the file name
    criteria.append([f for f in criteria[2] if os.path.basename(f)[0].isalnum()])
    criteria.append([f for f in criteria[3] if os.path.basename(f)[0].isalpha()])
    criteria.append([f for f in criteria[4] if f.endswith(expected_name)])

    for candidates in reversed(criteria):
        if candidates:
            # We take the first file in the list (an arbitrary choice)
            return candidates[0]
    return None


def readdatafiles(datafiles_path, splitsymbol, results_name=None):
    '''
    This function is used for reading both the data files provided to student and the response
    files provided by students.

    Each regular file in `datafiles_path` is processed according to its extension:

      * '.zip': a single mat file is selected among those in the zip and its
        variables are read. A column without data is added if there is none.
      * '.mat': all its variables are read.
      * '.m', '.py', '.ipynb': a warning is printed and a column without data is added.
      * any other file is ignored.

    Args:
        datafiles_path: Folder containing the files to read.
        splitsymbol: The tag naming each column is the part of the file name
            that precedes the first occurrence of this symbol.
        results_name: Expected name of the mat file inside a zip, used to choose
            among several mat files. If None, the module-level variable
            `results_fname` is used.

    Returns:
        A pandas DataFrame with one column per processed file (sorted by tag)
        and one row per variable name.
    '''
    # Local import: tempfile is not among the imports at the top of the file
    import tempfile

    # Read file paths
    datafiles = [f for f in os.listdir(datafiles_path) if isfile(join(datafiles_path, f))]

    # One Series per processed file; they are merged once, after the loop
    columns = []

    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    for dtfile in sorted(datafiles):

        idx = []
        val = []

        # The tag can be the NIA, the student's name or just the begining of some other file
        tag = dtfile.split(splitsymbol)[0]

        if dtfile.endswith('.zip'):

            # The context manager guarantees that the zip file is closed
            with zp.ZipFile(join(datafiles_path, dtfile)) as zpobj:

                # Read names of .mat files
                mat_fnames = [f for f in zpobj.namelist() if f.endswith('.mat')]
                n = len(mat_fnames)

                if n == 0:
                    print('    WARNING: {} has not delivered any mat file'.format(tag))
                else:
                    if n > 1:
                        print('    WARNING: {} has provided multiple mat files:'.format(tag))
                        print('        {0}'.format(mat_fnames))

                    # mat file selection. This is to disambiguate cases with multiple files
                    expected = results_fname if results_name is None else results_name
                    fname = _select_mat_file(mat_fnames, expected)
                    if n > 1:
                        print('        Selected file: {}'.format(fname))

                    # The file is extracted to a private temporary folder, which is
                    # removed afterwards, so that no existing folder can be deleted.
                    with tempfile.TemporaryDirectory() as temporary_dir:
                        extracted_path = zpobj.extract(fname, temporary_dir)
                        idx, val = _read_mat_variables(extracted_path)

        elif dtfile.endswith('.mat'):
            idx, val = _read_mat_variables(join(datafiles_path, dtfile))

        elif dtfile.endswith(('.m', '.py', '.ipynb')):
            print('    WARNING: {} has provided a code file only:'.format(tag))
            print('        {0}'.format(dtfile))

        else:
            print('    File ignored: {0}'.format(dtfile))
            continue

        columns.append(pd.Series(val, index=idx, name=tag))

    if not columns:
        return pd.DataFrame()

    df = pd.concat(columns, axis=1)
    df.sort_index(axis=1, inplace=True)
    return df
        

In [4]:
# Load the data files handed out to the students (the tag is the text before the first '.')
student_data = readdatafiles(data4st_path, splitsymbol='.')

# Rows are variables and columns are students
n_variables, n_students = student_data.shape

print('')
print('Number of students in dataframe:', str(n_students))
print('Number of variables read:', str(n_variables))

print('Displaying data for first students ... ')
student_data.loc[:, student_data.columns[:7]]

Processing 12 files in ../LabEvaluationProjects/ProjectB3_1718_Gbil/data4students/ ...
    File ignored: Icon

Number of students in dataframe: 11
Number of variables read: 7
Displaying data for first students ... 


Unnamed: 0,100304972,100315121,100316478,100318675,100329922,100346250,100346579
x,"[-2.68199400057, -4.9908445778, -0.93925546076...","[1.8955052226, -2.29237646911, 0.665766165774,...","[2.38809491357, 2.05817212461, 0.463230280894,...","[-3.41053111582, -0.878311837836, -0.842311383...","[0.816761436107, -3.51553778656, -4.3555006301...","[0.676343378346, 0.426953696728, 1.94509770739...","[-2.68516071752, 1.87729431265, 2.0957127654, ..."
u,"[-1.43558184792, -1.75925717211, 0.53792997241...","[1.06050484041, -1.47657258458, 0.883755946642...","[1.32066617425, 0.656724845171, -0.18317950745...","[-1.86621071307, 0.397371524499, -0.4092839286...","[0.455631465515, -1.90483305409, -1.6494625488...","[0.493258058455, -0.0447684445662, 1.061256018...","[-1.19250276839, 1.33466064863, 0.732836027606..."
uTest,"[-1.04553355293, -1.24885933755, 0.30411497519...","[-1.86108368422, -0.473500042735, -0.208791736...","[-0.130346445423, -0.430302460457, 0.119118035...","[-0.011134688848, 1.29408260467, 0.95311790419...","[-0.899077787049, -0.244587709573, 0.127849282...","[0.70735888204, 0.0106941640539, 0.36629875738...","[-0.312423069674, -0.0884012094224, 2.04098579..."
varS,0.0178571,0.0454545,0.0185185,0.0263158,0.03125,0.0384615,0.0294118
varN,0.03944,0.02242,0.02956,0.0335,0.03844,0.025,0.03158
M,29,12,28,20,17,14,18
state,"(100304972, array([ -2.68199400e+00, -4.99084...","(100315121, array([ 1.89550522e+00, -2.29237...","(100316478, array([ 2.38809491e+00, 2.05817...","(100318675, array([ -3.41053112e+00, -8.78311...","(100329922, array([ 0.81676144, -3.51553779, -...","(100346250, array([ 6.76343378e-01, 4.26953...","(100346579, array([-2.68516072, 1.87729431, ..."


## 2. Read answers provided by students

### 2.1. Requested variable names.

In order to get the names of the requested variables, we solve the exam with an arbitrary set of variables.

In [5]:
# Any student is valid here: only the names of the solution variables are needed,
# so the data of the first column are used.
first_column = student_data.columns[0]
data = student_data[first_column].to_dict()

solution, scoring_ex = solveExam(questions, data)
truenames = [varname for varname in solution.keys()]

### 2.2. Read student results into a pandas DataFrame

In [6]:
# Load the results delivered by the students (the tag is the text before the first '_')
student_results = readdatafiles(results_path, splitsymbol='_')

# Row order: the expected variable names first, then any other name used by the students
extra_names = [v for v in student_results.index.tolist() if v not in truenames]
newindex = truenames + extra_names
student_results = student_results.reindex(newindex)

# Rows are variables and columns are students
n_variables, n_students = student_results.shape

print('')
print('Number of students in dataframe:', str(n_students))
print('Number of variables read:', str(n_variables))

print('Displaying data for first students ... ')
student_results.loc[:, student_results.columns[0:7]]

Processing 9 files in ../LabEvaluationProjects/ProjectB3_1718_Gbil/student_results/Regulares/ ...
    File ignored: .DS_Store
    File ignored: Icon
        ['exam_l3/100346898.mat', '__MACOSX/exam_l3/._100346898.mat', 'exam_l3/results.mat']
        Selected file: exam_l3/results.mat
        ['results.mat', '100346250.mat']
        Selected file: results.mat

Number of students in dataframe: 7
Number of variables read: 5
Displaying data for first students ... 


Unnamed: 0,DIEGO PENROZ VALENZUELA,EDUARDO HERRERA ARRUTI,FELIPE BARBOSA MARTIN,JESUS LOPEZ BAEZA-ROJANO,LUIS ANTHONY SANTIVAÑEZ CALDAS,MATTHEW MOORCROFT,RAUL LOZANO SANZ
sMAP,"[1.99563279797, 0.810021578291, 0.283267323781...",,,"[-0.162816479281, -0.158911350699, -0.05735958...","[1.99262695404, 0.784686500567, 0.281064051555...","[-0.0583935766041, -0.0368619760236, -0.167934...","[0.151636015779, -0.0159585673379, 0.221763107..."
vMSE,,"[[0.331689694888, 0.0, 0.0, -0.0, -0.0, -0.0, ...","[[0.788894624264, 0.0, 0.0, 0.0, -0.0, -0.0, 0...",0.000438344,"[[8.17269386911e-05, 7.51688733809e-07, -5.169...","[[0.0384134615863, -4.8076875259e-05, -4.80768...","[[1.50143155904, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0..."
xMSE10,,0.138439,0.0101368,-0.105019,"[-3.50739813318, 0.549619391319, 1.82952633678...",,0.0406088
s30,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-0.0135199069296, -0.121397000569, 0.23535931...","[-0.00796986330883, -0.216927442831, -0.105459...",,"[1.83946948959, 0.793331734337, 0.162502634465...","[0.676343378346, -275.846869987, 56064.8563409...","[-0.0732292931638, 0.105172044414, 0.094513013..."
smap,,"[-0.301229001056, 0.167943890023, 0.1419200943...","[0.145861158926, -0.203883012914, 0.3762382665...",,,,


### 2.3. Common Mistakes on variable names

In view of all variable names provided by all students, we may decide to allow alternative names for variables without any penalty

In [7]:
# print(student_results)

In [8]:
n_students = student_results.shape[1]
print('Number of students in dataframe:', str(n_students))

print('\nDisplaying number of missing data per variable name.')
print('Those with a large number are potential common mistakes for a variable name')

# For every variable name (row), count the students that did not provide it
is_missing = student_results.isnull()
is_missing.sum(axis=1)

Number of students in dataframe: 7

Displaying number of missing data per variable name.
Those with a large number are potential common mistakes for a variable name


sMAP      2
vMSE      1
xMSE10    2
s30       1
smap      5
dtype: int64

In [9]:
###########################################
# EXAM DEPENDENT VARIABLE

# Dictionary with accepted mistakes in the following format
#   Expected variable name : Accepted mistake
Mistakes = {'sMAP': 'smap', 's30': 'S30', 'xMSE10':'XMSE10'}
##########################################

# Fill an empty variable with the value given under its accepted (mistaken) name.
for expected, accepted in Mistakes.items():
    # Some of the mistakes in the dictionary may not have been made by any student.
    if accepted in student_results.index.tolist():
        mistaken_row = student_results.loc[accepted]
        print(mistaken_row)
        student_results.loc[expected] = student_results.loc[expected].fillna(mistaken_row)

# Remove the rows of all variables that are not among the expected ones.
unexpected = [v for v in student_results.index.tolist() if v not in truenames]
student_results.drop(unexpected, inplace=True)

student_results.loc[:, student_results.columns[:7]]

DIEGO PENROZ VALENZUELA                                                         NaN
EDUARDO HERRERA ARRUTI            [-0.301229001056, 0.167943890023, 0.1419200943...
FELIPE BARBOSA MARTIN             [0.145861158926, -0.203883012914, 0.3762382665...
JESUS LOPEZ BAEZA-ROJANO                                                        NaN
LUIS ANTHONY SANTIVAÑEZ CALDAS                                                  NaN
MATTHEW MOORCROFT                                                               NaN
RAUL LOZANO SANZ                                                                NaN
Name: smap, dtype: object


Unnamed: 0,DIEGO PENROZ VALENZUELA,EDUARDO HERRERA ARRUTI,FELIPE BARBOSA MARTIN,JESUS LOPEZ BAEZA-ROJANO,LUIS ANTHONY SANTIVAÑEZ CALDAS,MATTHEW MOORCROFT,RAUL LOZANO SANZ
sMAP,"[1.99563279797, 0.810021578291, 0.283267323781...","[-0.301229001056, 0.167943890023, 0.1419200943...","[0.145861158926, -0.203883012914, 0.3762382665...","[-0.162816479281, -0.158911350699, -0.05735958...","[1.99262695404, 0.784686500567, 0.281064051555...","[-0.0583935766041, -0.0368619760236, -0.167934...","[0.151636015779, -0.0159585673379, 0.221763107..."
vMSE,,"[[0.331689694888, 0.0, 0.0, -0.0, -0.0, -0.0, ...","[[0.788894624264, 0.0, 0.0, 0.0, -0.0, -0.0, 0...",0.000438344,"[[8.17269386911e-05, 7.51688733809e-07, -5.169...","[[0.0384134615863, -4.8076875259e-05, -4.80768...","[[1.50143155904, 0.0, -0.0, 0.0, 0.0, 0.0, 0.0..."
xMSE10,,0.138439,0.0101368,-0.105019,"[-3.50739813318, 0.549619391319, 1.82952633678...",,0.0406088
s30,"[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,...","[-0.0135199069296, -0.121397000569, 0.23535931...","[-0.00796986330883, -0.216927442831, -0.105459...",,"[1.83946948959, 0.793331734337, 0.162502634465...","[0.676343378346, -275.846869987, 56064.8563409...","[-0.0732292931638, 0.105172044414, 0.094513013..."


### 2.4. Name to NIA dictionary

Finally, since datafiles are created by NIA and results are available per student name, we need to create a dictionary connecting them.

Student names are taken from one or several student lists. Using multiple lists is useful when the same exam is given to multiple groups, or in the frequent situation where students from one group take the exam of another group.

In [10]:
# Excel files found in the class list folder
print("Reading class lists...")
xls_files = [f for f in os.listdir(class_list_path) if f.endswith(('.xls', '.xlsx'))]
n_lists = len(xls_files)
if n_lists > 1:
    print("    There are {} excel files in the class_list folder.".format(n_lists))
    print("    All students will be merged in a single list.")

# Spanish column names are translated to English, so that student lists
# written in different languages can be concatenated.
spanish_to_english = {'Dirección de correo': 'Email address',
                      'Apellido(s)': 'Surname',
                      'Nombre': 'First name'}

# Load every excel file into a dataframe with English column names
groups = [pd.read_excel(class_list_path + g).rename(columns=spanish_to_english)
          for g in xls_files]

# Merge all class lists (no NIU is expected to appear in more than one list)
student_NIA_names = pd.concat(groups)
print("Done. {0} students in the lists".format(len(student_NIA_names)))
student_NIA_names.sort_values('Surname')     #.head()

Reading class lists...
    There are 2 excel files in the class_list folder.
    All students will be merged in a single list.
Done. 93 students in the lists


Unnamed: 0,NIU,Surname,First name,Email address
63,100346720,AGREDA JIMENEZ,PEDRO,100346720@alumnos.uc3m.es
10,100293005,ALONSO CALVO,ENRIQUE,100293005@alumnos.uc3m.es
56,100330672,ARAQUE MUNICIO,GERSHON,100330672@alumnos.uc3m.es
74,100346888,ASTILLEROS APARICIO,CARLOS,100346888@alumnos.uc3m.es
70,100346814,BADIA NUÑEZ,DAVID,100346814@alumnos.uc3m.es
1,100277378,BARBERO HERRANZ,ANGEL,100277378@alumnos.uc3m.es
0,100315121,BARBOSA MARTIN,FELIPE,100315121@alumnos.uc3m.es
33,100317556,BARTOLOME FERNANDEZ,ROCIO,100317556@alumnos.uc3m.es
7,100291362,BELLIDO CASTILLO,DANIEL,100291362@alumnos.uc3m.es
52,100330499,CARMONA LOPEZ,RAQUEL,100330499@alumnos.uc3m.es


In [11]:
# Names are kept as text. Encoding them to UTF-8 bytes (a leftover from Python 2)
# makes str() return representations like "b'...'" with escaped non-ASCII
# characters, which distorts the similarity scores computed below.
first_names = student_NIA_names['First name'].astype(str).tolist()
surnames = student_NIA_names['Surname'].astype(str).tolist()
NIUs = student_NIA_names['NIU'].values.tolist()

# Lower-case full name of every student in the class lists (computed only once)
std_names = [(first + ' ' + last).lower() for first, last in zip(first_names, surnames)]

# Build dictionary NIA: name
NIA_name = {}
for el in student_results.columns.tolist():

    # Find the student name in the class lists that is most similar to el
    target = el.lower()
    sim_list = [difflib.SequenceMatcher(a=target, b=std_name).ratio()
                for std_name in std_names]

    max_sim = max(sim_list)
    max_idx = sim_list.index(max_sim)
    best_NIA = NIUs[max_idx]

    # Two different names matched to the same NIA would silently overwrite each other
    if best_NIA in NIA_name:
        print('    WARNING: {0} and {1} have been matched to the same NIA: {2}'.format(
            NIA_name[best_NIA], el, best_NIA))
    NIA_name[best_NIA] = el

# Build reverse dictionary name: NIA
name_NIA = {name: NIA for NIA, name in NIA_name.items()}

At this point we have:

   * student_data: dataframe with data given to the students. Each index is a variable, and each column a NIA
   * student_results: dataframe with student results. Each index is a variable, and each column a name
   * NIA_name: NIA to name dictionary
   * name_NIA: name to NIA dictionary

## 3. Exam evaluation

To carry out the evaluation of the exam, we use the external evaluation libraries.

Function evaluateExam computes the correct solutions for the given data and compares them with the responses provided by the students.

In [12]:
df = pd.DataFrame()

print('Evaluating all students... ')
for NIA, name in NIA_name.items():

    # print('Evaluating {} ...'.format(name))

    # Data given to the student (columns of student_data are labelled with the NIA
    # as a string) and response delivered by the student
    dataex = student_data[str(NIA)].to_dict()
    response = student_results[name].to_dict()
    exam_report = evaluateExam(questions, dataex, response)

    # exam_report is a nested dictionary: it is flattened into a dictionary with
    # two-level keys, which becomes the column of this student.
    # Note that all this conversion to and from dictionaries can be avoided if
    # evaluateExam worked with dataframes. This is a pending task.
    ex = {(v, w): exam_report[v][w]
          for v in exam_report
          for w in exam_report[v]}

    df[name] = pd.Series(ex)

# Students are placed in rows, and the original variable ordering is restored
# (pd.Series does not preserve the order).
cols = list(ex.keys())
df = df.transpose()[cols]

# Pretty print results
df.head()

Evaluating all students... 


Unnamed: 0_level_0,sMAP,sMAP,sMAP,sMAP,vMSE,vMSE,vMSE,vMSE,xMSE10,xMSE10,xMSE10,xMSE10,s30,s30,s30,s30,Exam
Unnamed: 0_level_1,Dim,w,s,w·s,Dim,w,s,w·s,Dim,w,s,w·s,Dim,w,s,w·s,Score
DIEGO PENROZ VALENZUELA,OK,1,1,1,No data,1,0,0,No data,1,0,0,Error,0.8,0.8,0.64,4.1
EDUARDO HERRERA ARRUTI,OK,1,0,0,OK,1,0,0,Error,1,0,0,Error,1.0,0.0,0.0,0.0
FELIPE BARBOSA MARTIN,OK,1,0,0,OK,1,0,0,Error,1,0,0,Error,1.0,0.0,0.0,0.0
JESUS LOPEZ BAEZA-ROJANO,OK,1,0,0,Error,1,0,0,Error,1,0,0,No data,1.0,0.0,0.0,0.0
LUIS ANTHONY SANTIVAÑEZ CALDAS,OK,1,1,1,OK,1,1,1,OK,1,1,1,OK,1.0,0.0,0.0,7.5


### 3.1. Penalties

In addition to the evaluation of the results file provided by the student, the final mark depends on other factors:

1. If the student uploaded the code files
2. Delays in delivering the files during the exam.
3. Errors in the delivering process (use of e-mail, incorrect file types, etc).

The following function is used to identify the code uploaded by the student.

In [18]:
def detectCode(datafiles_path, splitsymbol, known_tags=None):
    '''
    This function is used to check if the student has uploaded a python or a matlab code file.

    Only files whose tag is in `known_tags` are analyzed; the rest are reported as ignored.
    A zip file is classified from the names of its members, and any other file from its
    own name: 'Py' for '.py' or '.ipynb' files, 'Mat' for '.m' files and 'None' otherwise.
    Python takes precedence when a zip contains both kinds of code.

    Args:
        datafiles_path: Folder containing the files delivered by the students.
        splitsymbol: The tag of a file is the part of its name that precedes the
            first occurrence of this symbol.
        known_tags: Container of the tags to be analyzed. If None, the module-level
            dictionary `name_NIA` is used.

    Returns:
        A pandas DataFrame with one row per analyzed file (indexed by tag) and a
        single column 'Code'. It is empty if no file has been analyzed.
    '''
    if known_tags is None:
        known_tags = name_NIA

    # Read file paths
    datafiles = [f for f in os.listdir(datafiles_path) if isfile(join(datafiles_path, f))]

    # Read files
    print('Processing {0} files in {1} ...'.format(len(datafiles), datafiles_path))
    tags = []
    codes = []
    for dtfile in datafiles:

        # The tag can be the NIA, the student's name or just the begining of some other file
        tag = dtfile.split(splitsymbol)[0]

        if tag not in known_tags:
            print('    File ignored: {0}'.format(dtfile))
            continue

        if dtfile.endswith('.zip'):

            # Read the names of the files in the zip (the zip is closed right after)
            with zp.ZipFile(join(datafiles_path, dtfile)) as zpobj:
                files_in_zip = zpobj.namelist()

            # Number of matlab and python code files
            n_matlab = sum(f.endswith('.m') for f in files_in_zip)
            n_py = sum(f.endswith(('.py', '.ipynb')) for f in files_in_zip)

            if n_py > 0 and n_matlab > 0:
                # The warning refers to the owner of this file, i.e. to its tag
                print('WARNING: {} has delivered both matlab and python code'.format(tag))

            if n_py > 0:
                code = 'Py'
            elif n_matlab > 0:
                code = 'Mat'
            else:
                code = 'None'

        elif dtfile.endswith(('.py', '.ipynb')):
            code = 'Py'
        elif dtfile.endswith('.m'):
            code = 'Mat'
        else:
            code = 'None'

        tags.append(tag)
        codes.append(code)

    if not tags:
        return pd.DataFrame()

    return pd.DataFrame({'Code': codes}, index=tags)

In [14]:
# Find out which kind of code (if any) has been delivered by each student
code_data = detectCode(results_path, splitsymbol='_')

# Delivery information is appended to the evaluation dataframe.
# By default there is no delay and no reduction of the score.
df['Delivery', 'Code'] = code_data
df['Delivery', 'Delay'] = 0.0
df['Delivery', 'Factor'] = 1.0

# Penalty for the students that did not deliver any code.
# NOTE(review): the factor is hard-coded to 0.5, while the configuration cell defines
# p_nocode = 0.75, which is not used anywhere. Confirm which value is intended.
no_code = df['Delivery', 'Code'] == 'None'
df.loc[no_code, ('Delivery', 'Factor')] = 0.5

Processing 9 files in ../LabEvaluationProjects/ProjectB3_1718_Gbil/student_results/Regulares/ ...
    File ignored: .DS_Store
    File ignored: Icon


In [15]:
# Project-specific incidents: the delivery factor of some students is set by hand.

# PENALTIES:
if project_path == '../LabEvaluationProjects/ProjectB3_1718_GSCGT/':

    # STUDENTS THAT DID NOT DELIVER ANY RESULTS.
    incident_factors = [
        # (no e-mail) Delivers code only.
        #     Results generated with penalty
        ('ALEJANDRO GOMEZ RODENAS', p_noresults),

        # (no e-mail) Does not deliver results file. However, code computes some variables.
        #     Results generated with penalty
        ('ANDONI TAJUELO MUÑOZ', p_noresults),

        # (e-mail) His computer got blocked and could not generate results file.
        #     savemat command incorrect. Code generated without penalty.
        ('HAMZA EL HAMDAOUI ABOUEL ABBES', 1.0),

        # (no e-mail) Delivers a file Lab12.7z, but renames it as Lab12zip.
        #     Results generated with penalty.
        ('ROCIO BARTOLOME FERNANDEZ', p_noresults),
    ]
    for student, factor in incident_factors:
        df.at[student, ('Delivery', 'Factor')] = factor

    # Other incidents, with no change of the delivery factor:
    #     CRISTINA GARCIA GARCIA: (e-mail) Does not deliver results file. Code does not compute any of the variables
    #     NEREA MERIDA QUERO: (no e-mail) Delivers multiple code versions.
    #     RAQUEL CARMONA LOPEZ (no e-mail) No results file. The code is completely wrong.

elif project_path == '../LabEvaluationProjects/ProjectB3_1718_Gbil/':
    # NO INCIDENTS IN THIS GROUP
    pass

Now we are ready to compute the final score

In [16]:
# The delay penalty is subtracted from the exam score, and the result is
# scaled by the delivery factor.
delay_penalty = p_delay * df['Delivery', 'Delay']
penalized_score = df['Exam', 'Score'] - delay_penalty
df['Final', 'Score'] = penalized_score * df['Delivery', 'Factor']
df[df.columns]    # .head()

Unnamed: 0_level_0,sMAP,sMAP,sMAP,sMAP,vMSE,vMSE,vMSE,vMSE,xMSE10,xMSE10,xMSE10,xMSE10,s30,s30,s30,s30,Exam,Delivery,Delivery,Delivery,Final
Unnamed: 0_level_1,Dim,w,s,w·s,Dim,w,s,w·s,Dim,w,...,w·s,Dim,w,s,w·s,Score,Code,Delay,Factor,Score
DIEGO PENROZ VALENZUELA,OK,1,1,1,No data,1,0,0,No data,1,...,0,Error,0.8,0.8,0.64,4.1,Mat,0.0,1.0,4.1
EDUARDO HERRERA ARRUTI,OK,1,0,0,OK,1,0,0,Error,1,...,0,Error,1.0,0.0,0.0,0.0,Py,0.0,1.0,0.0
FELIPE BARBOSA MARTIN,OK,1,0,0,OK,1,0,0,Error,1,...,0,Error,1.0,0.0,0.0,0.0,Py,0.0,1.0,0.0
JESUS LOPEZ BAEZA-ROJANO,OK,1,0,0,Error,1,0,0,Error,1,...,0,No data,1.0,0.0,0.0,0.0,Py,0.0,1.0,0.0
LUIS ANTHONY SANTIVAÑEZ CALDAS,OK,1,1,1,OK,1,1,1,OK,1,...,1,OK,1.0,0.0,0.0,7.5,Py,0.0,1.0,7.5
MATTHEW MOORCROFT,Error,1,0,0,Error,1,0,0,No data,1,...,0,Error,1.0,0.0,0.0,0.0,Py,0.0,1.0,0.0
RAUL LOZANO SANZ,OK,1,0,0,OK,1,0,0,Error,1,...,0,Error,1.0,0.0,0.0,0.0,,0.0,0.5,0.0


## 4. Save results

In [17]:
# Save to excel file.
# The output folder is created if it does not exist yet; otherwise to_excel
# would fail because it cannot create the file.
os.makedirs(output_path, exist_ok=True)
df.to_excel(output_path + finalnotes_fname, columns=df.columns)