# Automatic Lab Evaluator

## Assessment based on student-provided results

Version History:

Version 0.1 - Jerónimo Arenas García, Jesús Cid Sueiro, Vanessa Gómez Verdejo, Dec. 2016

In [21]:
import numpy as np
import pandas as pd
import os
import shutil
from os.path import isfile, join
import scipy.io as sio
import scipy
import zipfile as zp
import shutil
import difflib

## 1. Read datafiles for all students

Student datafiles can be in any of the following formats:

   * `'.zip'`: When uncompressed, the zip may contain one or several matlab/numpy files. All matlab/numpy files are read and incorporated into a pandas DataFrame where each student is a column, and each index is a variable available for the exam solution
   * `'.mat'`: All data variables for a student are given in a single matlab file
   * `'.npz'`: All data variables for a student are given in a single numpy file

In [2]:
def readdatafiles(datafiles_path, splitsymbol):
    """Read all student datafiles found in a folder into one DataFrame.

    Every regular file in `datafiles_path` whose name ends in 'zip', 'mat'
    or 'npz' is processed; any other file is ignored. The text before the
    first occurrence of `splitsymbol` in the file name is used as the column
    name (the student identifier).

    For zip files, every member ending in 'mat' or 'npz' whose name does not
    start with '_' is extracted to a private temporary folder and read.

    Parameters
    ----------
    datafiles_path : str
        Folder containing the files to read (subfolders are not visited).
    splitsymbol : str
        Separator used to extract the identifier from each file name.

    Returns
    -------
    pandas.DataFrame
        One column per file that provided at least one variable and one row
        per variable name. An empty DataFrame if nothing could be read.
    """
    # Local import: only needed to unpack zip archives.
    import tempfile

    def _load_variables(path):
        """Return the (name, value) pairs stored in one .mat/.npz file."""
        if path.endswith('mat'):
            data = sio.loadmat(path)
            # Names starting with '_' are skipped (loadmat metadata entries
            # such as '__header__' fall in this group).
            return [(name, data[name]) for name in data.keys()
                    if not name.startswith('_')]

        # NOTE(review): np.load is called without allow_pickle, so object
        # arrays (e.g. a pickled dict) are rejected by recent numpy versions.
        # Do not enable pickling blindly: the files come from students.
        data = np.load(path)
        try:
            names = list(data.keys())
            if names == ['arr_0']:
                # A single unnamed entry may wrap a dict of variables.
                unwrapped = data['arr_0'].tolist()
                if isinstance(unwrapped, dict):
                    return [(name, unwrapped[name]) for name in unwrapped
                            if not name.startswith('_')]
            # Materialize every array before the file handle is closed.
            return [(name, data[name]) for name in names
                    if not name.startswith('_')]
        finally:
            data.close()

    student_series = []

    datafiles = [f for f in os.listdir(datafiles_path) if isfile(join(datafiles_path, f))]
    for dtfile in datafiles:
        NIA = dtfile.split(splitsymbol)[0]
        idx = []
        val = []

        if dtfile.endswith('zip'):
            # All files will be extracted and the contents incorporated to
            # the DataFrame. A fresh temporary folder is used so that no
            # pre-existing folder is ever deleted, and it is removed even if
            # reading fails.
            temporary_dir = tempfile.mkdtemp()
            try:
                with zp.ZipFile(join(datafiles_path, dtfile)) as zpobj:
                    for fileinzip in zpobj.namelist():
                        # Skip names beginning with '_' that may be hidden OS files
                        if fileinzip.startswith('_'):
                            continue
                        if not fileinzip.endswith(('mat', 'npz')):
                            continue
                        zpobj.extract(fileinzip, temporary_dir)
                        for var, value in _load_variables(join(temporary_dir, fileinzip)):
                            idx.append(var)
                            val.append(value)
            finally:
                shutil.rmtree(temporary_dir, ignore_errors=True)

        elif dtfile.endswith(('mat', 'npz')):
            for var, value in _load_variables(join(datafiles_path, dtfile)):
                idx.append(var)
                val.append(value)

        if idx:
            student_series.append(pd.Series(val, index=idx, name=NIA))

    if not student_series:
        return pd.DataFrame()

    # Single concatenation instead of growing the DataFrame file by file.
    return pd.concat(student_series, axis=1)
        

In [3]:
###########################################
# EXAM DEPENDENT VARIABLE
datafiles_path = '../GenerateData/'
##########################################

# One column per student (NIA), one row per variable handed out for the exam.
student_data = readdatafiles(datafiles_path, splitsymbol='_')

print('Number of students in dataframe: ' + str(student_data.shape[1]))
print('Number of variables read: ' + str(student_data.shape[0]))

print('Displaying data for first three students ... ')
first_columns = student_data.columns[:3]
student_data[first_columns]

Number of students in dataframe: 66
Number of variables read: 9
Displaying data for first three students ... 


Unnamed: 0,100276687,100284423,100290949
k_knn,[[5]],[[6]],[[7]]
str_reg,"[[-1.81730391168], [-1.31959494449], [-3.12276...","[[-1.95300967366], [-0.219953516073], [0.77476...","[[-6.78524715177], [-0.167566954327], [-0.5772..."
sval_reg,"[[0.469566566998], [0.434864239432], [-0.20126...","[[-1.69798029129], [-0.998549158711], [-7.1524...","[[5.21761697592], [1.21333038463], [-2.5538643..."
xtr_reg,"[[0.871257484747, -0.820094192472], [-0.644532...","[[0.306152918607, 0.399930040105], [0.38405830...","[[-1.33671463676, -2.14137382412], [0.24677755..."
xtr_rl,"[[0.140463102615, 0.247669377199, -0.041431362...","[[-0.147325230568, 0.954388079256, -0.27748399...","[[0.524746496819, 0.296185529951, 0.7269493561..."
xval_reg,"[[-0.0087916089067, -2.38238053595], [-0.42707...","[[1.18442826324, -0.100688112884], [1.07725826...","[[-0.046398183217, 1.46304176031], [-0.1234091..."
xval_rl,"[[0.314502619466, -0.134699458319, 0.043601035...","[[0.464711356861, 0.779820082528, -0.538093560...","[[-1.17785941539, 0.441021282575, 0.8544769675..."
ytr_rl,"[[1], [1], [0], [0], [0], [0], [0], [0], [1], ...","[[1], [0], [0], [0], [0], [1], [0], [0], [1], ...","[[1], [0], [0], [0], [0], [0], [0], [0], [1], ..."
yval_rl,"[[0], [0], [1], [1], [0], [1], [1], [0], [1], ...","[[1], [0], [0], [1], [0], [0], [1], [1], [0], ...","[[0], [0], [1], [0], [0], [0], [1], [1], [1], ..."


## 2. Read answers provided by students


### 2.1. Read student results into a pandas DataFrame

In [None]:
###########################################
# EXAM DEPENDENT VARIABLE
results_path = '../Entregas/'
#Requested variable names in the wording of the exam
truenames = ['s0', 'E2val', 's_prom', 'E2val_knn', 'w_mean', 'w_cov',
            'mx','sx','xn_tr','xn_val','w10', 'rho', 'lg10', 'n1', 'emin', 'nvar', 'wmin']
###########################################

student_results = readdatafiles(results_path, splitsymbol='_')

# Requested variables first (in exam order), then any other name the students
# delivered, so that misspelled variable names remain visible below them.
newindex = truenames+[el for el in student_results.index.tolist() if el not in truenames]
student_results = student_results.reindex(newindex)

print('Number of students in dataframe: ' + str(student_results.shape[1]))
print('Number of variables read: ' + str(student_results.shape[0]))

# Show three columns, as announced by the message (13 were displayed before).
print('Displaying data for first three students ... ')
student_results[student_results.columns[:3]]

### 2.2. Common Mistakes on variable names

In view of all variable names provided by all students, we may decide to allow alternative names for variables without any penalty

In [5]:
print('Number of students in dataframe: ' + str(student_results.shape[1]))

print('\nDisplaying number of missing data per variable name. \nThose with a large number are a potential common mistake\nfor a variable name')

# Number of students (columns) for which each variable (row) is missing.
missing_per_variable = student_results.isnull().sum(axis=1)
missing_per_variable

Number of students in dataframe: 54

Displaying number of missing data per variable name. 
Those with a large number are a potential common mistake
for a variable name


s0            6
E2val         6
s_prom        4
E2val_knn     6
w_mean       18
w_cov        18
mx            4
sx            4
xn_tr         4
xn_val        8
w10          15
rho          20
lg10         20
n1           27
emin         35
nvar         37
wmin         35
k_knn        40
n0           53
s            53
str_reg      40
sval_reg     40
xn_va        53
xtr_reg      40
xtr_rl       40
xval_reg     40
xval_rl      40
ytr_rl       40
yval_rl      40
dtype: int64

In [None]:
###########################################
# EXAM DEPENDENT VARIABLE

#Dictionary with accepted mistakes in the following format
#  Expected variable name : Accepted mistake
Mistakes = {'xn_val': 'xn_va'}
##########################################

# Fill the gaps of each expected variable with the values delivered under the
# accepted misspelling. Misspellings that nobody used (or that were already
# removed by a previous run of this cell) are skipped instead of raising a
# KeyError.
for expected, accepted in Mistakes.items():
    if accepted not in student_results.index:
        continue
    if expected in student_results.index:
        student_results.loc[expected] = student_results.loc[expected].fillna(student_results.loc[accepted])
    else:
        student_results.loc[expected] = student_results.loc[accepted]

# Keep only the variables requested in the exam (single, non in-place drop).
student_results = student_results.drop(
    [el for el in student_results.index.tolist() if el not in truenames])

student_results[student_results.columns[:3]]

### 2.3. Name to NIA dictionary

Finally, since datafiles are created by NIA and results are available per student name, we need to create a dictionary connecting them

In [7]:
###########################################
# EXAM DEPENDENT VARIABLE
excel_file = 'ListaClase.xlsx'
language = 'spanish'
###########################################

student_NIA_names = pd.read_excel(excel_file)

#UTF-8 encoding of everything
for fld in student_NIA_names.keys():
    if fld != 'NIU':
        student_NIA_names[fld] = student_NIA_names[fld].str.encode('utf8')

# Roster columns holding the first name and the surname(s). They depend on the
# language of the class list and are used both for matching submissions and
# for listing the students without submission.
if language == 'english':
    first_name_col, surname_col = 'First name', 'Surname'
else:
    first_name_col, surname_col = 'Nombre', 'Apellido(s)'

# Extract the roster once instead of on every loop iteration.
roster_NIAs = student_NIA_names['NIU'].values.tolist()
roster_names = [first + ' ' + last for first, last in
                zip(student_NIA_names[first_name_col].values.tolist(),
                    student_NIA_names[surname_col].values.tolist())]

# Match every submission (column of student_results) with the roster entry
# whose full name is most similar to the submission name.
NIA_name = {}

for el in student_results.columns.tolist():

    sim_list = [difflib.SequenceMatcher(a=el.lower(), b=std_name.lower()).ratio()
                for std_name in roster_names]

    max_sim = max(sim_list)
    max_idx = sim_list.index(max_sim)
    matched_NIA = roster_NIAs[max_idx]

    if matched_NIA in NIA_name:
        # Two submissions matched the same roster entry: the last one wins
        # (as before), but the collision is now reported.
        print('WARNING: NIA ' + str(matched_NIA) + ' matched by more than one submission')

    NIA_name[matched_NIA] = el

#Create dictionary for students that did not hand in anything
NIA_name_nodata = {}
for NIA, std_name in zip(roster_NIAs, roster_names):
    # The first roster row is kept if a NIA is listed more than once.
    if NIA not in NIA_name and NIA not in NIA_name_nodata:
        NIA_name_nodata[NIA] = std_name

#Create name to NIA dictionary
name_NIA = {NIA_name[el]: el for el in NIA_name}
name_NIA_nodata = {NIA_name_nodata[el]: el for el in NIA_name_nodata}


In [None]:
# List every matched pair as "NIA : submission name"
for nia, submission_name in NIA_name.items():
    print(str(nia) + ' : ' + submission_name)


At this point we have:

   * student_data: dataframe with data given to the students. Each index is a variable, and each column a NIA
   * student_results: dataframe with student results. Each index is a variable, and each column a name
   * NIA_name: NIA to name dictionary
   * name_NIA: name to NIA dictionary

## 3. Exam solution

In this section we implement the solution to the exam. This is a function that takes the variables generated for a given student and the answers provided by the student, and generates a structure with all possible answers, possibly with a penalty factor associated with each answer.

In [9]:
#print NIA_name

In [None]:
# Search by name
###########################################
# EXAM DEPENDENT VARIABLE
# Name of the student to inspect: it must be one of the columns of
# student_results. The assignment had no value (a syntax error); it now
# defaults to the first submission so that the notebook runs end to end.
student = student_results.columns[0]
###########################################
print(student_data[str(name_NIA[student])])
print(" ")
# The column is looked up with the same key used by name_NIA and by the
# cell that prints the solution for this student.
print(student_results[student])
## Search by NIA
#NIA = '100339092'
#print student_data[NIA]
#print " "
#print student_results[NIA_name[int(NIA)]]

In [23]:
### Some methods required for the Classification solution
def normalize(X, mx=None, sx=None):
    """Standardize the columns of X.

    Parameters
    ----------
    X : array
        Data matrix; statistics are computed along axis 0.
    mx : array or None
        Value subtracted from X. If None, the column-wise mean of X is used.
    sx : array or None
        Value dividing the centered data. If None, the column-wise standard
        deviation of X (np.std) is used.

    Returns
    -------
    tuple
        (normalized data, mx actually used, sx actually used)
    """
    center = np.mean(X, axis=0) if mx is None else mx
    scale = np.std(X, axis=0) if sx is None else sx

    return (X-center)/scale, center, scale

def logistic(x):
    """Logistic function 1 / (1 + exp(-x)), applied element-wise."""
    denominator = 1 + np.exp(-x)
    return 1.0 / denominator

def logregFitR(Z_tr, Y_tr, rho, C, n_it):
    """MAP trainer: regularized logistic regression by gradient descent.

    Starting from w = 0, the weights are updated n_it times with

        w <- (1 - 2*rho/C) * w + rho * Z_tr.T (Y_tr - p1)

    where p1 = logistic(Z_tr w).

    Parameters
    ----------
    Z_tr : array, 2-D
        Input matrix; one weight is fitted per column.
    Y_tr : array
        Labels, combined with the (n, 1) vector of probabilities.
        # assumes a column vector of 0/1 labels -- TODO confirm with callers
    rho : float
        Learning step.
    C : float
        Regularization constant (the shrinkage per step is 2*rho/C).
    n_it : int
        Number of iterations.

    Returns
    -------
    w : array, shape (Z_tr.shape[1], 1)
        Weights after the last iteration.
    nll_tr : array, shape (n_it,)
        Negative log-likelihood evaluated before each update.
    """
    # Initialize variables
    n_dim = Z_tr.shape[1]
    nll_tr = np.zeros(n_it)
    # Plain zeros: the previous 0*randn initialization gave the same values
    # but consumed global random state for nothing.
    w = np.zeros((n_dim, 1))

    # Running the gradient descent algorithm
    for n in range(n_it):

        # Compute posterior probabilities for weight w
        scores = np.dot(Z_tr, w)
        p1_tr = logistic(scores)
        p0_tr = logistic(-scores)

        # Compute negative log-likelihood. The dot products return a
        # one-element array; store it explicitly as a scalar.
        nll = - np.dot(Y_tr.T, np.log(p1_tr)) - np.dot((1-Y_tr).T, np.log(p0_tr))
        nll_tr[n] = np.asarray(nll).item()

        # Update weights (2.0 keeps the ratio a true division even when rho
        # and C are both integers under Python 2).
        w = (1-2.0*rho/C)*w + rho*np.dot(Z_tr.T, Y_tr - p1_tr)

    return w, nll_tr

def computeNLL(Z, Y, w):
    """Negative log-likelihood of labels Y under the logistic model with weights w.

    Computes -Y.T log(p1) - (1-Y).T log(p0), with p1 = logistic(Z w) and
    p0 = logistic(-Z w). The result is whatever the dot products return
    (it is not reduced to a Python scalar).
    """
    scores = np.dot(Z, w)

    # Log-probabilities of class 1 and class 0 for every sample
    log_p1 = np.log(logistic(scores))
    log_p0 = np.log(logistic(-scores))

    return - np.dot(Y.T, log_p1) - np.dot((1-Y).T, log_p0)

def logregPredict(Z, w):
    """Compute predictions for a given logistic regression model.

    Parameters
    ----------
    Z : array
        Input matrix.
    w : array
        Weights of the model.

    Returns
    -------
    p : array
        Posterior probability of class 1, logistic(Z w).
    D : list of int
        Hard decisions, one per entry of p: 1 if p >= 0.5, else 0.
    """
    # Compute posterior probability of class 1 for weights w.
    p = logistic(np.dot(Z, w))

    # Classify with an explicit threshold. round() on the one-element rows of
    # p is not supported by Python 3, and its handling of p == 0.5 differs
    # between Python 2 (gives 1) and Python 3 (rounds to even, i.e. 0); the
    # threshold keeps the Python 2 decisions on every interpreter.
    D = (np.ravel(p) >= 0.5).astype(int).tolist()

    return p, D

In [24]:
from sklearn import neighbors

def isVarFail(x):
    """Tell whether a delivered variable is unusable.

    Returns True when every entry of x is NaN, or when x cannot be tested
    with np.isnan at all (e.g. None, text or other non-numeric content).
    Returns False as soon as one entry is not NaN.
    """
    try:
        return np.all(np.isnan(x))
    except Exception:
        # Anything np.isnan cannot handle counts as a failed variable.
        # Unlike a bare except, this lets KeyboardInterrupt/SystemExit through.
        return True

def SolveClassif(Xtrain, Xval, xval_rl, ytr_rl, yval_rl, rho_, wst=None):
    """Solve the classification questions (Secs. 3c to 3f) for one setting.

    Parameters
    ----------
    Xtrain, Xval : arrays
        Training and validation inputs; a column of ones is prepended to each.
    xval_rl : array
        Alternative validation inputs, used for the 'b' answers when its
        shape equals that of Xval.
    ytr_rl, yval_rl : arrays
        Training and validation labels.
    rho_ : float
        Learning step passed to logregFitR.
    wst : array or None
        If given, it is used as w10 instead of training a model.

    Returns
    -------
    tuple
        (w10_, lg10a, lg10b, n1a, n1b, emin_, nvara, nvarb, wmin_)
    """
    ## Sec. 3c
    # Fixed training parameters
    C, n_it = 100, 100

    # Extended training matrix (leading column of ones)
    Z_tr = np.hstack((np.ones((Xtrain.shape[0], 1)), Xtrain))
    w10_ = logregFitR(Z_tr, ytr_rl, rho_, C, n_it)[0] if wst is None else wst

    ## Sec. 3d: NLL over the validation set
    n_val = Xval.shape[0]
    ones_val = np.ones((n_val, 1))
    Z_val = np.hstack((ones_val, Xval))
    lg10a = computeNLL(Z_val, yval_rl, w10_)

    ## Sec. 3e: number of validation samples assigned to class 1
    decisions_a = logregPredict(Z_val, w10_)[1]
    n1a = np.sum(np.array(decisions_a) == 1)

    # The following alternative response is incorrect,
    # but the students are certainly induced to do it in the statement...
    lg10b, n1b = lg10a, n1a
    if np.array_equal(Xval.shape, xval_rl.shape):
        Z_valb = np.hstack((ones_val, xval_rl))
        lg10b = computeNLL(Z_valb, yval_rl, w10_)
        decisions_b = logregPredict(Z_valb, w10_)[1]
        n1b = np.sum(np.array(decisions_b) == 1)

    ## Sec. 3f.
    # Remove one input variable at a time (columns 1 to 10 of the extended
    # matrices) and keep the removal with the lowest validation error rate.
    emin_ = 10000*n_val
    for column in range(1, 11):

        Ztr_i = np.delete(Z_tr, column, axis=1)
        Zval_i = np.delete(Z_val, column, axis=1)

        wi = logregFitR(Ztr_i, ytr_rl, rho_, C, n_it)[0]
        decisions_i = logregPredict(Zval_i, wi)[1]

        ei = np.mean(np.array(decisions_i)[:, np.newaxis] != yval_rl)

        # '<=': among equally good removals, the last one is kept
        if not ei <= emin_:
            continue
        emin_ = ei
        nvara = column
        nvarb = column - 1   # This is not correct, but it is accepted because the statement is not very clear about this.
        wmin_ = wi

    return w10_, lg10a, lg10b, n1a, n1b, emin_, nvara, nvarb, wmin_
    
def SolveLabXX(data, st_solution):
    """Solver for the practical
    Input parameters:
    data: A series with the data given to the student
    st_solution: The solution provided by the student

    Output: A dataseries where each element is a list of tuples
    with the format [(solution1, factor1), (solution2, factor2)]

    Factors are multiplicative factors to account for possible
    penalties. A factor 1 should be given to a solution that should
    not be penalized.

    The series is indexed, in this order, by: s0, E2val, s_prom, E2val_knn,
    w_cov, w_mean, mx, sx, xn_tr, xn_val, w10, rho, lg10, n1, emin, nvar,
    wmin. The order matters because automatic_evaluator pairs the weights and
    tolerances with these entries by position.

    Besides the reference answers, answers that are consistent with values
    delivered by the student (s0, rho, xn_tr, w10, xn_val) are also added,
    so that an early mistake is not penalized again in later questions.
    """

    def rho_factor(rho_value):
        # Answers computed with a non-positive learning step only get 10%.
        return 1 if rho_value > 0 else 0.1

    # #############
    # ## REGRESSION

    ## Sec. 2.1
    s0 = [(np.mean(data['str_reg']), 1)]

    E2val = []
    E2val.append((np.mean((data['sval_reg'] - s0[0][0])**2), 1))
    # 50% penalty if the total was computed instead of the average
    E2val.append((np.sum((data['sval_reg'] - s0[0][0])**2), .5))

    # Using the value of s0 provided by the student. Empty or non-numeric
    # answers are skipped instead of raising.
    st_s0_flat = np.array(st_solution['s0']).flatten()
    if st_s0_flat.size and not isVarFail(st_s0_flat[0]):
        st_s0 = st_s0_flat[0]
        E2val.append((np.mean((data['sval_reg'] - st_s0)**2), .7))
        E2val.append((np.sum((data['sval_reg'] - st_s0)**2), .35))

    ## Sec. 2.2
    s_prom = []
    E2val_knn = []

    # Entries are added in this order: validation data (uniform, distance
    # weighting) and then training data (uniform, distance weighting).
    # The error is penalized x0.5 if it was computed over the training data.
    # NOTE(review): s_prom computed over the training data keeps factor 1,
    # as in the previous version -- confirm this is intended.
    k_knn = data['k_knn'][0, 0]
    evaluation_sets = [(data['xval_reg'], data['sval_reg'], 1),
                       (data['xtr_reg'], data['str_reg'], .5)]
    for x_eval, s_eval, err_factor in evaluation_sets:
        for weighting in ['uniform', 'distance']:
            knn = neighbors.KNeighborsRegressor(k_knn, weights=weighting)
            sval = knn.fit(data['xtr_reg'], data['str_reg']).predict(x_eval)
            s_prom.append((np.mean(sval), 1))
            E2val_knn.append((np.mean((s_eval - sval)**2), err_factor))

    ## Sec. 2.3
    # Extended input matrix [1, x, exp(x)]. Sizes are taken from the data
    # (they were hard-coded to 500 rows and 5 columns).
    xtr_reg = data['xtr_reg']
    Z = np.hstack((np.ones((xtr_reg.shape[0], 1)), xtr_reg, np.exp(xtr_reg)))

    Sigma_p = 2*np.eye(Z.shape[1])
    var_n = .5

    w_cov = [(np.linalg.inv(Z.T.dot(Z)/var_n + np.linalg.inv(Sigma_p)), 1)]
    w_mean = [(w_cov[0][0].dot(Z.T).dot(data['str_reg'])/var_n, 1)]

    # #################
    # ## CLASSIFICATION

    ## Standard correct response:

    ## Sec. 3a
    # Get data
    xtr_rl = data['xtr_rl']
    xval_rl = data['xval_rl']
    ytr_rl = data['ytr_rl']
    yval_rl = data['yval_rl']

    # Compute mean and std and normalize (validation data are normalized
    # with the statistics of the training data)
    Xtrain, mx0, sx0 = normalize(xtr_rl)
    Xval, mx0, sx0 = normalize(xval_rl, mx0, sx0)

    # Save means and variances
    mx = [(mx0, 1)]
    sx = [(sx0**2, 1), (sx0, 0.9)]   # The 2nd response is incorrect, but the statement induces to compute it.

    ## Sec. 3b
    # Normalized variables have been computed in the previous section
    xn_tr = [(Xtrain, 1)]
    xn_val = [(Xval, 1)]

    # Solve the rest of the questions
    rho_ = 0.001
    w10_, lg10a, lg10b, n1a, n1b, emin_, nvara, nvarb, wmin_ = SolveClassif(
        Xtrain, Xval, xval_rl, ytr_rl, yval_rl, rho_)
    w10 = [(w10_, 1)]
    rho = [(np.array(rho_), 1)]   # This variable is requested, but not used for evaluation
    lg10 = [(lg10a, 1), (lg10b, 0.9)]
    n1 = [(n1a, 1), (n1b, 0.9)]
    emin = [(emin_, 1)]
    nvar = [(nvara, 1), (nvarb, 1)]
    wmin = [(wmin_, 1)]

    ## ALTERNATIVE 1: USING A DIFFERENT rho:
    # The rho declared by the student is used if it is a single number.
    rho_2 = rho_
    st_rho = st_solution['rho']
    if not isVarFail(st_rho) and np.array(st_rho).flatten().shape[0] == 1:
        # Plain float, so that the training arithmetic does not depend on
        # the dtype/shape of the delivered value.
        rho_2 = float(np.array(st_rho).flatten()[0])
        factor = rho_factor(rho_2)

        w10_2, lg10a2, lg10b2, n1a2, n1b2, emin_2, nvar_2a, nvar_2b, wmin_2 = SolveClassif(
            Xtrain, Xval, xval_rl, ytr_rl, yval_rl, rho_2)

        w10 = [(w10_, 0.8), (w10_2, factor)]    # The score for w10_ is modified because of claiming a different rho.
        lg10 += [(lg10a2, 1), (lg10b2, 0.9)]
        n1 += [(n1a2, 1), (n1b2, 0.9)]
        emin += [(emin_2, factor)]
        nvar += [(nvar_2a, factor), (nvar_2b, factor)]
        wmin += [(wmin_2, factor)]

    ## ALTERNATIVE 2: USING STUDENT'S TRAINING DATA MATRIX
    # From now on, the declared rho is used
    if not isVarFail(st_solution['xn_tr']):
        Xtrain2 = np.asarray(st_solution['xn_tr'])
        if np.array_equal(Xtrain.shape, Xtrain2.shape):
            factor = rho_factor(rho_2)

            w10_3, lg10a3, lg10b3, n1a3, n1b3, emin_3, nvar_3a, nvar_3b, wmin_3 = SolveClassif(
                Xtrain2, Xval, xval_rl, ytr_rl, yval_rl, rho_2)

            w10 += [(w10_3, factor)]
            lg10 += [(lg10a3, 1), (lg10b3, 0.9)]
            n1 += [(n1a3, 1), (n1b3, 0.9)]
            emin += [(emin_3, factor)]
            # A typo here ("0,1" instead of "0.1") used to give the second
            # answer a 3-element tuple with factor 0.
            nvar += [(nvar_3a, factor), (nvar_3b, factor)]
            wmin += [(wmin_3, factor)]

    ## ALTERNATIVE 3: USING STUDENT'S WEIGHTS
    w10st = np.array(st_solution['w10'])
    if not isVarFail(w10st):
        if np.array_equal(w10_.shape, w10st.shape):
            w10_4, lg10a4, lg10b4, n1a4, n1b4, emin_4, nvar_4a, nvar_4b, wmin_4 = SolveClassif(
                Xtrain, Xval, xval_rl, ytr_rl, yval_rl, rho_2, w10st)
            lg10 += [(lg10a4, 0.9), (lg10b4, 0.8)]
            n1 += [(n1a4, 0.9), (n1b4, 0.8)]

    ## ALTERNATIVE 4: USING STUDENT'S WEIGHTS AND DATA VALIDATION MATRIX
    # Now the same, but using the student weights and data matrix
    if not isVarFail(w10st) and not isVarFail(st_solution['xn_val']):
        Xval2 = np.asarray(st_solution['xn_val'])
        if np.array_equal(Xval.shape, Xval2.shape) and np.array_equal(w10_.shape, w10st.shape):
            w10_5, lg10a5, lg10b5, n1a5, n1b5, emin_5, nvar_5a, nvar_5b, wmin_5 = SolveClassif(
                Xtrain, Xval2, xval_rl, ytr_rl, yval_rl, rho_2, w10st)
            lg10 += [(lg10a5, 0.9), (lg10b5, 0.8)]
            n1 += [(n1a5, 0.9), (n1b5, 0.8)]

    # Collect all answers. Keep this order: it must match the weights and
    # tolerances used for the evaluation.
    answers = [('s0', s0), ('E2val', E2val), ('s_prom', s_prom), ('E2val_knn', E2val_knn),
               ('w_cov', w_cov), ('w_mean', w_mean),
               ('mx', mx), ('sx', sx), ('xn_tr', xn_tr), ('xn_val', xn_val),
               ('w10', w10), ('rho', rho), ('lg10', lg10), ('n1', n1),
               ('emin', emin), ('nvar', nvar), ('wmin', wmin)]
    ds_indexes = [name for name, _ in answers]
    ds_values = [values for _, values in answers]

    return pd.Series(ds_values, ds_indexes)
    

In [None]:
# Answers delivered by the selected student, followed by the accepted answers
student_answers = student_results[student]
print(student_answers)
print(" ")
print(SolveLabXX(student_data[str(name_NIA[student])], student_results[student]))

## 4. Evaluation of all students

In [15]:
def automatic_evaluator(NIA, student_results, solution, weights, tolerance):
    """Score the answers of one student against the accepted solutions.

    Parameters
    ----------
    NIA : student identifier, stored in the output under 'NIA'.
    student_results : Series with the answers of the student, indexed by
        variable name.
    solution : Series indexed by variable name; each element is a list of
        (accepted value, factor) tuples.
    weights : list with the maximum score of each variable, in the order of
        solution.keys().
    tolerance : list with the maximum mean absolute error accepted for each
        variable, in the same order.

    Returns
    -------
    pandas.Series
        For each variable, the list [delivered (1/0), best factor, weight,
        score]; then 'FinalScore' (sum of the scores) and 'NIA'. If the
        lengths of weights/tolerance do not match the number of variables,
        a message is printed and only 'NIA' is returned.

    An answer matches an accepted value when both have the same number of
    elements and their mean absolute difference is below the tolerance. The
    score is the weight times the largest factor among the matches.
    """

    val = []
    idx = []

    variables = list(solution.keys())

    if len(variables) == len(weights) and len(variables) == len(tolerance):

        for wgh, el, tol in zip(weights, variables, tolerance):

            # A variable counts as delivered unless it is missing, all NaN or
            # not numeric.
            try:
                isAllNaN = np.all(np.isnan(student_results[el]))
            except Exception:
                isAllNaN = True

            if isAllNaN:
                var_summary = [0, 0, wgh, 0]
            else:
                answer = np.array(student_results[el]).flatten()

                # Mean absolute error against every accepted value. NaN marks
                # a size mismatch: it never passes the tolerance test, and no
                # subtraction of incompatible shapes is attempted (that used
                # to crash the diagnostic print).
                errors = []
                for entry in solution[el]:
                    expected = np.array(entry[0]).flatten()
                    if np.array_equal(answer.shape, expected.shape):
                        errors.append(np.mean(np.abs(expected - answer)))
                    else:
                        errors.append(float('nan'))

                factors = [entry[1] for entry, err in zip(solution[el], errors)
                           if err < tol]

                # Diagnostics: variable, error per accepted value and factors
                print(el)
                print(errors)
                print(factors)

                if len(factors):
                    max_factor = max(factors)
                    var_summary = [1, max_factor, wgh, max_factor*wgh]
                else:
                    var_summary = [1, 0, wgh, 0]

            # Keep values corresponding to current variable
            val.append(var_summary)
            idx.append(el)

        final_score = sum([item[-1] for item in val])
        val.append(final_score)
        idx.append('FinalScore')

    else:
        print('The number of weights and variables to evaluate differ. Please, check')

    val.append(NIA)
    idx.append('NIA')
    return pd.Series(val,index=idx)

In [None]:
###########################################
# EXAM DEPENDENT VARIABLE
excel_output = 'Notas_MIT.xlsx'
all_students = 1 #Include in the list students that did not carry out the exam
weightsR = [.5, .5, 1, 1, 1, 1]
weightsC = [.5, .5, .5, .5, 1, 0, 1, 1, 1.0/3, 1.0/3, 1.0/3]
sR = sum(weightsR)
sC = sum(weightsC)
# First group of weights rescaled to add up to 6 points, second group to 4
weights = [w*6.0/sR for w in weightsR] + [w*4.0/sC for w in weightsC]
print(weights)

# One tolerance per variable; only the 11th one differs (5e-2)
tolerance = [1e-2]*10 + [5e-2] + [1e-2]*6
###########################################

df = pd.DataFrame()

print(len(NIA_name.keys()))

# Evaluate every student that handed in results: one column per student
for NIA, submission_name in NIA_name.items():
    print(submission_name)
    answers = student_results[submission_name]
    solution = SolveLabXX(student_data[str(NIA)], answers)
    df[submission_name.decode('utf8')] = automatic_evaluator(NIA, answers, solution, weights,
                                                             tolerance)
print(df)

# Students without results only get their NIA
if all_students:
    for NIA, roster_name in NIA_name_nodata.items():
        df[roster_name.decode('utf8')] = pd.Series([NIA],index=['NIA'])

# One row per student in the Excel file
df.T.to_excel(excel_output,columns=df.T.columns)