In [None]:
# Takes a comparison file (in the correct format) and calculates the KCRVs and DoEs using the largest consistent
# subset. Writes an Excel file with the measurements, uncertainties, participants included, DoEs and KCRVs.

# You need to specify the directory where the comparison files are saved. The output files are saved in the directory
# that the code is run from.

In [5]:
import xlrd
import math
import xlwt
from scipy import stats
import numpy as np
from scipy.stats import chi2
import operator as op
import itertools

In [28]:
# Run the notebook containing the methods and the data-reading code so that its functions can be called below
%run "Methods-Using-GLS.ipynb" -G

In [36]:
# Specify the directory in which the comparison data files are saved.
# NOTE(review): no directory is set in this cell; presumably any path has to be part of each file name below, or is
# handled inside read_data() - confirm.
# List all files which you want to analyse. Each name is passed to read_data() and is reused as the prefix of the
# output workbook name.
files = ["CCL-K1-1.xls"]

In [37]:
def _write_mp_sheet(sheet, names, n_names, weights, u_added, chisq_before, chisq_after, dof):
    """Fill an 'MP model' sheet with the weights after MP and a chi-squared summary.

    Columns 0-1 hold one row per participant (name, weights[i, 0]); columns 4-5 hold
    the label/value pairs of the summary.

    sheet        -- xlwt worksheet to write into
    names        -- sequence of participant names, indexed 0 .. n_names-1
    n_names      -- number of participant rows to write
    weights      -- 2-D indexable object; weights[i, 0] is written for participant i
    u_added      -- value written next to 'Uncertainty added'
    chisq_before -- value written next to 'ChiSq before'
    chisq_after  -- value written next to 'ChiSq after'
    dof          -- degrees of freedom; also used for the 5 % upper-tail chi-squared value
    """
    sheet.write(0, 0, 'Participant')
    sheet.write(0, 1, 'Weight after MP')

    summary = (
        ('Uncertainty added', u_added),
        ('ChiSq before', chisq_before),
        ('ChiSq after', chisq_after),
        ('ChiSq expected', chi2.isf(0.05, dof)),
        ('df', dof),
    )
    for row, (label, value) in enumerate(summary):
        sheet.write(row, 4, label)
        sheet.write(row, 5, value)

    for k in range(n_names):
        sheet.write(k + 1, 0, names[k])
        sheet.write(k + 1, 1, weights[k, 0])


def _write_estimate_sheet(sheet, titles, labels, count, beta, cov_beta, offset):
    """Fill a three-column sheet: label, estimate and uncertainty, one row per label.

    sheet    -- xlwt worksheet to write into
    titles   -- the three column titles written into row 0
    labels   -- sequence of row labels, indexed 0 .. count-1
    count    -- number of rows to write
    beta     -- 2-D indexable object; beta[k + offset, 0] is the estimate of row k
    cov_beta -- 2-D indexable object; sqrt(cov_beta[k + offset, 0]) is the uncertainty of row k
    offset   -- index of the first relevant entry in beta / cov_beta
    """
    for col, title in enumerate(titles):
        sheet.write(0, col, title)

    for k in range(count):
        sheet.write(k + 1, 0, labels[k])
        sheet.write(k + 1, 1, beta[k + offset, 0])
        # NOTE(review): this reads column 0 of cov_beta, not the diagonal element [k + offset, k + offset].
        # If cov_beta is a full covariance matrix the variance would be on the diagonal - confirm against
        # what calculate_beta()/Cut() actually return.
        sheet.write(k + 1, 2, np.sqrt(cov_beta[k + offset, 0]))


# For every comparison file: read the data, run the cut-off, OO and MP (model A and model B) analyses in that
# order, and save one workbook with a sheet per step.
for file in files:

    # Read data in from file
    (y, U, NumArtefacts, NumMeasurements, NumParticipants, ave_U, X, w, ParticipantNames, MeasurementTitles,
     count_per_lab, ArtefactNames) = read_data(file)

    # Create workbook to save data
    wb = xlwt.Workbook()

    # Add sheet with measurements
    # NOTE(review): this loop runs over NumMeasurements but indexes ParticipantNames and ave_U, which the other
    # sheets index per participant; MeasurementTitles is read but never used - confirm the intended labels.
    ws = wb.add_sheet('y')
    for col, title in enumerate(('Participant', 'Measurement', 'Uncertainty (k=1)')):
        ws.write(0, col, title)
    for i in range(NumMeasurements):
        ws.write(i + 1, 1, y[i, 0])
        ws.write(i + 1, 0, ParticipantNames[i])
        ws.write(i + 1, 2, ave_U[i])

    # Calculate beta, CovBeta, ChiSq
    gamma, beta, CovBeta = calculate_beta(y, U, X, w)
    ChiSq_A = model_A(beta, ave_U)
    ChiSq_B = model_B(y, U, X, beta)

    # Apply cut-off and update weights
    (beta_C, CovBeta_C, U_adjust, w_cs, w_C, ChiSq, w_MP, cut, u_cut) = Cut(
        y, U, w, X, gamma, beta, CovBeta, ChiSq_B, ave_U, 'B')

    # The sheet is written straight away: cut and u_cut are overwritten by the later Cut() calls.
    ws = wb.add_sheet('Cut-off')
    for col, title in enumerate(('Participant', 'Unc before cut-off', 'Unc after cut-off')):
        ws.write(0, col, title)
    ws.write(0, 4, 'Cut-off unc')
    ws.write(0, 5, u_cut)
    for i in range(NumParticipants):
        ws.write(i + 1, 0, ParticipantNames[i])
        ws.write(i + 1, 1, ave_U[i])
        ws.write(i + 1, 2, cut[i])

    # Apply OO
    (beta_OO, CovBeta_OO, U_adjust, w_cs, w_OO, ChiSq, w_MP, cut, u_cut) = Cut(
        y, U, w, X, gamma, beta, CovBeta, ChiSq_B, ave_U, 'B', include_OO=True)

    # Add sheet with the excluded participants from OO (uses w_cs from the call just above)
    ws = wb.add_sheet('OO')
    ws.write(0, 0, 'Participant')
    ws.write(0, 1, 'Included')
    for i in range(NumParticipants):
        ws.write(i + 1, 0, ParticipantNames[i])
        ws.write(i + 1, 1, int(w_cs[i]))

    # Apply MP model A
    (beta_MP_A, CovBeta_MP_A, U_adjust_A, w_cs, w_MP, ChiSq_MP_A, w_MP_A, cut, u_cut) = Cut(
        y, U, w, X, gamma, beta, CovBeta, ChiSq_A, ave_U, 'A', include_OO=True, include_MP=True)

    # Add sheet with the information from MP model A
    ws = wb.add_sheet('MP model A')
    _write_mp_sheet(ws, ParticipantNames, NumParticipants, w_MP_A,
                    U_adjust_A[0, 0], ChiSq_A[0, 0], ChiSq_MP_A[0, 0],
                    NumParticipants - 1)

    # Add sheet with DoEs A (entries of beta after the first NumArtefacts ones)
    ws = wb.add_sheet('DoEs, MP model A')
    _write_estimate_sheet(ws, ('Participant', 'DoE', 'U(DoE)'), ParticipantNames, NumParticipants,
                          beta_MP_A, CovBeta_MP_A, NumArtefacts)

    # Add sheet with the theta A (first NumArtefacts entries of beta)
    ws = wb.add_sheet('Theta, MP model A')
    _write_estimate_sheet(ws, ('Artefact', 'KCRV', 'U(KCRV)'), ArtefactNames, NumArtefacts,
                          beta_MP_A, CovBeta_MP_A, 0)

    # Apply MP model B
    (beta_MP_B, CovBeta_MP_B, U_adjust_B, w_cs, w_MP, ChiSq_MP_B, w_MP_B, cut, u_cut) = Cut(
        y, U, w, X, gamma, beta, CovBeta, ChiSq_B, ave_U, 'B', include_OO=True, include_MP=True)

    # Add sheet with the information from MP model B
    # NOTE(review): U_adjust_B is written as returned, whereas model A writes U_adjust_A[0,0] - confirm that
    # Cut() returns a scalar here for model 'B'.
    ws = wb.add_sheet('MP model B')
    _write_mp_sheet(ws, ParticipantNames, NumParticipants, w_MP_B,
                    U_adjust_B, ChiSq_B[0, 0], ChiSq_MP_B[0, 0],
                    NumMeasurements + 1 - NumArtefacts - NumParticipants)

    # Add sheet with DoEs B
    ws = wb.add_sheet('DoEs, MP model B')
    _write_estimate_sheet(ws, ('Participant', 'DoE', 'U(DoE)'), ParticipantNames, NumParticipants,
                          beta_MP_B, CovBeta_MP_B, NumArtefacts)

    # Add sheet with the theta B
    ws = wb.add_sheet('Theta, MP model B')
    _write_estimate_sheet(ws, ('Artefact', 'KCRV', 'U(KCRV)'), ArtefactNames, NumArtefacts,
                          beta_MP_B, CovBeta_MP_B, 0)

    # The suffix is appended to the full input name, so "name.xls" produces "name.xls-cut,OO,MP.xls".
    wb.save(file+'-cut,OO,MP.xls')

