In [1]:
%run linear_optimization.ipynb

## Helpers

In [2]:
def make_benchmark_data(fields, num_patient, need_groups = (5, 10, 90),
                    file_name = 'benchmark_dataset.csv',
                    improv = (0, 1, 2, 3), disImprov = (0, -1), seed = None):
    '''
    Build a synthetic benchmark table of per-need scores for two programs.

    The first ``num_patient // 2`` rows are labelled 'IBHS' and the remaining
    rows 'FBMHS'. Each row is ``[client id as str, program, need_1, ..., need_k]``
    with ``k = need_groups[-1]``.

    Parameters
    ----------
    fields : list of str
        Column names of the returned table. Must contain ``2 + need_groups[-1]``
        names (id column, program column, then one column per need).
    num_patient : int
        Number of rows (patients) to generate.
    need_groups : sequence of int
        Cut points ``[a, b, c]`` over the need columns (at least two entries):
        needs ``[0:a]`` are drawn from ``improv`` for IBHS rows and from
        ``disImprov`` for FBMHS rows;
        needs ``[a:b]`` are drawn from ``improv`` for FBMHS rows and from
        ``disImprov`` for IBHS rows;
        needs ``[b:c]`` are drawn from ``improv + disImprov`` for every row.
    file_name : str
        Currently unused (nothing is written to disk); kept so existing
        callers that pass it keep working.
    improv : sequence
        Values a need may take when it is in an "improving" group.
    disImprov : sequence
        Values a need may take when it is in a "dis-improving" group.
    seed : int or None
        When given, values are drawn from a private ``random.Random(seed)`` so
        the table is reproducible. When None (default), the ``random`` object
        already in scope is used, exactly as before.

    Returns
    -------
    pandas.DataFrame
        ``num_patient`` rows with columns ``fields``.
    '''
    if seed is None:
        # Default path: keep drawing from the shared generator.
        choose = random.choice
    else:
        # Function-scope import so the seeded path always uses the stdlib
        # generator, independent of what the global name `random` is bound to.
        import random as _stdlib_random
        choose = _stdlib_random.Random(seed).choice

    # Materialise the pools once; list() also lets callers mix tuples and lists.
    improv_pool = list(improv)
    disimprov_pool = list(disImprov)
    mixed_pool = improv_pool + disimprov_pool
    ibhs_end, fbmhs_end, total_needs = need_groups[0], need_groups[1], need_groups[-1]

    rows = []
    for i in range(num_patient):
        # First half of the patients is IBHS, second half FBMHS.
        program = 'IBHS' if i < num_patient // 2 else 'FBMHS'
        row = [str(i), program]

        for ni in range(total_needs):
            if ni < ibhs_end:
                # Needs that improve for IBHS patients only.
                pool = improv_pool if program == 'IBHS' else disimprov_pool
            elif ni < fbmhs_end:
                # Needs that improve for FBMHS patients only.
                pool = improv_pool if program == 'FBMHS' else disimprov_pool
            else:
                # Needs that improve or dis-improve at random for everyone.
                pool = mixed_pool
            row.append(choose(pool))
        rows.append(row)
    return pd.DataFrame(rows, columns = fields)

In [3]:
def validate_result(x_item, need_splits, tol = 0.0):
    '''
    Summarise, per need group, how many entries of ``x_item`` are 1, -1 or
    strictly in between.

    Parameters
    ----------
    x_item : array-like of numbers
        One value per need (1-D). Lists, numpy arrays and pandas Series are
        accepted; the values are read positionally.
    need_splits : sequence of int
        Cut points ``[a, b, c]``: the groups are ``x_item[0:a]``,
        ``x_item[a:b]`` and ``x_item[b:]``. ``c`` is only used for the label
        of the last row.
    tol : float
        Absolute tolerance when deciding that a value equals 1 or -1.
        The default 0.0 means exact equality.

    Returns
    -------
    pandas.DataFrame
        3 x 3 table of counts, one row per group and one column per category.
        Values outside ``[-1, 1]`` (beyond ``tol``) are not counted anywhere.
    '''
    values = np.asarray(x_item, dtype = float)

    # Group boundaries; the final None makes the last slice run to the end.
    bounds = [0, need_splits[0], need_splits[1], None]

    counts = []
    for start, stop in zip(bounds[:-1], bounds[1:]):
        segment = values[start:stop]
        is_one = np.abs(segment - 1.0) <= tol
        is_minus_one = np.abs(segment + 1.0) <= tol
        # Complementary to the two tests above, so no value is counted twice.
        is_between = (segment < 1.0 - tol) & (segment > -1.0 + tol)
        counts.append([int(np.count_nonzero(is_one)),
                       int(np.count_nonzero(is_minus_one)),
                       int(np.count_nonzero(is_between))])

    return pd.DataFrame(np.array(counts),
                        columns = ['number_of_item_is_1',
                                   'number_of_item_is_-1',
                                   'number_of_item_is_betweem_1_and_-1'],
                        index = ['Items 1-' + str(need_splits[0]),
                                 'Items ' + str(need_splits[0]+1) + '-' + str(need_splits[1]),
                                 'Items ' + str(need_splits[1]+1) + '-' + str(need_splits[2])])

## Benchmark Data

In [4]:
# Seed the shared random generator so that Restart & Run All regenerates the
# same benchmark table; make_benchmark_data draws from `random`.
SEED = 42
random.seed(SEED)

num_patient = 200
# Cut points over the need columns: [0:5] favour IBHS, [5:10] favour FBMHS,
# [10:90] are random for both programs.
need_splits = [5,10,90]
# Need columns are labelled '1' .. '90'.
fields_need = [str(i+1) for i in range(need_splits[-1])]
fields = ['client_id', 'Program'] + fields_need
name = 'benchmark_dataset.csv'
benchmark = make_benchmark_data(fields, num_patient,
                  need_groups = need_splits)

In [5]:
# Display the generated benchmark table (the rendered output elides the middle rows and columns).
benchmark

Unnamed: 0,client_id,Program,1,2,3,4,5,6,7,8,...,81,82,83,84,85,86,87,88,89,90
0,0,IBHS,3,1,0,1,3,0,-1,-1,...,0,3,3,3,0,0,3,2,3,2
1,1,IBHS,3,0,3,2,1,0,-1,0,...,1,2,2,0,1,3,3,0,0,1
2,2,IBHS,1,0,0,1,0,-1,-1,-1,...,3,0,0,1,1,-1,-1,0,2,1
3,3,IBHS,0,0,3,1,1,0,0,-1,...,3,2,0,0,0,0,1,1,1,3
4,4,IBHS,1,0,1,1,0,0,0,0,...,0,3,2,-1,0,0,2,1,-1,2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
195,195,FBMHS,0,0,0,0,-1,2,3,0,...,0,0,-1,-1,-1,1,1,0,-1,3
196,196,FBMHS,0,-1,-1,0,0,2,2,2,...,2,-1,3,-1,1,2,0,0,1,1
197,197,FBMHS,0,0,0,0,-1,1,2,3,...,0,1,0,1,3,-1,1,0,-1,0
198,198,FBMHS,0,0,-1,0,-1,1,0,2,...,0,3,2,3,0,3,1,0,-1,1


## Validation 1

In [6]:
# Validation 1: run the optimisation with batch_num = 1 on the need columns
# (everything after client_id and Program) of each program, then tabulate
# the resulting item values per need group.
# NOTE(review): twosum_improvement_xneed_all is not defined in this notebook;
# presumably it comes from linear_optimization.ipynb via %run - confirm.
ibhs_mask = benchmark['Program'] == 'IBHS'
fbmhs_mask = benchmark['Program'] == 'FBMHS'
x_item_1 = twosum_improvement_xneed_all(
    benchmark.loc[ibhs_mask].iloc[:, 2:],
    benchmark.loc[fbmhs_mask].iloc[:, 2:],
    batch_num = 1,
    n = 1,
)
validate_result(x_item_1, need_splits)

Using license file /Users/carayi/gurobi.lic
Academic license - for non-commercial use only


Unnamed: 0,number_of_item_is_1,number_of_item_is_-1,number_of_item_is_betweem_1_and_-1
Items 1-5,0,5,0
Items 6-10,5,0,0
Items 11-90,32,48,0


## Validation 2

In [7]:
# Validation 2: same check as Validation 1 but with batch_num = 5.
# NOTE(review): twosum_improvement_xneed_all is not defined in this notebook;
# presumably it comes from linear_optimization.ipynb via %run - confirm.
ibhs_mask = benchmark['Program'] == 'IBHS'
fbmhs_mask = benchmark['Program'] == 'FBMHS'
x_item_2 = twosum_improvement_xneed_all(
    benchmark.loc[ibhs_mask].iloc[:, 2:],
    benchmark.loc[fbmhs_mask].iloc[:, 2:],
    batch_num = 5,
    n = 1,
)
validate_result(x_item_2, need_splits)

Unnamed: 0,number_of_item_is_1,number_of_item_is_-1,number_of_item_is_betweem_1_and_-1
Items 1-5,0,5,0
Items 6-10,5,0,0
Items 11-90,0,2,78
