In [1]:
import pandas as pd
import numpy as np

In [54]:
def laplace_log_likelihood(actual_fvc, predicted_fvc, confidence, return_values=False):
    """
    Calculates the modified Laplace Log Likelihood score for this competition.
    """
    sd_clipped = np.maximum(confidence, 70)
    delta = np.minimum(np.abs(actual_fvc - predicted_fvc), 1000)
    metric = - np.sqrt(2) * delta / sd_clipped - np.log(np.sqrt(2) * sd_clipped)

    if return_values:
        return metric
    else:
        return np.mean(metric)

In [11]:
train = pd.read_csv('train.csv')
test = pd.read_csv('test.csv')
sample_submission = pd.read_csv('sample_submission.csv')
slope_int = pd.read_csv('patient_slope_intercept.csv')

In [12]:
slope_int.columns = ['Patients', 'slope', 'intersect']

In [13]:
slope_int

Unnamed: 0,Patients,slope,intersect
0,ID00007637202177411956430,-3.167126,2174.416183
1,ID00009637202177434476278,-9.379955,3821.785590
2,ID00010637202177584971671,-17.042803,3311.689608
3,ID00011637202177653955184,-4.548925,3487.076353
4,ID00012637202177665765362,-8.543079,3774.033242
...,...,...,...
171,ID00419637202311204720264,-2.135975,2828.850124
172,ID00421637202311550012437,0.597787,2764.010579
173,ID00422637202311677017371,-4.740104,1984.824680
174,ID00423637202312137826377,-9.214886,3049.995721


In [14]:
sample_submission

Unnamed: 0,Patient_Week,FVC,Confidence
0,ID00419637202311204720264_-12,2000,100
1,ID00421637202311550012437_-12,2000,100
2,ID00422637202311677017371_-12,2000,100
3,ID00423637202312137826377_-12,2000,100
4,ID00426637202313170790466_-12,2000,100
...,...,...,...
725,ID00419637202311204720264_133,2000,100
726,ID00421637202311550012437_133,2000,100
727,ID00422637202311677017371_133,2000,100
728,ID00423637202312137826377_133,2000,100


In [15]:
test

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00419637202311204720264,6,3020,70.186855,73,Male,Ex-smoker
1,ID00421637202311550012437,15,2739,82.045291,68,Male,Ex-smoker
2,ID00422637202311677017371,6,1930,76.672493,73,Male,Ex-smoker
3,ID00423637202312137826377,17,3294,79.258903,72,Male,Ex-smoker
4,ID00426637202313170790466,0,2925,71.824968,73,Male,Never smoked


In [18]:
patients = test.Patient.to_list()

In [60]:
def FVC_line_pred(slope, initial_week, initial_value):
    b = initial_value - slope*initial_week
    def FVC_line(week):
        return slope*week + b
    return FVC_line

In [48]:
submission = pd.DataFrame(columns = ['Patient_Week', 'FVC', 'Confidence'])

for pat in patients:
    initial_week = int(test[test['Patient']==pat].Weeks)
    func = FVC_line_pred(int(slope_int[slope_int['Patients'] == pat].slope), initial_week,
                         int(test[test['Patient']==pat].FVC))
    for i in range(-12,134):
        pat_week = pat + '_' + str(i)
        FVC_i = func(i)
        confidence_i = abs(initial_week - i)*3.7
        submission = submission.append({'Patient_Week': pat_week, 'FVC': FVC_i, 'Confidence': confidence_i}, ignore_index=True)
        

In [49]:
submission

Unnamed: 0,Patient_Week,FVC,Confidence
0,ID00419637202311204720264_-12,3056,66.6
1,ID00419637202311204720264_-11,3054,62.9
2,ID00419637202311204720264_-10,3052,59.2
3,ID00419637202311204720264_-9,3050,55.5
4,ID00419637202311204720264_-8,3048,51.8
...,...,...,...
725,ID00426637202313170790466_129,2796,477.3
726,ID00426637202313170790466_130,2795,481.0
727,ID00426637202313170790466_131,2794,484.7
728,ID00426637202313170790466_132,2793,488.4


In [50]:
test

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
0,ID00419637202311204720264,6,3020,70.186855,73,Male,Ex-smoker
1,ID00421637202311550012437,15,2739,82.045291,68,Male,Ex-smoker
2,ID00422637202311677017371,6,1930,76.672493,73,Male,Ex-smoker
3,ID00423637202312137826377,17,3294,79.258903,72,Male,Ex-smoker
4,ID00426637202313170790466,0,2925,71.824968,73,Male,Never smoked


In [52]:
submission[submission['Patient_Week'] == 'ID00419637202311204720264_133']

Unnamed: 0,Patient_Week,FVC,Confidence
145,ID00419637202311204720264_133,2766,469.9


In [59]:
train[train.Patient == test.Patient.unique()[4]]

Unnamed: 0,Patient,Weeks,FVC,Percent,Age,Sex,SmokingStatus
1540,ID00426637202313170790466,0,2925,71.824968,73,Male,Never smoked
1541,ID00426637202313170790466,7,2903,71.284746,73,Male,Never smoked
1542,ID00426637202313170790466,9,2916,71.603968,73,Male,Never smoked
1543,ID00426637202313170790466,11,2976,73.077301,73,Male,Never smoked
1544,ID00426637202313170790466,13,2712,66.594637,73,Male,Never smoked
1545,ID00426637202313170790466,19,2978,73.126412,73,Male,Never smoked
1546,ID00426637202313170790466,31,2908,71.407524,73,Male,Never smoked
1547,ID00426637202313170790466,43,2975,73.052745,73,Male,Never smoked
1548,ID00426637202313170790466,59,2774,68.117081,73,Male,Never smoked


In [None]:
test.Patient.unique