In [1]:
import pandas as pd 
# import polars as pl 
import numpy as np
import xgboost
import os 
from tqdm import tqdm
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.svm import SVR
from functools import reduce

In [2]:
xgboost_params={'n_estimators': 10,
  'max_depth': 4,
  'learning_rate': 0.3,
  'colsample_bytree': 1,
  'subsample': 1,
  'tree_method': 'hist',
  'n_jobs': 12,
  'gamma': 0,
  'reg_lambda': 0}
svr_params={
    "kernel":"linear"
}
model_use="xgboost"
models_dict={"xgboost":{"model":xgboost.XGBRegressor,"params":xgboost_params},"linear":{"model":LinearRegression,"params":{ "fit_intercept":True}},"svr":{"model":SVR,"params":svr_params}}

In [3]:
import pkg_resources
import types
def get_imports():
    for name, val in globals().items():
        if isinstance(val, types.ModuleType):
            # Split ensures you get root package, 
            # not just imported function
            name = val.__name__.split(".")[0]

        elif isinstance(val, type):
            name = val.__module__.split(".")[0]

        # Some packages are weird and have different
        # imported names vs. system names
        if name == "PIL":
            name = "Pillow"
        elif name == "sklearn":
            name = "scikit-learn"

        yield name
imports = list(set(get_imports()))

requirements = []
for m in pkg_resources.working_set:
    if m.project_name in imports and m.project_name!="pip":
        requirements.append((m.project_name, m.version))

for r in requirements:
    print("{}=={}".format(*r))

tqdm==4.64.1
numpy==1.21.6
xgboost==1.6.2
pandas==1.3.5
scikit-learn==1.0.2


In [4]:
INPUT_DIR="/kaggle/input/amp-parkinsons-disease-progression-prediction"

In [5]:
#FEATURE ENGENERING
# Get peptide catidates from higher variability of abundance
def get_peptide_cadidates(peptides_df, num_candidates=10):
    # Calculate the coefficient of variation (CV) for PeptideAbundance per patient_ids and Peptides
    train_peptides_df_agg = peptides_df[['patient_id', 'Peptide', 'PeptideAbundance']]
    train_peptides_df_agg = train_peptides_df_agg\
                            .groupby(['patient_id', 'Peptide'])['PeptideAbundance']\
                            .aggregate(['mean', 'std'])
    train_peptides_df_agg['CV_PeptideAbundance[%]'] = \
        train_peptides_df_agg['std'] / train_peptides_df_agg['mean'] * 100

    # Mean CV value of Peptides
    abundance_cv_mean = train_peptides_df_agg\
                        .groupby('Peptide')['CV_PeptideAbundance[%]']\
                        .mean().reset_index()
    abundance_cv_mean = abundance_cv_mean.sort_values(
        by='CV_PeptideAbundance[%]', ascending=False).reset_index(drop=True)

    # Get peptide candidates
    peptide_candidates = abundance_cv_mean.loc[:num_candidates-1, 'Peptide']
    return peptide_candidates


# Get protein candidate from proteins with higher variability of NPX
def get_protein_cadidates(proteins_df, num_candidates=10):
    # Calculate the coefficient of variation (CV) for NPX per patient_ids and UniProt
    train_proteins_df_agg = proteins_df[['patient_id', 'UniProt', 'NPX']]
    train_proteins_df_agg = train_proteins_df_agg.groupby(['patient_id', 'UniProt'])['NPX']\
                            .aggregate(['mean', 'std'])
    train_proteins_df_agg['CV_NPX[%]'] = \
        train_proteins_df_agg['std'] / train_proteins_df_agg['mean'] * 100

    # Mean CV value for UniProt
    NPX_cv_mean = train_proteins_df_agg.groupby('UniProt')['CV_NPX[%]'].mean().reset_index()
    NPX_cv_mean = NPX_cv_mean.sort_values(by='CV_NPX[%]', ascending=False).reset_index(drop=True)

    # Get peptide candidates
    protein_candidates = NPX_cv_mean.loc[:num_candidates-1, 'UniProt']
    return protein_candidates
    


def preprocessing_data(clinical_df, peptides_df, proteins_df,
                       peptide_candidates, protein_candidates,train=True):
    peptides_df_use=peptides_df.loc[peptides_df.Peptide.isin(peptide_candidates )].reset_index(drop=True)
    peptides_df_use=peptides_df_use.pivot_table(index=["visit_id"],columns=["Peptide"],values="PeptideAbundance",fill_value=np.nan)

    proteins_df_use=proteins_df.loc[proteins_df.UniProt.isin(protein_candidates )].reset_index(drop=True)
    proteins_df_use=proteins_df_use.pivot_table(index=["visit_id"],columns=["UniProt"],values="NPX",fill_value=np.nan)

    df=pd.merge(clinical_df,proteins_df_use,on="visit_id",how="left")
    df=pd.merge(df,peptides_df_use,on="visit_id",how="left")
  
    FEATURES_ewn=list(peptide_candidates)+list(protein_candidates)
    FEATURES=FEATURES_ewn+["visit_month"]
    for feature in FEATURES_ewn:
        if feature not in df.columns:
            print(f" feature {feature} not in columns, adding it as NA")
            df[feature]=np.nan
    if train:
        df=df.sort_values(by=["patient_id","visit_month"])
        #df[FEATURES_ewn]=df.groupby("patient_id",sort=False).fillna(method="ffill")[FEATURES_ewn]
    else:
        df=df.sort_values(by=["patient_id","updrs_test","visit_month"])
        
        #df[FEATURES_ewn]=df.groupby(["patient_id","updrs_test"],sort=False).fillna(method="ffill")[FEATURES_ewn]
        
    
    return df,FEATURES_ewn,FEATURES


In [6]:
train_clinical_df = pd.read_csv(os.path.join(INPUT_DIR, 'train_clinical_data.csv'))
train_peptides = pd.read_csv(os.path.join(INPUT_DIR, 'train_peptides.csv'))
train_proteins = pd.read_csv(os.path.join(INPUT_DIR, 'train_proteins.csv'))
NUM_CANDIDATES =5
peptide_candidates=get_peptide_cadidates(train_peptides,num_candidates=NUM_CANDIDATES )
protein_candidates=get_protein_cadidates(train_proteins,num_candidates=NUM_CANDIDATES )
train_df,FEATURES_ewn,FEATURES=preprocessing_data(train_clinical_df, train_peptides, train_proteins,
                       peptide_candidates, protein_candidates)
print('train_df:')
display(train_df.head(10).style.set_properties(
    **{"background-color": "#212636","color":"white","border": "1.5px solid white"}))

train_df:


Unnamed: 0,visit_id,patient_id,visit_month,updrs_1,updrs_2,updrs_3,updrs_4,upd23b_clinical_state_on_medication,P01861,P16152,P30086,P98160,Q8IWV7,ADDKETC(UniMod_4)FAEEGK,LFDSDPITVTVPVEVSR,LPPTSAHGNVAEGETKPDPDVTER,SC(UniMod_4)SPELQQK,TTPPVLDSDGSFFLYSK
0,55_0,55,0,10.0,6.0,15.0,,,18430.1,47171.0,89747.3,21970.1,57246.2,70412.0,130107.0,129775.0,47171.0,80617.8
1,55_3,55,3,10.0,7.0,25.0,,,,,,,,,,,,
2,55_6,55,6,8.0,10.0,34.0,,,31112.4,44669.0,72686.0,20700.7,158574.0,63052.4,288345.0,874897.0,44669.0,78095.3
3,55_9,55,9,8.0,9.0,30.0,0.0,On,,,,,,,,,,
4,55_12,55,12,10.0,10.0,41.0,0.0,On,22094.8,44159.5,86064.9,19547.8,152944.0,72325.0,282297.0,828847.0,44159.5,81248.8
5,55_18,55,18,7.0,13.0,38.0,0.0,On,,,,,,,,,,
6,55_24,55,24,16.0,9.0,49.0,0.0,On,,,,,,,,,,
7,55_30,55,30,14.0,13.0,49.0,0.0,On,,,,,,,,,,
8,55_36,55,36,17.0,18.0,51.0,0.0,On,21304.2,48076.2,90811.9,21385.7,177998.0,537430.0,71400.1,960251.0,48076.2,6875.79
9,55_42,55,42,12.0,20.0,41.0,0.0,On,,,,,,,,,,


In [7]:
all_features=list(set(train_df.columns)-set(["visit_id","patient_id","updrs_1","updrs_2","updrs_3","updrs_4","upd23b_clinical_state_on_medication"]))

In [8]:
FEATURES

['SC(UniMod_4)SPELQQK',
 'LPPTSAHGNVAEGETKPDPDVTER',
 'ADDKETC(UniMod_4)FAEEGK',
 'TTPPVLDSDGSFFLYSK',
 'LFDSDPITVTVPVEVSR',
 'P16152',
 'P98160',
 'Q8IWV7',
 'P30086',
 'P01861',
 'visit_month']

In [9]:
train_df.shape

(2615, 18)

In [10]:
def create_X_y_train_dataset(df, updrs_part, plus_month):
    # df: train_df, created above
    # updrs_part: 1 to 4
    # plus_month: 0, 6, 12, 24
    df_ = df.dropna(subset=[f'updrs_{updrs_part}'])
    X_visit_ids = []
    y_visit_ids = []
    patient_ids = df['patient_id'].unique()
    for i, patient_id in enumerate(patient_ids):
        patient_df = df_[df_['patient_id']==patient_id]
        plus_months = patient_df['visit_month'] + plus_month
        plus_months = patient_df.query('visit_month in @plus_months')['visit_month']
        original_months = plus_months - plus_month
        patient_id = str(patient_id)
        X_visit_id = [patient_id+'_'+str(original_month) for original_month in original_months]
        y_visit_id = [patient_id+'_'+str(plus_month) for plus_month in plus_months]
        X_visit_ids.extend(X_visit_id)
        y_visit_ids.extend(y_visit_id)
    
    X = df_.query('visit_id in @X_visit_ids')
    X = X.drop(['updrs_1', 'updrs_2', 'updrs_3', 'updrs_4'], axis=1)
    X.reset_index(drop=True, inplace=True)
    
    y = df_.query('visit_id in @y_visit_ids')
    y = y[['patient_id','visit_id', f'updrs_{updrs_part}']]
    y.reset_index(drop=True, inplace=True)
    
    return X, y

def create_X_y_dict(df):
    X_dict = {}
    y_dict = {}
    for updrs_part in tqdm([1, 2, 3, 4]):
        for plus_month in [0, 6, 12, 24]:
            X, y = create_X_y_train_dataset(df, updrs_part, plus_month)
            key = f'updrs_{updrs_part}_plus_{plus_month}_months'
            X_dict[key] = X
            y_dict[key] = y
    return X_dict, y_dict

In [11]:
X_dict, y_dict = create_X_y_dict(train_df)

100%|██████████| 4/4 [00:14<00:00,  3.61s/it]


In [12]:
X_dict['updrs_1_plus_0_months']

Unnamed: 0,visit_id,patient_id,visit_month,upd23b_clinical_state_on_medication,P01861,P16152,P30086,P98160,Q8IWV7,ADDKETC(UniMod_4)FAEEGK,LFDSDPITVTVPVEVSR,LPPTSAHGNVAEGETKPDPDVTER,SC(UniMod_4)SPELQQK,TTPPVLDSDGSFFLYSK
0,55_0,55,0,,18430.1,47171.0,89747.3,21970.1,57246.2,70412.0,130107.0,129775.0,47171.0,80617.8
1,55_3,55,3,,,,,,,,,,,
2,55_6,55,6,,31112.4,44669.0,72686.0,20700.7,158574.0,63052.4,288345.0,874897.0,44669.0,78095.3
3,55_9,55,9,On,,,,,,,,,,
4,55_12,55,12,On,22094.8,44159.5,86064.9,19547.8,152944.0,72325.0,282297.0,828847.0,44159.5,81248.8
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2609,65043_48,65043,48,Off,,24151.3,83370.2,14624.0,93123.7,446770.0,243788.0,708453.0,24151.3,85003.7
2610,65043_54,65043,54,Off,,,,,,,,,,
2611,65043_60,65043,60,Off,,,,,,,,,,
2612,65043_72,65043,72,Off,,,,,,,,,,


In [13]:
y_dict['updrs_1_plus_0_months']

Unnamed: 0,patient_id,visit_id,updrs_1
0,55,55_0,10.0
1,55,55_3,10.0
2,55,55_6,8.0
3,55,55_9,8.0
4,55,55_12,10.0
...,...,...,...
2609,65043,65043_48,7.0
2610,65043,65043_54,4.0
2611,65043,65043_60,6.0
2612,65043,65043_72,3.0


In [14]:
def smape1(y_true, y_pred):
    y_true = y_true + 1
    y_pred = y_pred + 1
    numerator = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    positive_index = (y_true!=0) | (y_pred!=0)
    smape = np.zeros(len(y_true))
    smape[positive_index] = numerator[positive_index] / denominator[positive_index]
    smape = 100 * np.mean(smape)
    return smape

In [15]:
patients_array=[]
for key,X in X_dict.items():
    X=X_dict[key]
    print(key,X_dict[key].shape,len(X_dict[key].patient_id.unique()),X_dict[key].visit_month.unique())
    patients=X.patient_id.unique()
    patients_array.append(patients)
intersection=reduce(lambda a,b:set(a).intersection(set(b)),patients_array)
union=reduce(lambda a,b:set(a).union(set(b)),patients_array) 

intersection=list(intersection)
intersection.sort()
len(intersection),len(union)

updrs_1_plus_0_months (2614, 14) 248 [  0   3   6   9  12  18  24  30  36  42  48  54  60  72  84  96 108]
updrs_1_plus_6_months (1681, 14) 197 [ 0  3  6 12 18 24 30 36 42 48 54]
updrs_1_plus_12_months (1881, 14) 248 [ 0  6 12 18 24 30 36 42 48 60 72 84 96]
updrs_1_plus_24_months (1473, 14) 246 [ 0  6 12 18 24 30 36 48 60 72 84]
updrs_2_plus_0_months (2613, 14) 248 [  0   3   6   9  12  18  24  30  36  42  48  54  60  72  84  96 108]
updrs_2_plus_6_months (1680, 14) 197 [ 0  3  6 12 18 24 30 36 42 48 54]
updrs_2_plus_12_months (1881, 14) 248 [ 0  6 12 18 24 30 36 42 48 60 72 84 96]
updrs_2_plus_24_months (1473, 14) 246 [ 0  6 12 18 24 30 36 48 60 72 84]
updrs_3_plus_0_months (2590, 14) 248 [  0   3   6   9  12  18  24  30  36  42  48  54  60  72  84  96 108]
updrs_3_plus_6_months (1655, 14) 197 [ 0  3  6 12 18 24 30 36 42 48 54]
updrs_3_plus_12_months (1848, 14) 248 [ 0  6 12 18 24 30 36 42 48 60 72 84 96]
updrs_3_plus_24_months (1443, 14) 246 [ 0  6 12 18 24 30 36 48 60 72 84]
updrs_4

(167, 248)

In [16]:
patients_train,patients_test = train_test_split( X_dict["updrs_1_plus_0_months"].patient_id.unique(),test_size=0.15, random_state=42)
patients_train.shape,patients_test.shape 
patients_train_good=list(set(patients_train).intersection(set(intersection)))
patients_train_bad=list(set(patients_train)-(set(intersection)))

In [17]:
patients_good=list(set(X_dict["updrs_1_plus_0_months"].patient_id.unique()).intersection(intersection))

In [18]:
len(patients_train),len(patients_train_good),len(patients_test ),len(patients_good),len(X_dict["updrs_1_plus_0_months"].patient_id.unique())

(210, 146, 38, 167, 248)

In [19]:
groups_test=train_df.loc[train_df.patient_id.isin(patients_test)].groupby(["patient_id","visit_month"]).size()
index=pd.MultiIndex.from_product([groups_test.index.get_level_values(0),groups_test.index.get_level_values(1)],names=groups_test.index.names)
groups_test=groups_test.reindex(index,fill_value=0).reset_index().rename(columns={0:"count"})
patients_test_bad=groups_test.query("visit_month==6 and count==0").patient_id.unique()
patients_test_good=list(set(patients_test)-set(patients_test_bad))

In [20]:
len(patients_test_bad),len(patients_test_good)

(9, 29)

In [21]:
FEATURES_MONTH=["visit_month"]
models={"bad":{},"good":{}}
model_use="svr"
params={
    "kernel":"rbf",
    "degree":2,
    "C":2,
    "epsilon":2
}
FEATURES=FEATURES_MONTH
df_pred=None
for key,X in X_dict.items():
    target=key.rsplit("_",3)[0]
    y=y_dict [key]
    y["visit_month"]=y["visit_id"].transform(lambda x:x.split("_")[1]).astype(int)
    
    
    model=models_dict[model_use]["model"](**params)
    X_train_good=X.loc[X.patient_id.isin(patients_train_good)]
    y_train_good=y.loc[y.patient_id.isin(patients_train_good)]
    model.fit(X_train_good[FEATURES],y_train_good[ target])
    
    X_test_good=X.loc[(X.patient_id.isin(patients_test_good)) |(X.visit_month<=6)]
    y_test_good=y.loc[(y.patient_id.isin(patients_test_good)) |(X.visit_month<=6)]
    
    y_pred_good=model.predict(X_test_good[FEATURES])
    y_pred_good[y_pred_good<0]=0
    y_pred_good=np.round(y_pred_good)
    df_pred=pd.concat([df_pred,pd.DataFrame({"real":y_test_good[ target], "pred":y_pred_good,"key":key.rsplit("_",3)[0]})])
    score=smape1(y_test_good[ target], y_pred_good)
    print(f"Key {key}, score good  {score}")
    print(X_test_good.shape, y_test_good.shape)
    
    
    
    model_bad=models_dict[model_use]["model"](**models_dict[model_use]["params"])    
    X_train_bad=X.loc[X.patient_id.isin(patients_train_bad)]
    y_train_bad=y.loc[y.patient_id.isin(patients_train_bad)]
    model_bad.fit(X_train_bad[FEATURES],y_train_bad[target])
    
    X_test_bad=X.loc[(X.patient_id.isin(patients_test_bad)) & (X.visit_month>6)]
    y_test_bad=y.loc[(y.patient_id.isin(patients_test_bad)) &(X.visit_month>6)]
    if len(X_test_bad)>0:
        if key.rsplit("_",3)[0]=="updrs_1":
            y_pred_bad=3
        elif key.rsplit("_",3)[0]=="updrs_2":
            y_pred_bad=1
        elif key.rsplit("_",3)[0]=="updrs_3":
            y_pred_bad=1
        df_pred=pd.concat([df_pred,pd.DataFrame({"real":y_test_bad[ target], "pred":y_pred_bad,"key":key.rsplit("_",3)[0]})])
        score=smape1(y_test_bad[ target], y_pred_bad)
        print(f"Key {key}, score  bad {score}")
        print(X_test_good.shape, y_test_good.shape)
    
   
    X_good=X.loc[(X.patient_id.isin(patients_good))]
    y_good=y.loc[(y.patient_id.isin(patients_good))]
    #X=X.loc[X.patient_id.isin(intersection)]
    #y=y.loc[y.patient_id.isin(intersection)]
    model.fit(X[FEATURES],y[ target])
    models["good"][key]=model
    models["bad"][key]=  model_bad
    

Key updrs_1_plus_0_months, score good  51.78574370883297
(833, 14) (833, 4)
Key updrs_1_plus_0_months, score  bad 56.88963131586083
(833, 14) (833, 4)
Key updrs_1_plus_6_months, score good  50.946652638874056
(669, 14) (669, 4)
Key updrs_1_plus_6_months, score  bad 68.13852813852813
(669, 14) (669, 4)
Key updrs_1_plus_12_months, score good  49.95667070807212
(628, 14) (628, 4)
Key updrs_1_plus_12_months, score  bad 52.939441510870076
(628, 14) (628, 4)
Key updrs_1_plus_24_months, score good  49.83954966107597
(560, 14) (560, 4)
Key updrs_1_plus_24_months, score  bad 50.008150579579144
(560, 14) (560, 4)
Key updrs_2_plus_0_months, score good  64.97296643152275
(832, 14) (832, 4)
Key updrs_2_plus_0_months, score  bad 61.087218792136824
(832, 14) (832, 4)
Key updrs_2_plus_6_months, score good  61.70049599740553
(668, 14) (668, 4)
Key updrs_2_plus_6_months, score  bad 33.33333333333333
(668, 14) (668, 4)
Key updrs_2_plus_12_months, score good  68.09938342860106
(628, 14) (628, 4)
Key updrs

In [22]:
df_pred.loc[df_pred.key=="updrs_4","pred"]=0
smape1(df_pred["real"], df_pred["pred"])

58.40665518807901

In [23]:
len(df_pred)

9617

In [24]:
import amp_pd_peptide
env = amp_pd_peptide.make_env()
iter_test = env.iter_test()



In [25]:
i = 0
samples=[]
test_clinical_df_acumulated=None
train_peptides_df_acumulated=None
train_proteins_df_acumulated=None
for (test_clinical_df, train_peptides_df, train_proteins_df, sample_submission_df) in iter_test:
    print(f"Iteration {i}")
    test_clinical_df_acumulated=pd.concat([test_clinical_df_acumulated,test_clinical_df])
    train_peptides_df_acumulated=pd.concat([train_peptides_df_acumulated,train_peptides_df])
    train_proteins_df_acumulated=pd.concat([ train_proteins_df_acumulated,train_proteins_df])
    sample_submission_df_old=  sample_submission_df.copy()
    sample_submission_df["updrs_test"]=sample_submission_df["prediction_id"].transform(lambda x: x.split("_plus")[0].split("_",2)[2])
    sample_submission_df["visit_id"]=sample_submission_df["prediction_id"].transform(lambda x: x.rsplit("_",5)[0])
    sample_submission_df["model_key"]=sample_submission_df["prediction_id"].transform(lambda x: x.split("_",2)[2])
    sample_submission_df=sample_submission_df.drop("rating",axis=1)
    test_df,_,_ = preprocessing_data(
        test_clinical_df_acumulated,
        train_peptides_df_acumulated,
        train_proteins_df_acumulated,
        peptide_candidates, # defined in training
        protein_candidates,  # defined in training
        train=False
    )
    if test_clinical_df.visit_month.unique().min()>6:
        groups_test=test_df.groupby(["patient_id","visit_month"]).size()
        index=pd.MultiIndex.from_product([groups_test.index.get_level_values(0),groups_test.index.get_level_values(1)],names=groups_test.index.names)
        groups_test=groups_test.reindex(index,fill_value=0).reset_index().rename(columns={0:"count"})
        patients_test_bad=groups_test.query("count==0 and visit_month==6").patient_id.unique()
        patients_test_good=list(set(test_df.patient_id.unique())-set(patients_test_bad))
    else:
        patients_test_good=test_df.patient_id.unique()
        patients_test_bad=[]
        
    df_predict=None
    for key,df in sample_submission_df.groupby("model_key"):
        df=pd.merge(df,test_df,on=["updrs_test","visit_id"])
        if key.rsplit("_",3)[0]!='updrs_4':
            
            df_good=df.loc[df.patient_id.isin(patients_test_good)]
            df_bad=df.loc[df.patient_id.isin(patients_test_bad)]
            model_good=models["good"][key]
            model_bad=models["bad"][key]
            if len(df_good)>0:
                df_good["rating"]=model_good.predict(df_good[FEATURES])
                mask=df_good["rating"]<0
                df_good.loc[mask,"rating"]=0
                df_good["rating"]=np.round( df_good["rating"])
            if len(df_bad)>0:
                if key.rsplit("_",3)[0]=="updrs_1":
                    df_bad["rating"]=3
                elif key.rsplit("_",3)[0]=="updrs_2":
                    df_bad["rating"]=1
                elif key.rsplit("_",3)[0]=="updrs_3":
                    df_bad["rating"]=1
            df=pd.concat([df_good,df_bad])
        else:
            print(f"Just 0 for {key}")
            df["rating"]=0
        df_predict=pd.concat([df_predict,df[["prediction_id","rating"]]])
    
    sample_submission_df=pd.merge(sample_submission_df,df_predict,on="prediction_id",how="left").drop(["updrs_test","visit_id","model_key"],axis=1)
    sample_submission_df=sample_submission_df.fillna(0)
    env.predict(sample_submission_df)
    
    
    
    i += 1

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.
Iteration 0
Just 0 for updrs_4_plus_0_months
Just 0 for updrs_4_plus_12_months
Just 0 for updrs_4_plus_24_months
Just 0 for updrs_4_plus_6_months
Iteration 1
Just 0 for updrs_4_plus_0_months
Just 0 for updrs_4_plus_12_months
Just 0 for updrs_4_plus_24_months
Just 0 for updrs_4_plus_6_months


In [26]:
sample_submission_df

Unnamed: 0,prediction_id,rating
0,3342_6_updrs_1_plus_0_months,5.0
1,3342_6_updrs_1_plus_6_months,6.0
2,3342_6_updrs_1_plus_12_months,6.0
3,3342_6_updrs_1_plus_24_months,7.0
4,3342_6_updrs_2_plus_0_months,5.0
5,3342_6_updrs_2_plus_6_months,6.0
6,3342_6_updrs_2_plus_12_months,5.0
7,3342_6_updrs_2_plus_24_months,6.0
8,3342_6_updrs_3_plus_0_months,18.0
9,3342_6_updrs_3_plus_6_months,19.0
