# In [1]:
import pandas as pd
from datetime import datetime
import numpy as np
import os
import gc
pd.set_option('display.max_columns', 100)
import joblib
def generate_submission_file(final, output_path='submission.csv'):
    '''
        Generates submission file from predicted results

        Args:
            final : DataFrame with `order_id`, `product_id` and `prediction`
                    columns (one row per order / candidate product pair)
            output_path : path of the CSV to write, defaults to 'submission.csv'
        Returns:
                None (the result is written to `output_path` with the
                columns `order_id` and `products`)
    '''
    # Collect the predicted products of every order. Dicts keep insertion
    # order, so orders are written in the order they are first predicted.
    products_by_order = {}
    rows = zip(final['order_id'], final['product_id'], final['prediction'])
    for order_id, product_id, prediction in rows:
        if prediction == 1:
            products_by_order.setdefault(order_id, []).append(str(product_id))

    # Products of one order are separated by a single space
    submission = {
        order_id: ' '.join(products)
        for order_id, products in products_by_order.items()
    }
    # If no product is predicted for a particular order_id then we will predict None
    for order_id in final['order_id']:
        if order_id not in submission:
            submission[order_id] = 'None'
    gc.collect()

    # Build the frame with explicit columns so that an empty `final` still
    # yields a valid, header-only file instead of a column-count mismatch.
    sub = pd.DataFrame(
        {
            'order_id': list(submission.keys()),
            'products': list(submission.values()),
        },
        columns=['order_id', 'products'],
    )
    sub.to_csv(output_path, index=False, header=True)
    
def get_predictions(data_test, df_orders, threshold=0.22, model_path='lgbm.pkl'):
    '''
         Gives predictions and generates submission.csv file
        Args: 
            data_test :  test DF; it is passed unchanged to the model and must
                         expose `product_id` and `user_id` after reset_index()
            df_orders : user's orders data with `eval_set`, `user_id` and
                        `order_id` columns
            threshold : minimum probability (column 1 of predict_proba) for a
                        product to be predicted, defaults to 0.22
            model_path : path of the saved model, defaults to 'lgbm.pkl'
        Returns:
                None
    '''
    # load model
    # NOTE: joblib.load unpickles the file, so only load model files you trust.
    lgbm = joblib.load(model_path)
    # making predictions on the test dataset
    y_pred_test = (lgbm.predict_proba(data_test)[:, 1] >= threshold).astype('int')
    # Attach the predictions to a fresh frame instead of adding a column to
    # data_test: the caller's frame stays untouched, so calling this function
    # again does not feed a 'prediction' column to the model as a feature.
    final = data_test.reset_index()[['product_id', 'user_id']].assign(
        prediction=y_pred_test
    )
    gc.collect()
    orders_test = df_orders.loc[df_orders.eval_set == 'test', ['user_id', 'order_id']]
    # merging our prediction with orders_test
    # NOTE(review): with how='left', users without a 'test' order get a
    # missing order_id -- confirm every user in data_test has one.
    final = final.merge(orders_test, on='user_id', how='left')
    # remove user_id column
    final = final.drop('user_id', axis=1)
    # convert product_id as integer
    final['product_id'] = final.product_id.astype(int)
    ## Remove all unnecessary objects
    del orders_test
    gc.collect()
    generate_submission_file(final)
    print("File generation suncessful")