## Import libraries

In [1]:
import pandas as pd
import numpy as np

## Load customer and vendor data

In [2]:
# Read the test-customer table and the vendor table from the Data/ folder.
customers = pd.read_csv("Data/test_customers_p1.csv")
vendors = pd.read_csv("Data/vendors_p1.csv")

## For every customer, attach the details of all 100 vendors

In [3]:
## Reduce memory usage
def reduce_mem_usage(df, verbose=True):
    """Downcast numeric columns of ``df`` to the narrowest dtype covering their range.

    Only columns whose dtype is one of int16/int32/int64/float16/float32/float64
    are considered; every other column (object, bool, int8, unsigned, ...) is
    left untouched.

    Integer columns are cast to the first of int8/int16/int32/int64 whose
    [min, max] range contains the column's min and max (bounds inclusive, so a
    column holding exactly -128..127 becomes int8).

    Float columns are cast to float16 if their range fits, else float32, else
    float64. NOTE: only the *range* is checked for floats, not precision --
    float16 keeps roughly three significant decimal digits, so values may be
    rounded by this step.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to shrink. It is modified in place.
    verbose : bool, default True
        When true, print the final memory footprint and the percent reduction.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.
    """
    numerics = ['int16', 'int32', 'int64', 'float16', 'float32', 'float64']
    int_candidates = (np.int8, np.int16, np.int32, np.int64)
    float_candidates = (np.float16, np.float32)

    start_mem = df.memory_usage().sum() / 1024**2  # bytes -> MiB
    for col in df.columns:
        col_type = df[col].dtypes
        if col_type not in numerics:
            continue
        c_min = df[col].min()
        c_max = df[col].max()
        if str(col_type)[:3] == 'int':
            # Narrowest integer type first; if nothing matches (e.g. the
            # column is empty and min/max are NaN) the column is left as is.
            for candidate in int_candidates:
                limits = np.iinfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
        else:
            for candidate in float_candidates:
                limits = np.finfo(candidate)
                if limits.min <= c_min and c_max <= limits.max:
                    df[col] = df[col].astype(candidate)
                    break
            else:
                # Range exceeds float32, or min/max is NaN/inf so no
                # comparison succeeded: fall back to float64.
                df[col] = df[col].astype(np.float64)

    end_mem = df.memory_usage().sum() / 1024**2
    if verbose:
        # Guard the ratio so a zero-byte frame reports 0% instead of NaN.
        reduction = 100 * (start_mem - end_mem) / start_mem if start_mem else 0.0
        print('Mem. usage decreased to {:5.2f} Mb ({:.1f}% reduction)'.format(end_mem, reduction))
    return df

In [4]:
# Shrink both tables (customers first, then vendors) before they are combined.
customers, vendors = map(reduce_mem_usage, (customers, vendors))

Mem. usage decreased to  0.43 Mb (78.9% reduction)
Mem. usage decreased to  0.01 Mb (85.0% reduction)


In [5]:
# Cross join: give both frames the same constant helper column `key`, merge on
# it so every customer row is paired with every vendor row, then drop the helper.
cust_ven = (
    customers.assign(key=1)
    .merge(vendors.assign(key=1), on='key')
    .drop(columns='key')
)

In [6]:
# Sanity check: the row count should equal len(customers) * len(vendors).
cust_ven.shape

(1672000, 103)

In [7]:
# Build the row identifier "<customer_id> X <location_number> X <vendor_id>".
# customer_id is concatenated as-is, so it must already hold strings; the
# other two columns are cast to str first.
cust_ven['test_CID X LOC_NUM X VENDOR'] = (
    cust_ven['customer_id']
    + " X "
    + cust_ven['cust_location_number'].astype(str)
    + " X "
    + cust_ven['vendor_id'].astype(str)
)

In [8]:
# Remove the three columns now encoded in the identifier, plus four others
# that are not kept in the output. Mutates cust_ven in place, so re-running
# this cell on the same frame raises KeyError.
cust_ven.drop(
    columns=[
        'customer_id',
        'cust_location_number',
        'vendor_id',
        'dob',
        'cust_status',
        'cust_verified',
        'OpenHour',
    ],
    inplace=True,
)

In [9]:
# Spot-check the first few generated identifiers.
cust_ven['test_CID X LOC_NUM X VENDOR'].head()

0     000IPH5 X 0 X 4
1    000IPH5 X 0 X 13
2    000IPH5 X 0 X 20
3    000IPH5 X 0 X 23
4    000IPH5 X 0 X 28
Name: test_CID X LOC_NUM X VENDOR, dtype: object

In [10]:
# Inspect the column names that remain in the frame.
cust_ven.columns.values

array(['Cust_created_days', 'cust_location_type_Home',
       'cust_location_type_Other', 'cust_location_type_Work',
       'gender_female', 'gender_male', 'cust_lan_long_0',
       'cust_lan_long_1', 'cust_lan_long_2', 'cust_lan_long_3',
       'cust_lan_long_4', 'delivery_charge', 'serving_distance',
       'is_open', 'prepration_time', 'discount_percentage', 'ven_status',
       'ven_verified', 'vendor_rating', 'Ven_created_days', 'american',
       'arabic', 'asian', 'bagels', 'biryani', 'breakfast', 'burgers',
       'cafe', 'cakes', 'chinese', 'churros', 'coffee', 'combos',
       'crepes', 'desserts', 'dimsum', 'donuts', 'family_meal',
       'fatayers', 'free_delivery', 'fresh_juices', 'fries',
       'frozen_yoghurt', 'grills', 'healthy_food', 'hot_chocolate',
       'hot_dogs', 'ice_creams', 'indian', 'italian', 'japanese', 'karak',
       'kebabs', 'kids_meal', 'kushari', 'lebanese', 'manakeesh',
       'mandazi', 'mexican', 'milkshakes', 'mishkak', 'mojitos',
       'mojito

In [11]:
# Remove the Cust_created_days column from the frame (in place).
cust_ven.drop(columns=['Cust_created_days'], inplace=True)

## Sample submission

In [12]:
# Read the sample submission file for reference.
SS = pd.read_csv("Data/SampleSubmission.csv")

In [13]:
# Display the sample submission (pandas truncates the middle rows) to see its layout.
SS

Unnamed: 0,CID X LOC_NUM X VENDOR,target
0,Z59FTQD X 0 X 243,0
1,0JP29SK X 0 X 243,0
2,0JP29SK X 1 X 243,0
3,0JP29SK X 2 X 243,0
4,0JP29SK X 3 X 243,0
...,...,...
1048570,BB4J9W3 X 2 X 44,0
1048571,VYQ6H7K X 0 X 44,0
1048572,VYQ6H7K X 1 X 44,0
1048573,MB88K6D X 0 X 44,0


In [14]:
# Write the customer x vendor test frame to CSV without the index column.
cust_ven.to_csv("cust_ven_test_p2.csv", index=False)