In [9]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Recommendation

"""

# import libraries

# python version libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# system libraries
import os

# data processing & array handling libraries 
import numpy as np
import pandas as pd

# deep learning libraries (backend - Theano)
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Activation
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization

# machine learning libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedShuffleSplit

# echo the module docstring as a banner when the script starts
print(__doc__)

# fix random seed for reproducibility
# (this seeds NumPy's global random generator only)
seed = 7
np.random.seed(seed)

class Data_processing:
    """Load and clean CSV data, build the Keras classifier and persist it.

    Attributes:
        path: Base data directory (string, as supplied or the default).
        model_path: ``models`` sub-directory of ``path`` with a trailing
            separator; created on construction if it does not exist.
    """

    # data directory used when the caller does not supply one
    DEFAULT_PATH = '/home/azureuser/clara-deeplearning/data/'

    def __init__(self, path=None):
        """Set up the data/model directories.

        Args:
            path: Base data directory. Defaults to ``DEFAULT_PATH`` so that
                ``Data_processing()`` behaves as before.

        Raises:
            OSError: If the model directory cannot be created.
        """
        self.path = self.DEFAULT_PATH if path is None else path

        # trailing '' keeps the trailing separator callers may rely on
        self.model_path = os.path.join(self.path, 'models', '')

        # makedirs also creates missing parents; re-checking isdir on failure
        # tolerates an already existing directory without an
        # exists()/mkdir() race (and works on Python 2, which lacks exist_ok)
        try:
            os.makedirs(self.model_path)
        except OSError:
            if not os.path.isdir(self.model_path):
                raise

    def read(self, file):
        """Read a CSV file into a DataFrame.

        Args:
            file: Path or file-like object accepted by ``pandas.read_csv``.

        Returns:
            The parsed ``pandas.DataFrame`` (decoded as ISO-8859-1).
        """
        return pd.read_csv(file, encoding="ISO-8859-1")

    def clean_data(self, data):
        """Strip whitespace from string columns and lower-case column names.

        Note that the result lists all non-object columns first, followed by
        the object (string) columns, so the column order may differ from the
        input.

        Args:
            data: Input ``pandas.DataFrame``.

        Returns:
            A new DataFrame with trimmed string values and lower-cased
            column names.
        """
        # columns whose dtype is not object are passed through untouched
        numeric_data = data.loc[:, data.dtypes != object]

        # object columns get leading/trailing whitespace removed
        text_data = data.select_dtypes(['object'])
        text_data = text_data.apply(lambda column: column.str.strip())

        # recombine side by side: numeric block first, then the text block
        cleaned = pd.concat([numeric_data, text_data], axis=1)

        cleaned.columns = [name.lower() for name in cleaned.columns]
        return cleaned

    def model(self):
        """Build and compile the sequential classifier.

        The network takes 12 input features, has two 128-unit tanh layers
        (each followed by batch normalization and 50% dropout) and a
        66-way softmax output.

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        model = Sequential()

        model.add(Dense(128, input_shape=(12,), kernel_initializer='normal', activation='tanh'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(128, kernel_initializer='normal', activation='tanh'))
        model.add(BatchNormalization())
        model.add(Dropout(0.5))
        model.add(Dense(66, kernel_initializer='normal', activation='softmax'))

        # compile model
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): this rebinds the optimizer's `lr` attribute to a plain
        # float after compile(); whether training honours it depends on the
        # installed Keras version - TODO confirm, or pass a configured Adam
        # instance to compile() instead.
        model.optimizer.lr = 1e-5
        return model

    def model_weights(self, model):
        """Persist a model's architecture and weights under ``model_path``.

        Writes ``model_type.json`` (architecture as JSON) and
        ``model_type.hdf5`` (weights).

        Args:
            model: Object providing ``to_json()`` and ``save_weights(path)``.
        """
        # serialize model to JSON
        with open(os.path.join(self.model_path, "model_type.json"), "w") as json_file:
            json_file.write(model.to_json())

        # serialize weights to HDF5
        model.save_weights(os.path.join(self.model_path, 'model_type.hdf5'))

    def print_classes(self, columns):
        """Map positional indices (as strings) to the given column names.

        Args:
            columns: Iterable of class/column labels.

        Returns:
            Dict such as ``{'0': first, '1': second, ...}``.
        """
        return {str(index): column for index, column in enumerate(columns)}

# function to drive the main flow
def drive():
    """Run the end-to-end flow: load the accessories CSV, clean it and
    print, for every (model, name) pair, the count of non-null values in
    each remaining column.
    """
    processor = Data_processing()

    # load the raw accessories export and normalise it in one step
    csv_file = "/home/azureuser/clara-deeplearning/data/accessories.csv"
    accessories = processor.clean_data(processor.read(csv_file))

    # one row per (model, name) combination, one 'count' per other column
    counts = accessories.groupby(['model', 'name']).agg(['count'])
    print(counts)

# run the main flow only when executed as a script, not when imported
if __name__ == '__main__':
    drive()


Recommendation


                                                                         prce  \
                                                                        count   
model                    name                                                   
2016 CLS400 4MATIC Coupe 18-inch 10-spoke alloy wheels                      1   
                         19-inch 10-spoke alloy wheels                      1   
                         AIRMATIC semi-active air suspension                1   
                         Driver Assistance Package                          1   
                         Heated steering wheel                              1   
                         Lane Tracking Package                              1   
                         License plate frame                                1   
                         Parking Assist Package                             1   
                         Tire valve stem caps                               1   
          