In [3]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
This program reads the existing customer database and trains a model to understand the customer persona
in terms of choosing a vehicle type.

"""

# import libraries

# python version libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# system libraries
import os

# data processing & array handling libraries 
import numpy as np
import pandas as pd

# deep learning libraries (backend - Theano)
from keras.models import Sequential, Model
from keras.layers import Input, Dense, Dropout, Activation
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils
from keras.layers.normalization import BatchNormalization

# machine learning libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedShuffleSplit

# echo the module docstring so every run states what the script does
print(__doc__)

# fix random seed for reproducibility
# (the same value is reused by drive() as the random_state of the train/test split)
seed = 7
np.random.seed(seed)

class Data_processing:
    """Helpers to load and clean the customer CSV and to build/save the Keras model.

    Attributes:
        path: base data directory.
        model_path: ``<path>/models/`` (with trailing separator); created on
            construction when it does not exist yet.
    """

    # initialize
    def __init__(self, path='/home/azureuser/clara-deeplearning/data/'):
        """Remember the data directory and make sure the models directory exists.

        Args:
            path: base data directory. Defaults to the location the script
                was originally written for.
        """
        # set path
        self.path = path

        # keep the trailing separator: model_weights() builds file names by
        # plain string concatenation onto model_path
        self.model_path = os.path.join(self.path, 'models', '')

        # makedirs (not mkdir) so a missing parent directory is created as well
        if not os.path.isdir(self.model_path):
            os.makedirs(self.model_path)

    # function to read CSV file
    def read(self, file):
        """Load the CSV at ``file`` and return it as a pandas DataFrame."""
        return pd.read_csv(file)

    # function to clean CSV file
    def clean_data(self, data):
        """Trim whitespace from string cells and lower-case the column names.

        Non-object columns are passed through untouched. In the returned
        frame the non-object columns come first, followed by the object
        columns; within each group the original order is kept.

        Args:
            data: input DataFrame; it is not modified.

        Returns:
            A new DataFrame with trimmed strings and lower-case column names.
        """

        def strip_text(column):
            # trim one object column, leaving non-string cells exactly as they were
            try:
                stripped = column.str.strip()
            except AttributeError:
                # object column without any string values: nothing to trim
                return column
            # .str.strip() yields NaN for every non-string cell; restore the
            # original value there instead of silently losing it
            return stripped.where(stripped.notnull(), column)

        is_text = data.dtypes == object

        # select numeric data alone from data object
        numeric_data = data.loc[:, ~is_text]

        # select string data alone from data object and trim it
        trim_data = data.loc[:, is_text].apply(strip_text)

        # merge numeric and string data in to one data object
        cleaned = pd.concat([numeric_data, trim_data], axis=1)

        # change column name to lower case
        cleaned.columns = [name.lower() for name in cleaned.columns]
        return cleaned

    # define model
    def model(self, input_dim=12, num_classes=66, learning_rate=1e-5):
        """Build and compile the feed-forward classifier.

        The network is two 128-unit tanh layers, each followed by 50%
        dropout, and a softmax output layer. It is compiled with categorical
        cross-entropy, the Adam optimizer and the accuracy metric.

        Args:
            input_dim: number of input features (width of the feature matrix).
            num_classes: number of target classes (width of the one-hot target).
            learning_rate: learning rate handed to the Adam optimizer.

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        # local import: only this method needs the optimizer class
        from keras.optimizers import Adam

        network = Sequential()

        network.add(Dense(128, input_shape=(input_dim,), activation='tanh'))
        network.add(Dropout(0.5))
        network.add(Dense(128, activation='tanh'))
        network.add(Dropout(0.5))
        network.add(Dense(num_classes, activation='softmax'))

        # configure the learning rate on the optimizer itself; overwriting
        # `model.optimizer.lr` after compile() replaces the optimizer's
        # backend variable with a plain Python float
        network.compile(loss='categorical_crossentropy',
                        optimizer=Adam(lr=learning_rate),
                        metrics=['accuracy'])
        return network

    # save model and weights
    def model_weights(self, model):
        """Write the model architecture and weights into ``self.model_path``.

        The architecture goes to ``model_type.json`` and the weights to
        ``model_type.hdf5``.

        Args:
            model: object providing ``to_json()`` and ``save_weights(path)``.
        """
        # serialize model to JSON
        with open(self.model_path + "model_type.json", "w") as json_file:
            json_file.write(model.to_json())

        # serialize weights to HDF5
        model.save_weights(self.model_path + 'model_type' + '.hdf5')

    def print_classes(self, columns):
        """Map each position in ``columns`` (as a string) to the item at that position.

        Despite the name nothing is printed; the mapping is returned.

        Args:
            columns: iterable of class labels.

        Returns:
            dict such as ``{'0': first_label, '1': second_label, ...}``.
        """
        return {str(index): column for index, column in enumerate(columns)}
    
# function to drive the main flow
def drive():
    """Load the customer data, build feature/target matrices and split them.

    Steps: read ``model_data_1.csv`` from the data directory, clean it, drop
    the 'unnamed:' columns, one-hot encode the selected features and the
    ``model`` target column, make a stratified 70/30 train/test split and
    print the second row of the test features.

    Model training, evaluation and saving are currently disabled; the
    disabled calls are kept as comments at the end of the function.
    """

    # instantiate class Data_processing
    process = Data_processing()

    # read input file
    data = process.read(process.path + 'model_data_1.csv')

    # clean input file
    data = process.clean_data(data)

    # remove junk columns
    data = data.loc[:, ~data.columns.str.contains('unnamed:')]

    # rename column
    data = data.rename(columns={'type of vehicle': 'type_of_vehicle'})

    # select features (plain label selection; DataFrame.ix no longer exists in pandas >= 1.0)
    feature_columns = ['age', 'status', 'family_size', 'location', 'budget', 'type_of_vehicle']
    X = data[feature_columns]

    # explicit uint8 keeps the indicator columns numeric on every pandas
    # version (newer releases default to bool)
    dummy_X = pd.get_dummies(X, dtype=np.uint8)
    # .values replaces DataFrame.as_matrix(), which was removed in pandas 1.0
    X = dummy_X.values

    # select target
    dummy_y = pd.get_dummies(data['model'], dtype=np.uint8)
    y = dummy_y.values

    # split train and test dataset
    sss = StratifiedShuffleSplit(n_splits=3, test_size=0.3, random_state=seed)

    # only the last of the generated splits is used; this is what the
    # original loop effectively did, so the resulting partition is unchanged
    train_index, test_index = list(sss.split(X, y))[-1]
    train_X, test_X = X[train_index], X[test_index]
    train_y, test_y = y[train_index], y[test_index]

    print (test_X[1])

    # NOTE: everything below is the disabled training / evaluation flow.

    # call model
    #model = process.model()
    
    # set batch size
    #batch_size, verbose, validation_split, epochs = len(train_X), 0, 0.3, 100
    #batch_size, verbose, validation_split, epochs = 1000, 2, 0.3, 2500
    
    # fit model    
    #model.fit(train_X, 
    #          train_y, 
    #          nb_epoch=epochs, 
    #          validation_split=validation_split,
    #          #callbacks=[TestCallback((test_X, test_y))],
    #          batch_size=batch_size,
    #          verbose=verbose)
    
    # validate accuracy
    #loss, accuracy = model.evaluate(test_X, test_y)
    #print("Accuracy = {:.2f}".format(accuracy))
    
    #predict = model.predict_classes(test_X)
    #print (dummy_y.columns[predict])
    #predict = model.predict_proba(test_X)
    #print (predict)
    
    # call model_weights
    #process.model_weights(model)
    
    # call print_classes
    #vehicle_classes = process.print_classes(dummy_y.columns)
    #print (vehicle_classes)
    
# run the flow only when executed as a script, not when the module is imported
if __name__ == '__main__':
    drive()


This program will read existing customer database and train a model to understand customer persona
in terms of choosing vehcie type  


[3 1 0 1 0 0 0 1 0 1 0 1]
