# In [14]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Program to predict Mercedes Benz vehicle model using various features from Clara 

"""

# import libraries

# python version libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# system libraries
import os

# data processing & array handling libraries 
import numpy as np
import pandas as pd

# deep learning libraries (backend - Theano)
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

# machine learning libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.model_selection import StratifiedShuffleSplit

# show the module docstring as a start-up banner
print(__doc__)

# fix random seed for reproducibility
seed = 7
np.random.seed(seed)

# set path
path = '/home/azureuser/clara-deeplearning/data/'
model_path = path + 'models/'

# makedirs (rather than mkdir) also creates the parent data directory when it
# does not exist yet; mkdir would raise in that case
if not os.path.exists(model_path):
    os.makedirs(model_path)

class Data_processing:
    """Stateless helpers to read, clean and model the vehicle-model dataset."""

    # initialize
    def __init__(self):
        pass

    # function to read CSV file
    def read(self, file):
        """Load a CSV file into a DataFrame.

        Args:
            file: Path or file-like object, passed straight to
                ``pandas.read_csv``.

        Returns:
            The parsed ``pandas.DataFrame``.
        """
        return pd.read_csv(file)

    # function to clean CSV file
    def clean_data(self, data):
        """Strip whitespace from string cells and lower-case the column names.

        Columns are returned with the non-object columns first, followed by
        the object (text) columns, each group keeping its relative order.

        Args:
            data: Input ``pandas.DataFrame``; it is not modified.

        Returns:
            A new ``pandas.DataFrame`` with trimmed string values and
            lower-cased column names.
        """
        def strip_if_text(value):
            # Only real strings are trimmed.  The ``.str`` accessor would
            # replace every non-string cell of a mixed column with NaN.
            return value.strip() if isinstance(value, str) else value

        # split the frame into object (text) columns and everything else
        is_text = data.dtypes == object
        numeric_data = data.loc[:, ~is_text]
        trim_data = data.loc[:, is_text]

        # trim all string cells, leaving non-string cells untouched
        if trim_data.shape[1]:
            trim_data = trim_data.apply(lambda column: column.map(strip_if_text))

        # merge numeric and string data back into one data object
        data = pd.concat([numeric_data, trim_data], axis=1)

        # change column names to lower case
        data.columns = [x.lower() for x in data.columns]
        return data

    # define baseline model
    def model(self, input_dim=61, n_classes=71):
        """Build and compile the baseline feed-forward classifier.

        The network is two tanh hidden layers of 142 units, each followed by
        50 % dropout, and a softmax output layer.  As a side effect the
        architecture (JSON) and the freshly initialised, untrained weights
        (HDF5) are written into the module-level ``model_path`` directory.

        Args:
            input_dim: Number of input features (defaults to 61).
            n_classes: Number of output classes (defaults to 71).

        Returns:
            The compiled Keras ``Sequential`` model.
        """
        # create model
        model = Sequential()

        model.add(Dense(142, input_shape=(input_dim,), kernel_initializer='normal', activation='tanh'))
        model.add(Dropout(0.5))
        model.add(Dense(142, kernel_initializer='normal', activation='tanh'))
        model.add(Dropout(0.5))
        model.add(Dense(n_classes, kernel_initializer='normal', activation='softmax'))

        # compile model
        model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

        # NOTE(review): this rebinds the optimizer's ``lr`` attribute to a plain
        # float after compile(); whether that takes effect depends on the
        # installed Keras version -- confirm, or pass a configured optimizer
        # instance to compile() instead.
        model.optimizer.lr = 1e-5

        # serialize model to JSON
        model_json = model.to_json()
        with open(model_path + "model_type.json", "w") as json_file:
            json_file.write(model_json)

        # serialize weights to HDF5 (these are the initial, untrained weights)
        model.save_weights(model_path + 'model_type' + '.hdf5')
        return model

# function to drive the main flow
def drive():
    """Run the whole flow: load, clean, encode, split, train and evaluate.

    Reads ``model_data.csv`` from the module-level ``path`` directory,
    one-hot encodes five feature columns and the ``model`` target column,
    trains the network returned by ``Data_processing.model`` on one
    stratified 50 % split and prints the accuracy measured on the other half.
    """
    # instantiate class Data_processing
    process = Data_processing()

    # read input file (same location as before, built from the shared path)
    data = process.read(path + 'model_data.csv')

    # clean input file
    data = process.clean_data(data)

    # remove junk columns
    data = data.loc[:, ~data.columns.str.contains('unnamed:')]

    # rename column
    data = data.rename(columns={'age category': 'age_category'})

    # fetch features; plain column selection replaces the removed ``.ix``
    # indexer and raises KeyError if a feature column is missing
    feature_columns = ['age', 'status', 'family_size', 'location', 'budget']
    X = pd.get_dummies(data[feature_columns])
    # ``.values`` replaces the removed ``as_matrix()``; the cast gives one
    # float dtype even when the dummy columns are boolean
    X = X.values.astype('float32')

    # fetch target
    y = pd.get_dummies(data['model'])
    y = y.values.astype('float32')

    # 1-D class index per row, used only to stratify the split
    class_index = y.argmax(axis=1)

    # split train and test dataset; only one split is ever used, so only one
    # is generated (previously three were built and the first two discarded)
    sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=seed)
    train_index, test_index = next(sss.split(X, class_index))
    train_X, test_X = X[train_index], X[test_index]
    train_y, test_y = y[train_index], y[test_index]

    # call model
    model = process.model()

    # set batch size
    # NOTE(review): batch_size is the whole training set, so with
    # validation_split=0.3 every epoch is a single gradient step; together
    # with the very small learning rate this may explain the low accuracy --
    # consider tuning.
    batch_size, verbose, validation_split, epochs = len(train_X), 0, 0.3, 150

    # fit model
    model.fit(train_X,
              train_y,
              epochs=epochs,
              validation_split=validation_split,
              batch_size=batch_size,
              verbose=verbose)

    # validate accuracy; run silently so the progress bar is not glued onto
    # the printed result
    loss, accuracy = model.evaluate(test_X, test_y, verbose=verbose)
    print("Accuracy = {:.2f}".format(accuracy))
    
# run the full pipeline only when executed as a script, not when imported
if __name__ == '__main__':
    drive()


# Output:
# Program to predict Mercedes Benz vehicle model using various features from Clara
#
#
#   32/1018 [..............................] - ETA: 0sAccuracy = 0.04
