In [45]:
#!/usr/bin/python
# -*- coding: utf-8 -*-

"""
Program to predict Mercedes Benz vehicle model using various features from Clara 

"""

# import libraries

# python version libraries
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# data processing & array handling libraries 
import numpy
import pandas as pd

# deep Learning libraries (Backend - Theano)
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier
from keras.utils import np_utils

# machine Learning libraries
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import Pipeline

# echo the module docstring as a start-up banner
print(__doc__)

# fix random seed for reproducibility: seeds NumPy's global random generator;
# the same value is passed as random_state to the KFold splitter in drive()
# NOTE(review): only NumPy is seeded here -- confirm whether the Keras backend
# needs its own seed for fully repeatable runs
seed = 7
numpy.random.seed(seed)

class Data_processing:
    """Stateless helpers for loading a CSV file and tidying its contents."""

    # initialize
    def __init__(self):
        pass

    # function to read CSV file
    def read(self, file):
        """Load a CSV file into a pandas DataFrame.

        Args:
            file: Path or file-like object, forwarded to ``pandas.read_csv``.

        Returns:
            The parsed ``pandas.DataFrame``.
        """
        return pd.read_csv(file)

    @staticmethod
    def _strip_text(value):
        """Return ``value`` without surrounding whitespace when it is text.

        Cells that are not text (numbers, booleans, NaN, None) are returned
        unchanged.
        """
        # type(u"") is ``unicode`` on Python 2 and ``str`` on Python 3, so
        # text is recognised on either interpreter.
        if isinstance(value, (str, type(u""))):
            return value.strip()
        return value

    # function to clean CSV file
    def clean_data(self, data):
        """Trim whitespace in object columns and lower-case the column names.

        Columns whose dtype is not ``object`` are kept as they are and placed
        first; ``object`` columns follow, in their original relative order,
        with leading/trailing whitespace removed from every text cell.

        Text is stripped cell by cell instead of through the ``.str``
        accessor: the accessor replaces non-string cells of a mixed column
        with NaN and raises ``AttributeError`` on object columns that hold no
        strings at all (for example booleans with missing values).

        Args:
            data: Input ``pandas.DataFrame``; it is not modified.

        Returns:
            A new ``pandas.DataFrame`` with cleaned values and lower-case
            column names.
        """
        # one mask drives both selections so every column lands in exactly
        # one of the two groups
        is_object = data.dtypes == object

        # select numeric data alone from data object
        numeric_data = data.loc[:, ~is_object]

        # select string data alone from data object to trim
        trim_data = data.loc[:, is_object]

        # trim all string objects, leaving non-text cells untouched
        trim_data = trim_data.apply(
            lambda column: column.map(self._strip_text))

        # merge numeric and string data in to data object
        data = pd.concat([numeric_data, trim_data], axis=1)

        # change column name to lower case
        data.columns = [x.lower() for x in data.columns]
        return data

# define baseline model
def baseline_model(input_dim=61, num_classes=71, hidden_units=4):
    """Build and compile the baseline feed-forward classifier.

    The network has one hidden ReLU layer followed by an output layer with
    one unit per class. The defaults reproduce the sizes that were previously
    hard-coded, so calling ``baseline_model()`` with no arguments still
    builds a 61 -> 4 -> 71 network.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of target classes (width of the one-hot target).
        hidden_units: Number of units in the hidden layer.

    Returns:
        A compiled Keras ``Sequential`` model.
    """
    # create model
    model = Sequential()
    model.add(Dense(hidden_units, input_dim=input_dim,
                    kernel_initializer='normal', activation='relu'))
    # softmax (not sigmoid): the targets are one-hot encoded, mutually
    # exclusive classes, and categorical cross-entropy expects the outputs to
    # form a probability distribution over them
    model.add(Dense(num_classes, kernel_initializer='normal',
                    activation='softmax'))
    # Compile model
    model.compile(loss='categorical_crossentropy', optimizer='adam',
                  metrics=['accuracy'])
    return model

# function to drive the main flow
def drive(data_path="/home/azureuser/clara_deeplearning/model_data.csv",
          evaluate=False):
    """Run the main flow: load, clean and encode the data, build the estimator.

    Args:
        data_path: Location of the input CSV file. Defaults to the path that
            was previously hard-coded.
        evaluate: When true, additionally run 10-fold cross-validation of the
            estimator and print the mean and standard deviation of the
            accuracy. Off by default, so a plain ``drive()`` call only prints
            the estimator, as before.

    Returns:
        None.
    """
    # instantiate class Data_processing
    process = Data_processing()

    # read input file
    data = process.read(data_path)

    # clean input file
    data = process.clean_data(data)

    # remove junk columns (any column whose name contains 'unnamed:')
    data = data[data.columns[~data.columns.str.contains('unnamed:')]]

    # rename column
    data = data.rename(columns={'age category': 'age_category'})

    # everything except the target and these columns is used as a feature
    excluded = ('model', 'name', 'age_category', 'ld_cd')
    cols = [col for col in data.columns if col not in excluded]
    X = data[cols]
    Y = data['model']

    # one-hot encode the categorical features and the target
    dummy_X = pd.get_dummies(X)
    dummy_Y = pd.get_dummies(Y)

    estimator = KerasClassifier(build_fn=baseline_model, epochs=200,
                                batch_size=5, verbose=0)

    print(estimator)

    if not evaluate:
        return

    # NOTE(review): baseline_model builds a network with a fixed input width
    # and class count by default -- confirm they match the encoded frames
    # before enabling evaluation
    kfold = KFold(n_splits=10, shuffle=True, random_state=seed)

    # hand plain float arrays to Keras; get_dummies may yield boolean columns
    results = cross_val_score(estimator,
                              dummy_X.values.astype('float32'),
                              dummy_Y.values.astype('float32'),
                              cv=kfold)
    print("Baseline: %.2f%% (%.2f%%)" % (results.mean()*100, results.std()*100))
    
# run the main flow only when this file is executed as a script, not on import
if __name__ == '__main__':
    drive()


Program to predict Mercedes Benz vehicle model using various features from Clara 


<keras.wrappers.scikit_learn.KerasClassifier object at 0x7f9901c20f50>
