In [1]:
import numpy as np
import pandas as pd

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from keras.models import Sequential
from keras.layers import Convolution1D
from keras.layers import Dense
from keras.layers import MaxPooling1D
from keras.layers import Flatten
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers.normalization import BatchNormalization


from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
import tensorflow as tf

from sklearn.utils.class_weight import compute_class_weight

Using TensorFlow backend.


### 1- Tabular Dataset
### 2- Leaf classification dataset for multiclassifcation
### 3- Dataset is too small for Deep learning Models
### 4- This code is just to give an example how to use CNN on Tabular dataset for classification

In [2]:
#load train and test data
df_train = pd.read_csv("train.csv")
df_test = pd.read_csv("test.csv")

In [3]:
# concatenate train and test Dataframe
df = pd.concat([df_train,df_test])

In [4]:
#Check if their is any null value
df.isna().sum()

id             0
species      594
margin1        0
margin2        0
margin3        0
            ... 
texture60      0
texture61      0
texture62      0
texture63      0
texture64      0
Length: 194, dtype: int64

In [5]:
# drop index column
df.drop(["id"], axis =1 , inplace = True)

In [6]:
#remove Null 
df =df[~df.isin([np.nan, np.inf, -np.inf]).any(1)]

In [7]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 990 entries, 0 to 989
Columns: 193 entries, species to texture64
dtypes: float64(192), object(1)
memory usage: 1.5+ MB


In [8]:
#Get Output column
Y = df["species"]

In [9]:
#Get remaning columns
X = df.drop(["species"], axis =1)

In [10]:
# one hot encoding of Label for multiclassification
Y = pd.get_dummies(Y)

In [11]:
# split test and train
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.30, random_state=1 , stratify=Y)

In [12]:
# split test and validation
X_test, X_val, y_test, y_val = train_test_split(X_test, y_test, test_size=0.50, random_state=2 , stratify= y_test)

In [13]:
# convert dataframe to numpy
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()
X_val= X_val.to_numpy()
y_val = y_val.to_numpy()

In [14]:
# Standardize data
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)
X_val = scaler.fit_transform(X_val)

In [15]:
# reshape X data to pass in Convolutional1D layers
X_train = np.reshape(X_train, X_train.shape + (1,))
X_test =  np.reshape(X_test, X_test.shape + (1,))
X_val =  np.reshape(X_val, X_val.shape + (1,))

In [16]:
X_train.shape

(693, 192, 1)

In [17]:
# Metrics for CNN model
batchSize =  X_train.shape[0]
length =  X_train.shape[1]
channel = X_train.shape[2]
n_outputs = y_train.shape[1]

In [18]:
#CNN Model
def getModel():
    #Initialising the CNN
    model = Sequential()
    
    model.add(Convolution1D(filters= 8, kernel_initializer='he_uniform',  kernel_size=3, activation='relu',input_shape=(length, channel)))
    model.add(MaxPooling1D(pool_size=2, strides=2))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())
    
    model.add(Convolution1D(filters= 16,  kernel_initializer='he_uniform', kernel_size=3, activation='relu'))
    model.add(MaxPooling1D(pool_size=2,strides=2))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())

    model.add(Convolution1D(filters=32, kernel_size=5,  kernel_initializer='he_uniform', activation="relu",input_shape=(length, channel)))
    model.add(MaxPooling1D(pool_size=3,strides=2))
    model.add(Dropout(0.1))
    model.add(BatchNormalization())
    

    #2.Flattening
    model.add(Flatten())

    #3.Full Connection
    model.add(Dropout(0.3))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(n_outputs, activation='softmax'))

    model.compile(loss='categorical_crossentropy', optimizer= "adam",  metrics=['accuracy'])
    return model

In [19]:
model = getModel()
model.summary()


Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d_1 (Conv1D)            (None, 190, 8)            32        
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 95, 8)             0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 95, 8)             0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 95, 8)             32        
_________________________________________________________________
conv1d_2 (Conv1D)            (None, 93, 16)            400       
_________________________________________________________________
max_pooling1d_2 (MaxPooling1 (None, 46, 16)            0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 46, 16)          

In [20]:
# Train Model
history = model.fit(
    X_train,
    y_train,
    verbose=1,
    batch_size=10,
    shuffle=True,
    epochs=30,
    validation_data=(X_val, y_val)
)


Train on 693 samples, validate on 149 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [21]:
#Save model weights
file_path = r"model_weights.h5"
model.save_weights(file_path)

In [22]:
# Retrve Model , load weights and evaluate test data
model = getModel()
model.load_weights(file_path)
score  = model.evaluate(X_test, y_test)



In [23]:
# Score and Loss
model_parameters = model.metrics_names
print(f"{model_parameters[1]}  :{score[1]}")
print(f"{model_parameters[0]}  :{score[0]}")

accuracy  :0.9527027010917664
loss  :0.11453679307187731
