Software Metrics - Bug detection

Neural Network created using Keras API

Dataset - The datasets are taken from tera-PROMISE

In [1]:
# Import libraries that we need

import numpy as np 
import pandas as pd

from keras.models import Sequential 
from keras.layers import Dense,Activation,Dropout,Lambda 
from keras.layers.normalization import BatchNormalization 
from keras.utils import np_utils
from keras.models import model_from_json

from sklearn.preprocessing import normalize
from sklearn.utils import shuffle

Using TensorFlow backend.


Get data from file and enter it in a dataframe

In [2]:
# Load the complete PROMISE dataset from its CSV file into a dataframe
df = pd.read_csv(filepath_or_buffer="../data/promise/complete_set.csv")

In [3]:
# Derive a binary label column from the 'bug' column; it has only two values:
#   0 -> no defects found
#   1 -> defects found
df['bugs'] = (df['bug'] > 0).astype(int)

In [4]:
# Drop the old 'bug' column; the binary 'bugs' column replaces it.
# `axis` is passed by keyword: the positional form df.drop('bug', 1) was
# deprecated in pandas 1.3 and raises a TypeError from pandas 2.0 onwards.
df = df.drop('bug', axis=1)

In [5]:
# Check the data shape: (number of rows, number of columns)
df.shape

(86279, 21)

In [6]:
# Shuffle the dataframe rows and renumber the index from 0.
# A fixed random_state makes the row order - and therefore the train/test
# split that is sliced from it later - reproducible across kernel restarts.
df = shuffle(df, random_state=42).reset_index(drop=True)

In [7]:
# Preview the first rows of the dataframe (head() shows 5 rows by default)
df.head()

Unnamed: 0,wmc,dit,noc,cbo,rfc,lcom,ca,ce,npm,lcom3,...,dam,moa,mfa,cam,ic,cbm,amc,max_cc,avg_cc,bugs
0,14,1,0,7,25,57,5,2,14,0.858974,...,1.0,0,0.0,0.314286,0,0,5.571429,1,0.9286,0
1,1,1,0,0,5,0,0,0,0,2.0,...,0.0,0,0.0,0.0,0,0,12.0,0,0.0,1
2,2,3,0,9,17,1,0,9,2,2.0,...,0.0,0,0.857143,0.625,1,1,44.0,8,4.0,0
3,7,4,0,18,32,19,0,18,1,0.966667,...,0.2,0,0.895833,0.5,2,4,33.0,2,0.8571,0
4,2,4,0,26,26,1,0,26,2,1.0,...,0.0,0,0.928571,0.625,1,1,57.5,1,0.5,0


Prepare data for training

In [8]:
# Separate the model inputs from the label:
# the first 20 columns are the training features, the last column (bugs) is the target
data, target = df.iloc[:, :20].values, df.iloc[:, 20].values

In [9]:
# Data normalization: with axis=0 every feature column is scaled independently
# (L2 norm, spelled out here; it is also sklearn's default).
# NOTE(review): the norms are computed over every row, including the rows that
# are later held out for testing - confirm this is intended.
nor_data = normalize(data, norm='l2', axis=0)

In [10]:
# Split data into training data and testing data
l = len(df)
# 80% of the rows are used to train the model
l_train = int(0.8 * l)
# The remaining rows (~20%) are used to test the model. The count is derived
# from l_train so that it always equals the real size of the test slices below;
# int(0.2 * l) truncates independently and can come out one row short.
l_test = l - l_train

data_train = nor_data[:l_train]
data_test  = nor_data[l_train:]

target_train = target[:l_train]
target_test  = target[l_train:]

In [11]:
# One-hot encode both target vectors (needed for the softmax output)
# 0 becomes [1 0]
# 1 becomes [0 1]
target_train, target_test = (
    np_utils.to_categorical(labels, num_classes=2)
    for labels in (target_train, target_test)
)

In [12]:
# Create Keras Model: two ReLU hidden layers (1000 and 500 units), a dropout
# of 0.2 and a 2-unit softmax output; compiled with the Adam optimizer and
# categorical cross-entropy loss, tracking accuracy
model = Sequential([
    Dense(1000, input_dim=20, activation='relu'),
    Dense(500, activation='relu'),
    Dropout(0.2),
    Dense(2, activation='softmax'),
])
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [36]:
# Model fitting; the test split is also passed as the validation set
model.fit(
    data_train,
    target_train,
    validation_data=(data_test, target_test),
    batch_size=2000,
    epochs=5000,
    verbose=1,
)

Train on 69023 samples, validate on 17256 samples
Epoch 1/1


<keras.callbacks.History at 0x25d59f2ff28>

In [13]:
# Check accuracy on the test split: compare the most probable predicted class
# with the class encoded in each one-hot target row
pred = model.predict(data_test)
predict = pred.argmax(axis=1)
y       = target_test.argmax(axis=1)
size = len(pred)

# percentage of test rows whose predicted class matches the true class
accuracy = np.sum(y == predict) / size * 100
print ("Accuracy: ", accuracy )

Accuracy:  18.8456189152


In [14]:
def save_model(m, file_name):
    '''
    Save the model m to disk as two files.

    The architecture is written as JSON to file_name + ".json" and the
    weights are written to file_name + ".h5".

    m         -- model object providing to_json() and save_weights(path)
    file_name -- output path WITHOUT extension; may include directories.
                 A missing parent directory is created instead of failing
                 with FileNotFoundError.
    '''
    import os

    # Make sure the destination directory exists before writing anything;
    # a bare file name has no directory part, so there is nothing to create.
    target_dir = os.path.dirname(file_name)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    model_json = m.to_json()
    with open(file_name + ".json", "w") as json_file:
        json_file.write(model_json)
    m.save_weights(file_name + ".h5")

    print ("Model saved!")

In [15]:
# Write the model to ../Models/model_name.json (architecture) and .h5 (weights)
save_model(m=model, file_name="../Models/model_name")

Model saved!
