In [1]:
import numpy as np
import scipy.special        #for sigmoid function

In [106]:
#defining neural network class
class neuralNetwork:
    
    
    #initialising the neural network
    def __init__(self, input_nodes, hidden_nodes, output_nodes, learning_rate):
        
        self.inodes = input_nodes
        self.hnodes = hidden_nodes
        self.onodes = output_nodes
        self.LR = learning_rate
        
        #linking weight matrices, weight(input, hidden) labelled as wih and weight(hidden, output) labelled as who
        #matrices are in form weight_ij
        
        #self.wih = np.random.normal(0.0, pow(self.hnodes, -0.5), (self.hnodes, self.inodes)) / 10
        #self.who = np.random.normal(0.0, pow(self.onodes, -0.5), (self.onodes, self.hnodes)) / 10
        self.wih = np.random.randn(hidden_nodes, input_nodes)
        self.who = np.random.randn(output_nodes, hidden_nodes)
        self.biasih = np.random.rand(hidden_nodes, 1)
        self.biasho = np.random.rand(output_nodes, 1)
        
        #sigmoid function as the activation function
        self.activation_function = lambda x : scipy.special.expit(x)

        pass
    
    
    #train neural network
    def train(self, inputs_list, targets_list):
        
        #converting input list into array
        inputs = np.array(inputs_list, ndmin = 2).T
        
        #converting targets list into array
        targets = np.array(targets_list, ndmin = 2).T
        
        #calculating signals that enter the hidden layer
        hidden_inputs = np.dot(self.wih, inputs) + self.biasih
        
        #calculating signals that leave the hidden layer as output
        hidden_outputs = self.activation_function(hidden_inputs)
        
        #calculating signals that enter the final output layer
        final_inputs = np.dot(self.who, hidden_outputs) + self.biasho
        
        #calculating signals that leave final layer as output
        final_outputs = self.activation_function(final_inputs)
        #print(final_outputs)
        
        #calculating error which is (target - actual or derived)
        output_errors = targets - final_outputs
        
        #errors(hidden) = (weight(hidden-output).transpose) dot product (errors(output))
        hidden_errors = np.dot(self.who.T, output_errors)
        
        #updating weights for links between hidden and output layers
        #based on known formula
        #delta(W(jk)) = learningRate * {[E(k) * sigmoid(O(k)) * (1 - sigmoid(O(k)))] dot product (O(j).Transpose)}
        self.who += self.LR * np.dot((output_errors * final_outputs * (1 - final_outputs)), np.transpose(hidden_outputs))
        self.wih += self.LR * np.dot((hidden_errors * hidden_outputs * (1 - hidden_outputs)), np.transpose(inputs))
        
        pass
    
    
    #query or give an output via output nodes
    def query(self, inputs_list):
        
        #converting input list into array
        inputs = np.array(inputs_list, ndmin = 2).T
        
        #calculating signals that enter the hidden layer
        hidden_inputs = np.dot(self.wih, inputs)
        
        #calculating signals that leave the hidden layer as output
        hidden_outputs = self.activation_function(hidden_inputs)
        
        #calculating signals that enter the final output layer
        final_inputs = np.dot(self.who, hidden_outputs)
        
        #calculating signals that leave final layer as output
        final_outputs = self.activation_function(final_inputs)
        return final_outputs

In [107]:
inputnodes = 15
hiddennodes = 8
outputnodes = 2
learningrate = 0.025

#create NN instance
n = neuralNetwork(inputnodes, hiddennodes, outputnodes, learningrate)

In [108]:
#load training data and convert into list
training_file = open("/home/sarvesh/Downloads/survey lung cancer.csv", 'r')
training_list = training_file.readlines()
training_file.close()
for i in range(1, len(training_list)):
    training_list[i] = training_list[i].replace("1", "0")
    training_list[i] = training_list[i].replace("2", "1")
    training_list[i] = training_list[i].replace("YES", "1")
    training_list[i] = training_list[i].replace("NO", "0")
    training_list[i] = training_list[i].replace("M", "0")
    training_list[i] = training_list[i].replace("F", "1")
print(training_list)

['GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE ,ALLERGY ,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER\n', '0,69,0,1,1,0,0,1,0,1,1,1,1,1,1,1\n', '0,74,1,0,0,0,1,1,1,0,0,0,1,1,1,1\n', '1,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0\n', '0,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0\n', '1,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0\n', '1,75,0,1,0,0,1,1,1,1,0,1,1,0,0,1\n', '0,51,1,0,0,0,0,1,0,1,1,1,1,0,1,1\n', '1,50,1,1,1,1,0,1,1,0,0,0,1,1,0,1\n', '1,68,1,0,1,0,0,1,0,0,0,0,0,0,0,0\n', '0,53,1,1,1,1,1,0,1,0,1,0,0,1,1,1\n', '1,60,1,1,1,1,1,1,0,1,0,1,1,1,0,1\n', '0,71,0,0,0,0,1,1,1,1,1,1,1,0,1,1\n', '1,60,1,0,0,0,0,1,0,0,0,0,1,0,0,0\n', '0,58,1,0,0,0,0,1,1,1,1,1,1,0,1,1\n', '0,69,1,0,0,0,0,0,1,1,1,1,0,0,1,0\n', '1,48,0,1,1,1,1,1,1,1,0,1,1,1,0,1\n', '0,75,1,0,0,0,1,0,1,1,1,1,1,0,1,1\n', '0,57,1,1,1,1,1,0,0,0,1,0,0,1,1,1\n', '1,68,1,1,1,1,1,1,0,0,0,1,1,0,0,1\n', '1,60,0,0,0,0,1,1,0,0,0,0,1,0,0,0\n', '1,44,1,1,1,1,1,1,0,0,0,0,1,1,0,1\n', '1,64

In [109]:
epochs = 10

for e in range(epochs):
    
    #go through training data
    for iteration in training_list[1:284]:
        all_values = iteration.split(',')

        #since inputs are pixel values, they range from 0 to 255
        #scale inputs by dividing by 255 and multiplying by 0.99  and adding 0.01
        inputs = (np.asfarray(all_values[:15]) * 0.98) + 0.01
        inputs[1] = ((inputs[1] - 0.01) / 0.98) / 100


        #create target value by having all labels except desired one as 0.01. Desired label to be 0.99
        targets = np.zeros(outputnodes) + 0.01

        #all_values[0] is the first value of the training/testing set which is the final answer too
        targets[int(all_values[-1])] = 0.99
        n.train(inputs, targets)
print(inputs)

[0.01 0.46 0.01 0.99 0.99 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.01 0.99
 0.99]


In [110]:
#load testing data and convert into list
testing_list = training_list[284:]

['0,60,0,1,1,0,0,1,0,1,1,1,1,1,1,1\n', '0,58,1,1,1,1,1,0,0,0,1,0,0,1,1,1\n', '1,58,1,1,1,1,0,1,0,0,0,1,1,1,0,1\n', '1,63,0,0,0,0,1,1,0,0,0,0,1,0,0,0\n', '1,50,1,1,1,1,0,1,0,0,0,0,1,1,0,1\n', '1,60,0,1,1,1,0,0,1,1,0,1,0,1,0,1\n', '1,60,1,0,0,0,1,1,1,0,0,0,1,0,0,1\n', '0,76,1,0,0,0,0,1,1,1,1,1,1,0,1,1\n', '0,70,1,1,1,0,1,0,1,1,1,1,0,1,1,1\n', '0,69,0,0,1,0,0,1,0,1,1,1,1,1,0,1\n', '1,56,1,1,1,0,0,1,1,0,0,0,1,0,1,1\n', '0,67,0,0,0,1,0,1,0,1,0,1,1,0,1,1\n', '1,54,1,1,1,0,1,0,0,1,1,0,1,1,1,1\n', '0,63,0,1,0,0,0,1,0,1,1,1,1,0,0,1\n', '1,47,1,1,0,1,1,1,1,1,0,1,1,0,0,1\n', '0,61,1,0,1,0,0,1,0,1,1,1,1,0,1,1\n', '0,65,1,1,1,1,0,1,1,0,0,0,1,1,0,1\n', '1,63,1,1,1,1,1,1,1,1,0,1,1,1,1,1\n', '0,64,0,1,1,1,0,0,1,0,1,0,0,1,1,1\n', '1,65,1,1,1,1,0,1,0,1,0,1,1,1,0,1\n', '0,50,0,1,0,0,1,1,1,1,1,1,1,0,1,1\n', '1,56,0,0,0,1,1,1,0,0,1,1,1,1,0,1\n', '0,70,1,0,0,0,0,1,1,1,1,1,1,0,1,1\n', '0,58,1,0,0,0,0,0,1,1,1,1,0,0,1,1\n', '0,67,1,0,1,0,0,1,1,0,1,1,1,0,1,1\n', '0,61,0,0,0,1,0,1,1,1,1,0,0,1,0,1\n']


In [111]:
score = []

#go through testing data
for iteration in testing_list:
    all_values = iteration.split(',')
    correct_ans = int(all_values[-1])
    
    #print("Actual number is", correct_ans)
    
    #since inputs are pixel values, they range from 0 to 255
    #scale inputs by dividing by 255 and multiplying by 0.99  and adding 0.01
    inputs = (np.asfarray(all_values[:-1]) * 0.98) + 0.01
    inputs[1] = ((inputs[1] - 0.01) / 0.98) / 100
    
    #final output
    outputs = n.query(inputs)
    
    #index of highest output is the answer
    index = np.argmax(outputs)
    
    #print("Networks identified answer is ", index)
    
    if index == correct_ans:
        score.append(1)
    else:
        score.append(0)
print(score)

[1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]


In [112]:
#calculate accuracy
score_Array = np.asarray(score)
print("Accuracy is ", score_Array.sum() / score_Array.size * 100, "%")

Accuracy is  96.15384615384616 %


In [114]:
print("Please answer the below questions with integers only. For yes/no questions, please answer 1 for Yes and 0 for No")
user_data = []
user_data.append(input("Please enter your gender. 0 for male and 1 for Female: "))
user_data.append(input("Please enter your age: "))
user_data.append(input("Please enter if you smoke: "))
user_data.append(input("Please enter if you have yellow fingers: "))
user_data.append(input("Please enter if you have anxiety issues: "))
user_data.append(input("Please enter if you are under peer pressure: "))
user_data.append(input("Please enter if you have a chronic disease: "))
user_data.append(input("Please enter if you face fatigue: "))
user_data.append(input("Please enter if you have any allergies: "))
user_data.append(input("Please enter if you wheeze a lot: "))
user_data.append(input("Please enter if you consume alcohol: "))
user_data.append(input("Please enter if you cough too frequently: "))
user_data.append(input("Please enter if you face shortness of breath: "))
user_data.append(input("Please enter if you have difficulty swallowing: "))
user_data.append(input("Please enter if you have chest pain: "))
print(user_data)

Please answer the below questions with integers only. For yes/no questions, please answer 1 for Yes and 0 for No
Please enter your gender. 0 for male and 1 for Female: 0
Please enter your age: 65
Please enter if you smoke: 0
Please enter if you have yellow fingers: 0
Please enter if you have anxiety issues: 0
Please enter if you are under peer pressure: 0
Please enter if you have a chronic disease: 0
Please enter if you face fatigue: 0
Please enter if you have any allergies: 0
Please enter if you wheeze a lot: 0
Please enter if you consume alcohol: 0
Please enter if you cough too frequently: 0
Please enter if you face shortness of breath: 0
Please enter if you have difficulty swallowing: 0
Please enter if you have chest pain: 0
['0', '65', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0', '0']


In [115]:
inputs = (np.asfarray(user_data))
output = n.query(inputs)
index = np.argmax(outputs)

print(output)

[[0.12770567]
 [0.29326159]]
