In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import pandas as pd

In [3]:
# Read the raw survey responses into a DataFrame and preview the first rows.
data = pd.read_csv(filepath_or_buffer='survey lung cancer.csv')
data.head(5)

Unnamed: 0,GENDER,AGE,SMOKING,YELLOW_FINGERS,ANXIETY,PEER_PRESSURE,CHRONIC DISEASE,FATIGUE,ALLERGY,WHEEZING,ALCOHOL CONSUMING,COUGHING,SHORTNESS OF BREATH,SWALLOWING DIFFICULTY,CHEST PAIN,LUNG_CANCER
0,M,69,1,2,2,1,1,2,1,2,2,2,2,2,2,YES
1,M,74,2,1,1,1,2,2,2,1,1,1,2,2,2,YES
2,F,59,1,1,1,2,1,2,1,2,1,2,2,1,2,NO
3,M,63,2,2,2,1,1,1,1,1,2,1,1,2,2,NO
4,F,63,1,2,1,1,1,1,1,2,1,2,2,1,1,NO


In [4]:
# Clean the raw survey frame: tidy the headers, drop the unused `gender`
# column and recode every answer column to 0/1.
#
# NOTE: this cell is not idempotent. Re-run the loading cell before re-running
# it; otherwise `gender` is already gone and the drop raises a KeyError.

# Lower-case the headers, then swap spaces for underscores and remove the
# trailing blank that the raw 'fatigue ' and 'allergy ' headers carry.
data.columns = data.columns.str.lower()
data = data.rename(columns={
    'chronic disease': 'chronic_disease',
    'alcohol consuming': 'alcohol_consuming',
    'shortness of breath': 'shortness_of_breath',
    'swallowing difficulty': 'swallowing_difficulty',
    'chest pain': 'chest_pain',
    'fatigue ': 'fatigue',
    'allergy ': 'allergy',
}).drop(columns='gender')

# Columns whose answers are coded 1/2 in the raw file; shift them to 0/1.
binary_columns = [
    'smoking', 'yellow_fingers', 'anxiety', 'peer_pressure',
    'chronic_disease', 'fatigue', 'allergy', 'wheezing',
    'alcohol_consuming', 'coughing', 'shortness_of_breath',
    'swallowing_difficulty', 'chest_pain',
]
# Assign the recoded block back to the frame. Calling
# `data.<col>.replace(..., inplace=True)` is a chained in-place update: it
# warns on recent pandas and leaves `data` untouched under Copy-on-Write.
data[binary_columns] = data[binary_columns].replace({1: 0, 2: 1})

# Encode the label with `map` so the result is numeric without relying on
# `replace` silently downcasting an object column.
data['lung_cancer'] = data['lung_cancer'].map({'YES': 1, 'NO': 0})
# Any label other than 'YES'/'NO' maps to NaN; fail loudly instead of training on it.
assert data['lung_cancer'].notna().all(), "unexpected value in 'lung_cancer' column"
data['lung_cancer'] = data['lung_cancer'].astype('int64')

data.head()

Unnamed: 0,age,smoking,yellow_fingers,anxiety,peer_pressure,chronic_disease,fatigue,allergy,wheezing,alcohol_consuming,coughing,shortness_of_breath,swallowing_difficulty,chest_pain,lung_cancer
0,69,0,1,1,0,0,1,0,1,1,1,1,1,1,1
1,74,1,0,0,0,1,1,1,0,0,0,1,1,1,1
2,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0
3,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0
4,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0


In [5]:
# Print the column dtypes and non-null counts of the cleaned frame.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 309 entries, 0 to 308
Data columns (total 15 columns):
 #   Column                 Non-Null Count  Dtype
---  ------                 --------------  -----
 0   age                    309 non-null    int64
 1   smoking                309 non-null    int64
 2   yellow_fingers         309 non-null    int64
 3   anxiety                309 non-null    int64
 4   peer_pressure          309 non-null    int64
 5   chronic_disease        309 non-null    int64
 6   fatigue                309 non-null    int64
 7   allergy                309 non-null    int64
 8   wheezing               309 non-null    int64
 9   alcohol_consuming      309 non-null    int64
 10  coughing               309 non-null    int64
 11  shortness_of_breath    309 non-null    int64
 12  swallowing_difficulty  309 non-null    int64
 13  chest_pain             309 non-null    int64
 14  lung_cancer            309 non-null    int64
dtypes: int64(15)
memory usage: 36.3 KB


In [7]:
# Min-max scale the feature columns and keep the label column separately.
scaler = MinMaxScaler()

# Every column name except the last one (the label) is treated as a feature.
train_list = data.columns.values[:-1].copy()

# Fit the scaler on the features cast to float64, then wrap the result in a tensor.
x_train = tf.convert_to_tensor(
    scaler.fit_transform(data[train_list].astype('float64'))
)

y_train = data['lung_cancer']

# Show the first scaled row, then the overall tensor shape.
print(x_train[0])
x_train.shape

tf.Tensor(
[0.72727273 0.         1.         1.         0.         0.
 1.         0.         1.         1.         1.         1.
 1.         1.        ], shape=(14,), dtype=float64)


TensorShape([309, 14])

In [8]:
def build_model(input_dim=14):
    """Build and compile the binary classifier.

    The network is a plain stack of fully connected layers:
    ``input_dim`` inputs -> 512 (ReLU) -> 1024 (ReLU) -> 1 (sigmoid).

    Args:
        input_dim: Number of input features per sample. Defaults to 14,
            the width used by the existing ``build_model()`` call.

    Returns:
        A compiled ``Sequential`` model named ``'Cancer_Recognition'`` that
        uses binary cross-entropy loss, the Adam optimizer and reports accuracy.
    """
    model = Sequential(name='Cancer_Recognition')
    # Declare the input shape explicitly instead of passing the legacy
    # `input_dim` keyword to the first Dense layer.
    model.add(tf.keras.Input(shape=(input_dim,)))
    model.add(Dense(512, activation='relu'))
    model.add(Dense(1024, activation='relu'))
    # A single sigmoid unit outputs a value in (0, 1) for the positive class.
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
    return model

In [9]:
# Instantiate a fresh, compiled (untrained) model.
model = build_model()

In [10]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "Cancer_Recognition"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 512)               7680      
                                                                 
 dense_1 (Dense)             (None, 1024)              525312    
                                                                 
 dense_2 (Dense)             (None, 1)                 1025      
                                                                 
Total params: 534,017
Trainable params: 534,017
Non-trainable params: 0
_________________________________________________________________


In [11]:
# Train for 10 epochs with 20% of the samples held out for validation;
# the per-epoch metrics are kept in `history`.
history = model.fit(
    x=x_train,
    y=y_train,
    validation_split=0.2,
    epochs=10,
    verbose=2,
)

Epoch 1/10
8/8 - 1s - loss: 0.4132 - accuracy: 0.7733 - val_loss: 0.4226 - val_accuracy: 0.8387 - 743ms/epoch - 93ms/step
Epoch 2/10
8/8 - 0s - loss: 0.2859 - accuracy: 0.8826 - val_loss: 0.3125 - val_accuracy: 0.8387 - 56ms/epoch - 7ms/step
Epoch 3/10
8/8 - 0s - loss: 0.2399 - accuracy: 0.8826 - val_loss: 0.2822 - val_accuracy: 0.8387 - 51ms/epoch - 6ms/step
Epoch 4/10
8/8 - 0s - loss: 0.2139 - accuracy: 0.8866 - val_loss: 0.2540 - val_accuracy: 0.8548 - 55ms/epoch - 7ms/step
Epoch 5/10
8/8 - 0s - loss: 0.1954 - accuracy: 0.9028 - val_loss: 0.2424 - val_accuracy: 0.8871 - 51ms/epoch - 6ms/step
Epoch 6/10
8/8 - 0s - loss: 0.1731 - accuracy: 0.9352 - val_loss: 0.2047 - val_accuracy: 0.9355 - 53ms/epoch - 7ms/step
Epoch 7/10
8/8 - 0s - loss: 0.1570 - accuracy: 0.9312 - val_loss: 0.2227 - val_accuracy: 0.9032 - 50ms/epoch - 6ms/step
Epoch 8/10
8/8 - 0s - loss: 0.1454 - accuracy: 0.9474 - val_loss: 0.1737 - val_accuracy: 0.9516 - 48ms/epoch - 6ms/step
Epoch 9/10
8/8 - 0s - loss: 0.1266 - a

In [12]:
# Show the recorded per-epoch loss/accuracy for the training and validation data.
history.history

{'loss': [0.4132351279258728,
  0.285892128944397,
  0.23994415998458862,
  0.2139173299074173,
  0.19535106420516968,
  0.17313846945762634,
  0.15703679621219635,
  0.14535494148731232,
  0.12662258744239807,
  0.1215834841132164],
 'accuracy': [0.7732793688774109,
  0.8825910687446594,
  0.8825910687446594,
  0.8866396546363831,
  0.9028339982032776,
  0.9352226853370667,
  0.931174099445343,
  0.9473684430122375,
  0.9473684430122375,
  0.9554656147956848],
 'val_loss': [0.42261505126953125,
  0.3124917149543762,
  0.2821962237358093,
  0.2539657950401306,
  0.24244233965873718,
  0.20467807352542877,
  0.22270353138446808,
  0.1736658364534378,
  0.18567337095737457,
  0.16914896667003632],
 'val_accuracy': [0.8387096524238586,
  0.8387096524238586,
  0.8387096524238586,
  0.8548387289047241,
  0.8870967626571655,
  0.9354838728904724,
  0.9032257795333862,
  0.9516128897666931,
  0.9032257795333862,
  0.9193548560142517]}

In [13]:
# Score the model on the full dataset.
# NOTE: `x_train`/`y_train` are the same arrays that were passed to `fit`, so
# this is NOT a held-out test score. The label below says so explicitly.
results = model.evaluate(x_train, y_train)
print(f'full-dataset loss, acc (includes training rows): {results}')

test loss, test acc: [0.12189887464046478, 0.9449838399887085]


In [22]:
# Sanity check: classify the first scaled row using a 0.5 decision threshold.
test = tf.reshape(x_train[0], [1, 14])  # single row -> batch of one
result = model.predict(test)
print(int(bool(result >= 0.5)))

1


In [26]:
# Compare thresholded predictions with the true labels for the first ten rows.
# One batched `predict` call replaces ten single-row calls inside the loop.
n_rows = 10
probabilities = model.predict(x_train[:n_rows])

# `.iloc` selects labels by position, matching the positional slice of `x_train`.
for probability, actual in zip(probabilities[:, 0], y_train.iloc[:n_rows]):
    result = 1 if probability >= 0.5 else 0
    print(f'Predicted: {result}\tActual: {actual}')

Predicted: 1	Actual: 1
Predicted: 1	Actual: 1
Predicted: 0	Actual: 0
Predicted: 0	Actual: 0
Predicted: 0	Actual: 0
Predicted: 1	Actual: 1
Predicted: 1	Actual: 1
Predicted: 1	Actual: 1
Predicted: 0	Actual: 0
Predicted: 1	Actual: 1


In [23]:
# Show the first ten cleaned rows for comparison with the predictions.
data.head(10)

Unnamed: 0,age,smoking,yellow_fingers,anxiety,peer_pressure,chronic_disease,fatigue,allergy,wheezing,alcohol_consuming,coughing,shortness_of_breath,swallowing_difficulty,chest_pain,lung_cancer
0,69,0,1,1,0,0,1,0,1,1,1,1,1,1,1
1,74,1,0,0,0,1,1,1,0,0,0,1,1,1,1
2,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0
3,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0
4,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0
5,75,0,1,0,0,1,1,1,1,0,1,1,0,0,1
6,52,1,0,0,0,0,1,0,1,1,1,1,0,1,1
7,51,1,1,1,1,0,1,1,0,0,0,1,1,0,1
8,68,1,0,1,0,0,1,0,0,0,0,0,0,0,0
9,53,1,1,1,1,1,0,1,0,1,0,0,1,1,1


In [27]:
# Persist the trained model under saved_model/lung_cancer_model.
# NOTE(review): the path has no file extension; some Keras versions may
# require an explicit `.keras`/`.h5` suffix. Confirm against the installed version.
model.save('saved_model/lung_cancer_model')

INFO:tensorflow:Assets written to: saved_model/lung_cancer_model\assets


In [28]:
from pickle import dump

# Persist the fitted scaler next to the model so the same scaling can be
# applied at inference time. The `with` block closes the file handle even if
# pickling fails.
with open('scaler.pkl', 'wb') as scaler_file:
    dump(scaler, scaler_file)

In [29]:
# Display the current value of the `test` input tensor.
test

<tf.Tensor: shape=(1, 14), dtype=float64, numpy=
array([[0.72727273, 0.        , 1.        , 1.        , 0.        ,
        0.        , 1.        , 0.        , 1.        , 1.        ,
        1.        , 1.        , 1.        , 1.        ]])>

In [30]:
# List the rows whose label is 0.
data.loc[data['lung_cancer'] == 0]

Unnamed: 0,age,smoking,yellow_fingers,anxiety,peer_pressure,chronic_disease,fatigue,allergy,wheezing,alcohol_consuming,coughing,shortness_of_breath,swallowing_difficulty,chest_pain,lung_cancer
2,59,0,0,0,1,0,1,0,1,0,1,1,0,1,0
3,63,1,1,1,0,0,0,0,0,1,0,0,1,1,0
4,63,0,1,0,0,0,0,0,1,0,1,1,0,0,0
8,68,1,0,1,0,0,1,0,0,0,0,0,0,0,0
12,60,1,0,0,0,0,1,0,0,0,0,1,0,0,0
14,69,1,0,0,0,0,0,1,1,1,1,0,0,1,0
19,61,0,0,0,0,1,1,0,0,0,0,1,0,0,0
22,21,1,0,0,0,1,1,1,0,0,0,1,0,0,0
27,69,0,0,0,1,0,1,0,1,0,1,1,0,1,0
29,55,0,1,0,0,0,1,0,1,1,1,1,0,0,0


In [39]:
# Raw predicted probability for the scaled row at position 286.
test = tf.reshape(x_train[286], [1, 14])  # single row -> batch of one
model.predict(test)[0, 0]

0.2869744