# Neural Network Model

## Step 1: Reading cleaned_cancer_data.csv from Resources folder to create a Pandas DataFrame.

In [3]:
# Import our dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import pandas as pd
import tensorflow as tf

# Read the cleaned cancer dataset into a DataFrame and preview the first rows.
# (pandas is already imported as `pd` at the top of this cell.)
data_df = pd.read_csv("Resources/cleaned_cancer_data.csv")
data_df.head()

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,Diagnosis Age,Fraction Genome Altered,Mutation Count,Overall Survival Status,TMB (nonsynonymous),Cancer Type Detailed,Prior Cancer Diagnosis Occurence,Smoking History,Sex,Person Cigarette Smoking History Pack Year Value
0,159,159,70,0.4565,189,0:LIVING,6.3,1,0,1,0,38.0
1,160,160,67,0.2221,288,0:LIVING,9.633333,1,0,1,0,52.0
2,161,161,79,0.2362,296,1:DECEASED,9.833333,1,0,1,1,47.0
3,162,162,68,0.0854,1625,0:LIVING,54.233333,1,1,1,0,62.0
4,163,163,66,0.0661,122,0:LIVING,4.066667,1,1,1,0,20.0


In [5]:
# Drop the unnecessary "Unnamed: ..." columns (presumably leftover index
# columns from earlier CSV exports -- they carry no predictive information).
redundant_columns = ["Unnamed: 0.1", "Unnamed: 0"]
clean_data_df = data_df.drop(columns=redundant_columns)
clean_data_df

Unnamed: 0,Diagnosis Age,Fraction Genome Altered,Mutation Count,Overall Survival Status,TMB (nonsynonymous),Cancer Type Detailed,Prior Cancer Diagnosis Occurence,Smoking History,Sex,Person Cigarette Smoking History Pack Year Value
0,70,0.4565,189,0:LIVING,6.300000,1,0,1,0,38.0
1,67,0.2221,288,0:LIVING,9.633333,1,0,1,0,52.0
2,79,0.2362,296,1:DECEASED,9.833333,1,0,1,1,47.0
3,68,0.0854,1625,0:LIVING,54.233333,1,1,1,0,62.0
4,66,0.0661,122,0:LIVING,4.066667,1,1,1,0,20.0
...,...,...,...,...,...,...,...,...,...,...
977,75,0.2382,211,1:DECEASED,7.033333,0,0,1,1,1.0
978,63,0.5420,101,1:DECEASED,3.400000,0,1,1,0,2.5
979,71,0.4405,216,1:DECEASED,7.200000,0,0,1,1,2.5
980,68,0.0598,109,0:LIVING,3.633333,1,0,1,1,95.0


In [9]:
# Checking data types.
# Only the last expression of a cell is displayed, so this cell shows a single
# result: the dtype of every column. Object-dtype columns are the ones that
# still need encoding before they can be fed to the model.
clean_data_df.dtypes

Diagnosis Age                                         int64
Fraction Genome Altered                             float64
Mutation Count                                        int64
Overall Survival Status                              object
TMB (nonsynonymous)                                 float64
Cancer Type Detailed                                  int64
Prior Cancer Diagnosis Occurence                      int64
Smoking History                                       int64
Sex                                                   int64
Person Cigarette Smoking History Pack Year Value    float64
dtype: object

In [None]:
# One-hot encode the non-numeric columns of the cleaned frame; numeric columns
# pass through unchanged. The result is a new DataFrame (clean_data_df is not modified).
dummies_df = pd.get_dummies(data=clean_data_df)
dummies_df

In [None]:
# Step 2: Creating and separating labels (y) and features (X) from dummies DataFrame.
# get_dummies replaced the object column "Overall Survival Status" with one
# indicator column per category, named "<column>_<category>"
# (i.e. "..._0:LIVING" and "..._1:DECEASED").
TARGET_PREFIX = "Overall Survival Status"
TARGET_COLUMN = "Overall Survival Status_1:DECEASED"
target_dummy_columns = [
    column for column in dummies_df.columns if column.startswith(TARGET_PREFIX)
]

# Label: 1 = deceased, 0 = living. Cast to int because newer pandas versions
# return boolean indicator columns.
y = dummies_df[TARGET_COLUMN].astype(int).values

# Features: every column except the target indicators. drop() is called without
# inplace=True so it returns a new frame (inplace=True returns None) and
# dummies_df itself stays intact if the cell is re-run.
X = dummies_df.drop(columns=target_dummy_columns).values

# Splitting into training and testing data (fixed seed for reproducibility)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=7)

## Step 3: Scaling the feature data (X) for the model.

In [None]:
scaler = StandardScaler()

# Fitting StandardScaler on the training features only, so that no statistics
# from the test set leak into the preprocessing step.
X_scaler = scaler.fit(X_train)

# Scaling the data: apply the training-set mean/std to both feature splits.
# The labels (y) are binary classes and are intentionally left unscaled.
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

## Step 4: Define, Compile and Train Model

In [None]:
# Defining model through dense layers and number of neurons for each layer.
# The input width is taken from the scaled training data so the first layer
# always matches the actual number of feature columns.
num_input_features = X_train_scaled.shape[1]
hidden_nodes_1 = 6
hidden_nodes_2 = 6

# Create a Keras Sequential model and add more than one Dense hidden layer
nn_model = tf.keras.models.Sequential()

# First hidden layer (also declares the input dimension)
nn_model.add(
    tf.keras.layers.Dense(
        units=hidden_nodes_1, activation="relu", input_dim=num_input_features
    )
)

# Second hidden layer
nn_model.add(tf.keras.layers.Dense(units=hidden_nodes_2, activation="relu"))

# Output layer: a single sigmoid unit for the binary survival-status label
nn_model.add(tf.keras.layers.Dense(units=1, activation="sigmoid"))

# Check the structure of the Sequential model
nn_model.summary()

In [None]:
# Configure the model for binary classification: binary cross-entropy loss,
# the Adam optimizer, and accuracy reported during training.
nn_model.compile(
    optimizer="adam",
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

# Train on the scaled training features for 200 epochs; the returned object
# is kept in fit_model.
fit_model = nn_model.fit(x=X_train_scaled, y=y_train, epochs=200)