In [114]:
from google.colab import drive
# Mount Google Drive at /content/drive (Colab-only API); the CSV loaded later
# in this notebook is read from a path under this mount point.
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [115]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [116]:
# Load the complete dataset from the mounted Drive folder.
# The file is decoded as ISO-8859-1 (Latin-1) rather than pandas' default UTF-8.
cancer_csv_path = '/content/drive/MyDrive/cancer_reg.csv'
data = pd.read_csv(cancer_csv_path, encoding='ISO-8859-1')

In [117]:
# Normalise every column name to lower case so later cells can refer to
# columns without worrying about the CSV's original capitalisation.
data.columns = data.columns.str.lower()

# Summarise column dtypes and non-null counts.
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3047 entries, 0 to 3046
Data columns (total 34 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   avganncount              3047 non-null   float64
 1   avgdeathsperyear         3047 non-null   int64  
 2   target_deathrate         3047 non-null   float64
 3   incidencerate            3047 non-null   float64
 4   medincome                3047 non-null   int64  
 5   popest2015               3047 non-null   int64  
 6   povertypercent           3047 non-null   float64
 7   studypercap              3047 non-null   float64
 8   binnedinc                3047 non-null   object 
 9   medianage                3047 non-null   float64
 10  medianagemale            3047 non-null   float64
 11  medianagefemale          3047 non-null   float64
 12  geography                3047 non-null   object 
 13  avghouseholdsize         3047 non-null   float64
 14  percentmarried          

In [118]:
# Build the modelling frame by removing:
#   - 'geography' and 'binnedinc', which hold string values, and
#   - three further columns that, per the original note, contain NaN values.
columns_to_exclude = [
    'geography',
    'pctsomecol18_24',
    'pctprivatecoveragealone',
    'pctemployed16_over',
    'binnedinc',
]
df = data.drop(columns=columns_to_exclude)

In [119]:
# Pairwise Pearson correlation between all remaining columns;
# the matrix is the cell's displayed output.
correlation_matrix = df.corr(method='pearson')
correlation_matrix

Unnamed: 0,avganncount,avgdeathsperyear,target_deathrate,incidencerate,medincome,popest2015,povertypercent,studypercap,medianage,medianagemale,...,pctprivatecoverage,pctempprivcoverage,pctpubliccoverage,pctpubliccoveragealone,pctwhite,pctblack,pctasian,pctotherrace,pctmarriedhouseholds,birthrate
avganncount,1.0,0.939408,-0.143532,0.073553,0.269145,0.926894,-0.135694,0.082071,-0.024098,-0.124969,...,0.132244,0.202349,-0.173548,-0.093699,-0.136501,0.031376,0.435071,0.209184,-0.106221,-0.034508
avgdeathsperyear,0.939408,1.0,-0.090715,0.06269,0.223207,0.977634,-0.066918,0.063488,-0.024599,-0.148487,...,0.056183,0.160124,-0.131687,-0.027338,-0.187159,0.084607,0.443074,0.215149,-0.160266,-0.07442
target_deathrate,-0.143532,-0.090715,1.0,0.449432,-0.428615,-0.120073,0.429389,-0.022285,0.004375,-0.021929,...,-0.386066,-0.267399,0.404572,0.449358,-0.1774,0.257024,-0.186331,-0.189894,-0.293325,-0.087407
incidencerate,0.073553,0.06269,0.449432,1.0,-0.001036,0.026912,0.009046,0.077283,0.018089,-0.014733,...,0.105174,0.149825,0.046109,0.040812,-0.01451,0.113489,-0.008123,-0.208748,-0.152176,-0.118181
medincome,0.269145,0.223207,-0.428615,-0.001036,1.0,0.235523,-0.788965,0.044003,-0.013288,-0.091663,...,0.724175,0.747294,-0.754822,-0.719756,0.167225,-0.270232,0.425844,0.083635,0.446083,-0.010195
popest2015,0.926894,0.977634,-0.120073,0.026912,0.235523,1.0,-0.065299,0.055722,-0.025219,-0.176608,...,0.052677,0.15865,-0.160066,-0.041469,-0.190095,0.073044,0.464168,0.241468,-0.127979,-0.05774
povertypercent,-0.135694,-0.066918,0.429389,0.009046,-0.788965,-0.065299,1.0,-0.055652,-0.02928,-0.214001,...,-0.822534,-0.6831,0.651162,0.798642,-0.509433,0.51153,-0.157289,0.047096,-0.604953,-0.012283
studypercap,0.082071,0.063488,-0.022285,0.077283,0.044003,0.055722,-0.055652,1.0,-0.02603,-0.036647,...,0.092545,0.100063,-0.051497,-0.055512,0.023291,-0.019761,0.062543,-0.015247,-0.051736,0.010676
medianage,-0.024098,-0.024599,0.004375,0.018089,-0.013288,-0.025219,-0.02928,-0.02603,1.0,0.129119,...,0.004665,-0.036926,0.04906,-0.003298,0.035009,-0.017173,-0.038424,-0.030277,0.014504,-0.008276
medianagemale,-0.124969,-0.148487,-0.021929,-0.014733,-0.091663,-0.176608,-0.214001,-0.036647,0.129119,1.0,...,0.082232,-0.208664,0.398967,0.002479,0.398044,-0.242748,-0.238322,-0.266655,0.222278,-0.104105


In [120]:
# Separate the regression target from the candidate predictors.
target = df['target_deathrate']
features_all = df.drop(columns='target_deathrate')


In [121]:
# Select the model inputs (X) and the output (Y).
# The columns come from `df`, the cleaned frame built earlier in this notebook.
# The previous code read from `dataset`, a name that is never defined here, so
# the cell raised NameError on a fresh kernel (Restart & Run All).
selected_features = [
    'incidencerate',
    'pctpubliccoveragealone',
    'povertypercent',
    'pcths25_over',
    'pctpubliccoverage',
    'pctunemployed16_over',
    'pcths18_24',
]
X = df[selected_features]
Y = df['target_deathrate']

In [122]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [123]:
# Two-stage hold-out split, both stages using random_state=50:
#   1) set aside 20% of all rows as the test set;
#   2) set aside 20% of the remaining rows as the validation set.
split_options = dict(test_size=0.2, random_state=50)
X_train_val, X_test, Y_train_val, Y_test = train_test_split(X, Y, **split_options)
X_train, X_val, Y_train, Y_val = train_test_split(X_train_val, Y_train_val, **split_options)


In [124]:
# Standardise the features. The scaler's statistics are learned from the
# training rows only and then applied unchanged to all three splits, so no
# validation/test information is used when fitting the scaler.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_val, X_test = scaler.transform(X_val), scaler.transform(X_test)

In [125]:
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

In [126]:
import numpy as np
import random
seed_value = 0

# Seed NumPy, Python's `random` module and TensorFlow (in that order) with the
# same value, for reproducibility.
for apply_seed in (np.random.seed, random.seed, tf.random.set_seed):
    apply_seed(seed_value)

In [127]:
# code for printed date and time
# Custom callback to add date and time at the beginning and end of each epoch
import time
from datetime import datetime
from tensorflow.keras.callbacks import Callback

class TimeHistory(Callback):
    """Keras callback that prints wall-clock timestamps around every epoch.

    At the start of an epoch it records the start time and prints it; at the
    end it prints the end time together with the elapsed seconds. The recorded
    values stay available on the instance as ``epoch_time_start``,
    ``start_time`` and ``end_time`` (each overwritten every epoch).
    """

    # strftime pattern shared by both messages.
    _STAMP_FORMAT = '%Y-%m-%d %H:%M:%S'

    def on_epoch_begin(self, epoch, logs=None):
        """Record and print when the (0-based) ``epoch`` starts."""
        self.epoch_time_start = time.time()
        self.start_time = datetime.now()
        started = self.start_time.strftime(self._STAMP_FORMAT)
        # Epoch numbers are shown 1-based.
        print(f"Epoch {epoch + 1} started at {started}")

    def on_epoch_end(self, epoch, logs=None):
        """Print when the (0-based) ``epoch`` ended and how long it took."""
        epoch_duration = time.time() - self.epoch_time_start
        self.end_time = datetime.now()
        finished = self.end_time.strftime(self._STAMP_FORMAT)
        print(f"Epoch {epoch + 1} ended at {finished} and took {epoch_duration:.2f} seconds")

In [128]:
# Fully connected regression network: four ReLU hidden layers
# (30 -> 16 -> 8 -> 4 units) followed by a single linear output unit.
test_model = Sequential()

# Declare the input shape with an explicit Input layer. Passing `input_dim`
# to the first Dense layer is deprecated in current Keras and emits a
# UserWarning at construction time.
test_model.add(Input(shape=(X_train.shape[1],)))
test_model.add(Dense(30, activation='relu'))  # First hidden layer, 30 units
test_model.add(Dense(16, activation='relu'))  # Second hidden layer, 16 units
test_model.add(Dense(8, activation='relu'))   # Third hidden layer, 8 units
test_model.add(Dense(4, activation='relu'))   # Fourth hidden layer, 4 units

test_model.add(Dense(1))  # Output layer: one unit, no activation (regression)

# Compile the model with the Adam optimizer and mean-squared-error loss
test_model.compile(optimizer=Adam(learning_rate=0.001), loss='mse')

# Prints start/end timestamps and duration for every epoch
time_callback = TimeHistory()

# Train the model.
# NOTE(review): validation_split=0.2 makes Keras hold out part of X_train for
# the per-epoch val_loss; the separate X_val/Y_val split is not used here and
# is only scored after training. Consider validation_data=(X_val, Y_val).
history = test_model.fit(
    X_train,
    Y_train,
    epochs=100,
    verbose=1,
    validation_split=0.2,
    callbacks=[time_callback],
)

Epoch 1 started at 2024-09-21 16:50:23
Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m31/49[0m [32m━━━━━━━━━━━━[0m[37m━━━━━━━━[0m [1m0s[0m 2ms/step - loss: 32471.8457 Epoch 1 ended at 2024-09-21 16:50:25 and took 1.95 seconds
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step - loss: 32617.8125 - val_loss: 32343.7285
Epoch 2 started at 2024-09-21 16:50:25
Epoch 2/100
[1m46/49[0m [32m━━━━━━━━━━━━━━━━━━[0m[37m━━[0m [1m0s[0m 3ms/step - loss: 32269.8516Epoch 2 ended at 2024-09-21 16:50:25 and took 0.60 seconds
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 32287.5762 - val_loss: 31420.3047
Epoch 3 started at 2024-09-21 16:50:25
Epoch 3/100
[1m41/49[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0m [1m0s[0m 3ms/step - loss: 30940.5410Epoch 3 ended at 2024-09-21 16:50:26 and took 0.26 seconds
[1m49/49[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 30875.6445 - val_loss: 27827.3535
Epoch 4 started at 2024-09-21 16:50:26
Epoch 4/100
[1m40/49[0m [32m━━━━━━━━━━━━━━━━[0m[37m━━━━[0

In [129]:
def _regression_scores(y_true, y_pred):
    """Return (MSE, RMSE, R-squared) for the given true and predicted values."""
    mse = mean_squared_error(y_true, y_pred)
    return mse, np.sqrt(mse), r2_score(y_true, y_pred)


# Score the trained network on the validation split
Y_val_pred = test_model.predict(X_val)
mse_val, rmse_val, r2_val = _regression_scores(Y_val, Y_val_pred)

# Score the trained network on the held-out test split
Y_test_pred = test_model.predict(X_test)
mse_test, rmse_test, r2_test = _regression_scores(Y_test, Y_test_pred)

# The dict is the cell's last expression, so the notebook displays it
{
    'Validation MSE': mse_val,
    'Validation RMSE': rmse_val,
    'Validation R-squared': r2_val,
    'Test MSE': mse_test,
    'Test RMSE': rmse_test,
    'Test R-squared': r2_test,
}

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step
[1m20/20[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step 


{'Validation MSE': 362.503116350354,
 'Validation RMSE': 19.039514603853586,
 'Validation R-squared': 0.48855252269089033,
 'Test MSE': 385.1344660939575,
 'Test RMSE': 19.624843084569047,
 'Test R-squared': 0.5077164534340907}

In [130]:
from tensorflow.keras.models import Sequential

# `test_model` is the network trained earlier in this notebook.

# Flat list of every weight array in the model, as returned by get_weights();
# the name is kept in case later cells use it.
dnn_weights = test_model.get_weights()

# get_weights() returns each layer's kernel and bias as separate entries, so a
# position in that flat list is not a layer index (entry 1 is the first
# layer's bias, not the second layer). Walk the layers instead and label every
# array with its layer and variable name.
for layer_index, layer in enumerate(test_model.layers):
    for variable, values in zip(layer.weights, layer.get_weights()):
        print(f"Layer {layer_index} ({layer.name}) - {variable.name}, shape {values.shape}:")
        print(values)

Weights of layer 0:
[[-0.00385573 -0.05515384  0.3191799  -0.11393119  0.05338969 -0.03676246
   0.0186104   0.45700106  0.07281897 -0.03011778  0.26674896  0.03673965
   0.05050101  0.01797141 -0.08175103  0.03709345 -0.03161908  0.45877194
  -0.1987065  -0.20256376  0.2924498   0.12093332  0.22305241  0.43308768
   0.00798097  0.4722086  -0.08382563 -0.04708357 -0.0995295   0.2253246 ]
 [ 0.36439928 -0.02049484 -0.370918    0.0251051  -0.44439438  0.1464737
   0.2501921   0.13649005  0.32543635  0.20248006  0.0600948  -0.46689448
   0.13686296  0.05366354  0.0227036  -0.24903503  0.2589822   0.22576919
  -0.4794699   0.09905642  0.18330945  0.20112158 -0.15712701  0.5032695
  -0.10956576  0.33978534  0.06447751 -0.30660376 -0.3669562   0.19377914]
 [-0.10642929  0.16490076  0.3375029   0.42796132 -0.25210428  0.06723152
   0.05112491 -0.12539527  0.31637976  0.24447607 -0.04945817  0.24902871
   0.03780613 -0.07357731 -0.0856346  -0.10824379 -0.3375201  -0.02594751
   0.19921565  0.1