In [18]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import pickle as pkl
import warnings

In [19]:
# Suppress all warnings so library chatter does not clutter cell outputs.
warnings.simplefilter('ignore')

# Read the dataset from the CSV file and display it as the cell result.
df = pd.read_csv(filepath_or_buffer='./data/Prostate_Cancer.csv')
df

Unnamed: 0,id,diagnosis_result,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
0,1,M,23,12,151,954,0.143,0.278,0.242,0.079
1,2,B,9,13,133,1326,0.143,0.079,0.181,0.057
2,3,M,21,27,130,1203,0.125,0.160,0.207,0.060
3,4,M,14,16,78,386,0.070,0.284,0.260,0.097
4,5,M,9,19,135,1297,0.141,0.133,0.181,0.059
...,...,...,...,...,...,...,...,...,...,...
95,96,M,23,16,132,1264,0.091,0.131,0.210,0.056
96,97,B,22,14,78,451,0.105,0.071,0.190,0.066
97,98,B,19,27,62,295,0.102,0.053,0.135,0.069
98,99,B,21,24,74,413,0.090,0.075,0.162,0.066


In [20]:
# Show the column labels of the loaded frame.
df.columns

Index(['id', 'diagnosis_result', 'radius', 'texture', 'perimeter', 'area',
       'smoothness', 'compactness', 'symmetry', 'fractal_dimension'],
      dtype='object')

In [21]:
# Count how many rows carry each 'diagnosis_result' label to check class balance.
df['diagnosis_result'].value_counts()

M    62
B    38
Name: diagnosis_result, dtype: int64

## Undersampling

In [22]:
from sklearn.metrics import classification_report

In [23]:
def convert(arr):
    """Threshold scores into hard 0/1 labels.

    Parameters
    ----------
    arr : iterable
        Scores to threshold. Each element is compared with ``> 0.5`` and the
        result is used as a boolean, so every element must be a scalar or a
        one-element array.

    Returns
    -------
    list of int
        ``1`` for every element strictly greater than 0.5, ``0`` otherwise
        (an element equal to 0.5 maps to ``0``).
    """
    return [1 if score > 0.5 else 0 for score in arr]
            

In [24]:
def Ann(x_train,y_train,x_test,y_test,shape,opt,loss_function,ep):
    """Build, train and evaluate a small fully connected binary classifier.

    The network is Dense(100, relu) -> Dropout(0.2) -> Dense(10, relu) ->
    Dropout(0.2) -> Dense(1, sigmoid). After fitting on the training data it
    predicts on the test data, thresholds the predictions with ``convert``
    and prints a ``classification_report``.

    Parameters
    ----------
    x_train, y_train
        Training features and targets passed to ``model.fit``.
    x_test, y_test
        Held-out features and targets used for the printed report.
    shape : int
        Number of input features (used as ``input_shape=(shape,)``).
    opt
        Optimizer forwarded to ``model.compile``.
    loss_function
        Loss forwarded to ``model.compile``.
    ep : int
        Number of training epochs.

    Returns
    -------
    The fitted Keras model.
    """
    layers = keras.layers

    # Assemble the layer stack one layer at a time.
    network = keras.Sequential()
    network.add(layers.Dense(100, input_shape=(shape,), activation='relu'))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(10, activation='relu'))
    network.add(layers.Dropout(0.2))
    network.add(layers.Dense(1, activation='sigmoid'))

    network.compile(optimizer=opt, loss=loss_function, metrics=['accuracy'])
    network.fit(x_train, y_train, epochs=ep)

    # Turn the sigmoid outputs into hard labels before scoring.
    probabilities = network.predict(x_test)
    hard_labels = convert(probabilities)
    print(classification_report(y_test, hard_labels))
    return network

In [25]:
# Count the rows labelled 'B'; this count is the per-class size used when
# undersampling the 'M' rows.
samples_to_take = df.loc[df['diagnosis_result'] == 'B', 'diagnosis_result'].value_counts()

# The Series is indexed by the label 'B', so read the count positionally with
# .iloc; a plain integer key on a non-integer index is deprecated in pandas.
samples_to_take.iloc[0]

38

In [26]:
# Randomly keep as many 'M' rows as there are 'B' rows. The fixed random_state
# makes the undersampled subset identical on every run; .iloc[0] reads the
# count positionally because the Series is indexed by the label 'B'.
final_df_m = df[df['diagnosis_result'] == 'M'].sample(
    n=samples_to_take.iloc[0],
    random_state=42,
)

# Keep every 'B' row unchanged.
final_df_b = df[df['diagnosis_result'] == 'B']


In [27]:
# Display the 'B' subset.
final_df_b

Unnamed: 0,id,diagnosis_result,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
1,2,B,9,13,133,1326,0.143,0.079,0.181,0.057
5,6,B,25,25,83,477,0.128,0.17,0.209,0.076
12,13,B,14,15,132,1123,0.097,0.246,0.24,0.078
19,20,B,17,11,87,566,0.098,0.081,0.189,0.058
20,21,B,16,14,86,520,0.108,0.127,0.197,0.068
21,22,B,17,24,60,274,0.102,0.065,0.182,0.069
37,38,B,21,11,83,524,0.09,0.038,0.147,0.059
46,47,B,22,12,52,202,0.086,0.059,0.177,0.065
48,49,B,20,21,78,449,0.103,0.091,0.168,0.06
49,50,B,25,11,87,561,0.088,0.077,0.181,0.057


In [28]:
# Stack the sampled 'M' rows on top of all 'B' rows (row-wise concatenation),
# then display the combined frame.
class_parts = [final_df_m, final_df_b]
under_sample_df = pd.concat(class_parts, axis=0)
under_sample_df

Unnamed: 0,id,diagnosis_result,radius,texture,perimeter,area,smoothness,compactness,symmetry,fractal_dimension
16,17,M,10,16,95,685,0.099,0.072,0.159,0.059
82,83,M,10,15,172,1878,0.106,0.267,0.183,0.068
39,40,M,10,14,88,559,0.102,0.126,0.172,0.064
29,30,M,11,16,115,955,0.098,0.116,0.174,0.061
22,23,M,20,27,103,704,0.107,0.214,0.252,0.070
...,...,...,...,...,...,...,...,...,...,...
92,93,B,14,14,85,552,0.074,0.051,0.139,0.053
93,94,B,10,17,87,555,0.102,0.082,0.164,0.057
96,97,B,22,14,78,451,0.105,0.071,0.190,0.066
97,98,B,19,27,62,295,0.102,0.053,0.135,0.069


In [29]:
# Remove the 'id' column from the frame. Rebinding the name instead of using
# inplace=True, together with errors='ignore', keeps this cell safe to
# re-run: a second execution no longer raises KeyError on the missing column.
under_sample_df = under_sample_df.drop(columns=['id'], errors='ignore')

In [30]:
# Target: the 'diagnosis_result' column. Features: every remaining column.
y = under_sample_df['diagnosis_result']
x = under_sample_df.drop(columns='diagnosis_result')


In [31]:
# One-hot encode the labels and drop the first category, leaving a single
# indicator column. dtype=int pins the encoding to 0/1 integers; without it
# the result dtype depends on the installed pandas version (bool in 2.0+).
y_new = pd.get_dummies(y, drop_first=True, dtype=int)
y_new

Unnamed: 0,M
16,1
82,1
39,1
29,1
22,1
...,...
92,0
93,0
96,0
97,0


In [32]:
# Hold out 20% of the rows for testing, stratified on the label so both
# classes keep the same proportion in each split. The fixed random_state
# makes the split identical on every run.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y_new,
    test_size=0.2,
    stratify=y_new,
    random_state=42,
)

In [33]:
# Seed TensorFlow so weight initialisation and dropout are repeatable.
tf.random.set_seed(42)

# Take the input width from the training data instead of hard-coding it, so
# the call stays correct if the feature columns change.
# NOTE(review): the recorded run predicted only class 1 (precision/recall 0.00
# for class 0). The features are fed in unscaled and training lasts 10
# epochs; consider standardising the inputs and training longer.
under_sample_model = Ann(
    x_train,
    y_train,
    x_test,
    y_test,
    x_train.shape[1],
    'adam',
    'binary_crossentropy',
    10,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.00      0.00      0.00         8
           1       0.50      1.00      0.67         8

    accuracy                           0.50        16
   macro avg       0.25      0.50      0.33        16
weighted avg       0.25      0.50      0.33        16



## Oversampling