In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import requests

In [0]:
### Download data from google drive.

import requests

def download_file_from_google_drive(id, destination):
    URL = "https://docs.google.com/uc?export=download"

    session = requests.Session()

    response = session.get(URL, params = { 'id' : id }, stream = True)
    token = get_confirm_token(response)

    if token:
        params = { 'id' : id, 'confirm' : token }
        response = session.get(URL, params = params, stream = True)

    save_response_content(response, destination)    

def get_confirm_token(response):
    for key, value in response.cookies.items():
        if key.startswith('download_warning'):
            return value

    return None

def save_response_content(response, destination):
    CHUNK_SIZE = 32768

    with open(destination, "wb") as f:
        for chunk in response.iter_content(CHUNK_SIZE):
            if chunk: # filter out keep-alive new chunks
                f.write(chunk)
                
if __name__ == "__main__":
    file_id = '1ih8PomVE7L3z_xReHAEsq3hk-0O1Uo12'
    destination = 'data.csv'
    download_file_from_google_drive(file_id, destination)

In [0]:
# Importing and cleaning data using pandas library
data = pd.read_csv('data.csv')
del data['Unnamed: 32']


In [4]:
## Observe the data
data

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,fractal_dimension_mean,radius_se,texture_se,perimeter_se,area_se,smoothness_se,compactness_se,concavity_se,concave points_se,symmetry_se,fractal_dimension_se,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,842302,M,17.99,10.38,122.80,1001.0,0.11840,0.27760,0.30010,0.14710,0.2419,0.07871,1.0950,0.9053,8.589,153.40,0.006399,0.04904,0.05373,0.01587,0.03003,0.006193,25.380,17.33,184.60,2019.0,0.16220,0.66560,0.7119,0.2654,0.4601,0.11890
1,842517,M,20.57,17.77,132.90,1326.0,0.08474,0.07864,0.08690,0.07017,0.1812,0.05667,0.5435,0.7339,3.398,74.08,0.005225,0.01308,0.01860,0.01340,0.01389,0.003532,24.990,23.41,158.80,1956.0,0.12380,0.18660,0.2416,0.1860,0.2750,0.08902
2,84300903,M,19.69,21.25,130.00,1203.0,0.10960,0.15990,0.19740,0.12790,0.2069,0.05999,0.7456,0.7869,4.585,94.03,0.006150,0.04006,0.03832,0.02058,0.02250,0.004571,23.570,25.53,152.50,1709.0,0.14440,0.42450,0.4504,0.2430,0.3613,0.08758
3,84348301,M,11.42,20.38,77.58,386.1,0.14250,0.28390,0.24140,0.10520,0.2597,0.09744,0.4956,1.1560,3.445,27.23,0.009110,0.07458,0.05661,0.01867,0.05963,0.009208,14.910,26.50,98.87,567.7,0.20980,0.86630,0.6869,0.2575,0.6638,0.17300
4,84358402,M,20.29,14.34,135.10,1297.0,0.10030,0.13280,0.19800,0.10430,0.1809,0.05883,0.7572,0.7813,5.438,94.44,0.011490,0.02461,0.05688,0.01885,0.01756,0.005115,22.540,16.67,152.20,1575.0,0.13740,0.20500,0.4000,0.1625,0.2364,0.07678
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
564,926424,M,21.56,22.39,142.00,1479.0,0.11100,0.11590,0.24390,0.13890,0.1726,0.05623,1.1760,1.2560,7.673,158.70,0.010300,0.02891,0.05198,0.02454,0.01114,0.004239,25.450,26.40,166.10,2027.0,0.14100,0.21130,0.4107,0.2216,0.2060,0.07115
565,926682,M,20.13,28.25,131.20,1261.0,0.09780,0.10340,0.14400,0.09791,0.1752,0.05533,0.7655,2.4630,5.203,99.04,0.005769,0.02423,0.03950,0.01678,0.01898,0.002498,23.690,38.25,155.00,1731.0,0.11660,0.19220,0.3215,0.1628,0.2572,0.06637
566,926954,M,16.60,28.08,108.30,858.1,0.08455,0.10230,0.09251,0.05302,0.1590,0.05648,0.4564,1.0750,3.425,48.55,0.005903,0.03731,0.04730,0.01557,0.01318,0.003892,18.980,34.12,126.70,1124.0,0.11390,0.30940,0.3403,0.1418,0.2218,0.07820
567,927241,M,20.60,29.33,140.10,1265.0,0.11780,0.27700,0.35140,0.15200,0.2397,0.07016,0.7260,1.5950,5.772,86.22,0.006522,0.06158,0.07117,0.01664,0.02324,0.006185,25.740,39.42,184.60,1821.0,0.16500,0.86810,0.9387,0.2650,0.4087,0.12400


In [0]:
## We have left first two columns and taken other columns as input features
X = data.iloc[:, 2:].values

# 2nd column is output labels
y = data.iloc[:, 1].values
print(y)

In [0]:
## Convert the output labels to numbers : M->0; B-> 1
## Store the output in Y_v
Y_v = []

for i in range(len(y)):
  if(y[i]=='M'):
    Y_v.append(0)
  else:
    Y_v.append(1)
    
print(Y_v)


In [0]:
Y_v

In [0]:
### One-hot encode Y_v
def oneHot(y, Ny):
    '''
    Input:
        y: an int in {0, 1}
        Ny: Number of classes, e.g., 2 here.
    Output:
        Y: a vector of Ny (=2) tuples
    '''
    Y=np.zeros(Ny)
    if(y==0):
      Y[0]=1
    else:
      Y[1]=1
    return Y
    
    ##Y=keras.utils.to_categorical(y,num_classes=Ny);
    #return Y
    


In [0]:
from sklearn.model_selection import train_test_split
### Split data into train and test. Keep 10% of samples for testing
## Divide the data into these variables - X_train, X_test, y_train, y_test

X_train, X_test, y_train, y_test=train_test_split(X,Y_v,test_size=0.1,random_state=42)
print(type(X_train))
print(type(y_train))
print(y_test)

In [0]:
## Normalize the Data
def findMeanStddev(X):
    '''
    Input: 
        X: a matrix of size (no. of samples, dimension of each sample)
    Output:
        mean: mean of samples in X (same size as X)
        stddev: element-wise std dev of sample in X (same size as X)
    '''
    mean=np.mean(X,axis=0)
    stddev=np.std(X,axis=0)
    return mean,stddev
    

def normalizeX(X, mean, stddev):
    '''
    Input:
        X: a matrix of size (no. of samples, dimension of each sample)
        mean: mean of samples in X (same size as X)
        stddev: element-wise std dev of sample in X (same size as X) 
    Output:
        Xn: X modified to have 0 mean and 1 std dev
    '''
    Xn=np.divide((X-mean),stddev)
    return Xn
    


In [20]:
"""test for normalizeX"""
#normalization
x_tr=[]
x_te=[]
'''
for i in range(len(y_train)):
  y_tr.append(oneHot(Y_v[i],2))
for i in range(len(y_test)):
  y_te.append(oneHot(Y_v[i],2))
'''  
mean,stddev=findMeanStddev(X_train)
x_tr=normalizeX(X_train,mean,stddev)
mean,stddev=findMeanStddev(X_test)
x_te=normalizeX(X_test,mean,stddev)

#conversion of lists into arrays

x_tr=np.array(x_tr)
x_te=np.array(x_te)
y_train=np.array(y_train)
y_test=np.array(y_test)
print(x_tr.shape)
print(np.array(y_train).shape)


(512, 30)
(512,)


#### Create model. 
- Choose the number of hidden layers, neurons, activations, loss function, learning rate and optimizers on your own.
- Report accuracy metric
- Use no more than 100 epochs

In [24]:
import keras

from keras.models import Sequential
from keras.layers import Dense
from keras import metrics
#model
model=Sequential()

#layers
model.add(Dense(250,activation='relu',input_shape=(x_tr.shape[1],)))
model.add(Dense(250,activation='relu'))
model.add(Dense(1))
model.compile(optimizer='adam',loss='mean_squared_error',metrics=[metrics.binary_accuracy])

#stop epochs if val_loss doesn't improve
from keras.callbacks import EarlyStopping
stop_early=EarlyStopping(patience=6)

#training
model.fit(x_tr,y_train,validation_split=0.2,epochs=30,callbacks=[stop_early])


Train on 409 samples, validate on 103 samples
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30


<keras.callbacks.History at 0x7fcc0efa3828>

In [27]:
"""Test for model"""
'''
from sklearn.metrics import confusion_matrix
pred=model.predict(x_te)
count=0
confusionMatrix=[]
confusionMatrix=confusion_matrix(y_test,pred)
n=confusionMatrix.shape[0]
for i in range(n):
  count=count+confusionMatrix[i][i]

#accuracy
accuracy=count/(n*n)
print(accuracy)
'''

accuracy=model.evaluate(x_te,y_test)
print('Accuracy:',accuracy[1])

Accuracy: 0.9649122807017544
