In [1]:
import pandas as pd
import numpy as np 
import torch
import torch.nn as nn
from sklearn.preprocessing import StandardScaler
from torch.utils.data import Dataset

## Lectura del dataset

In [2]:
#nombres de columnas especificadas en archivo 'breast-cancer-wisconsin.names.txt'
cols_names = ['id ticket','Clump Thickness','Uniformity of Cell Size','Uniformity of Cell Shape',
             'Marginal Adhesion','Single Epithelial Cell Size','Bare Nuclei','Bland Chromatin',
             'Normal Nucleoli','Mitoses','Class']
df_cancer = pd.read_csv('cancer/breast-cancer-wisconsin.data.txt',names=cols_names)
df_cancer.head()

Unnamed: 0,id ticket,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,1000025,5,1,1,1,2,1,3,1,1,2
1,1002945,5,4,4,5,7,10,3,2,1,2
2,1015425,3,1,1,1,2,2,3,1,1,2
3,1016277,6,8,8,1,3,4,3,7,1,2
4,1017023,4,1,1,3,2,1,3,1,1,2


## Limpieza de datos

In [3]:
#quitamos la columna que no nocesitamos
df_cancer=df_cancer.drop('id ticket',axis=1)

In [4]:
#verificamos que todos los valores sean validos
df_cancer.isna().sum()

Clump Thickness                0
Uniformity of Cell Size        0
Uniformity of Cell Shape       0
Marginal Adhesion              0
Single Epithelial Cell Size    0
Bare Nuclei                    0
Bland Chromatin                0
Normal Nucleoli                0
Mitoses                        0
Class                          0
dtype: int64

In [5]:
df_cancer.head(2)

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,5,1,1,1,2,1,3,1,1,2
1,5,4,4,5,7,10,3,2,1,2


In [6]:
index_to_drop = df_cancer[df_cancer['Bare Nuclei']=='?'].index #elimanos los caracteres '?'
df_cancer.drop(index_to_drop,inplace=True)

In [7]:
#cambiamos la clase a 0 y 1
df_cancer[df_cancer['Class']==2]=0
df_cancer[df_cancer['Class']==4]=1
df_cancer

Unnamed: 0,Clump Thickness,Uniformity of Cell Size,Uniformity of Cell Shape,Marginal Adhesion,Single Epithelial Cell Size,Bare Nuclei,Bland Chromatin,Normal Nucleoli,Mitoses,Class
0,0,0,0,0,0,0,0,0,0,0
1,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,0,0,0,0,0
3,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...
694,0,0,0,0,0,0,0,0,0,0
695,0,0,0,0,0,0,0,0,0,0
696,1,1,1,1,1,1,1,1,1,1
697,1,1,1,1,1,1,1,1,1,1


## Separación del dataset "features and labels"

In [8]:
x = df_cancer.iloc[:,0:-1].values

In [9]:
y = df_cancer['Class']

In [10]:
y = np.array(y)

In [11]:
print(type(x),type(y))

<class 'numpy.ndarray'> <class 'numpy.ndarray'>


## Normalizado/Escalado de los datos

In [12]:
stdsc = StandardScaler()
x = stdsc.fit_transform(x)

## Convertimos a tensores de pytorch

In [13]:
x = torch.tensor(x)
y = torch.tensor(y)

In [14]:
y = y.unsqueeze(1)

In [15]:
print(x.shape)
print(y.shape)

torch.Size([683, 9])
torch.Size([683, 1])


## Dividir data entre entrenamiento y prueba

In [16]:
rows_of_train = int(x.shape[0]*.8)
#rows_of_test = x.shape[0]-rows_of_train

In [17]:
x_train = x[:rows_of_train]
y_train = y[:rows_of_train]

In [18]:
x_test = x[rows_of_train:]
y_test = y[rows_of_train:]

In [19]:
class Dataset(Dataset):
    def __init__(self,x,y):
        self.x = x
        self.y = y
    def __getitem__(self,index):
        return self.x[index],self.y[index]
    def __len__(self):
        return len(self.x)

In [20]:
dataset = Dataset(x_train,y_train)

In [21]:
len(dataset)

546

In [22]:
train_loader = torch.utils.data.DataLoader(dataset=dataset,
                                           batch_size=32,
                                           shuffle=True)

In [23]:
print("There is {} batches in the dataset".format(len(train_loader)))
for x,y  in train_loader:
    print("For one iteration (batch), there is:")
    print("Data:    {}".format(x.shape))
    print("Labels:  {}".format(y.shape))
    break

There is 18 batches in the dataset
For one iteration (batch), there is:
Data:    torch.Size([32, 9])
Labels:  torch.Size([32, 1])


## Creacción de la Red Neuronal

In [24]:
class Model(nn.Module):
    def __init__(self,input_features):
        super(Model,self).__init__()
        self.fc1 = nn.Linear(input_features,20)
        self.fc2 = nn.Linear(20, 10)
        self.fc3 = nn.Linear(10, 5)
        self.fc4 = nn.Linear(5, 1)
        self.sigmoid = nn.Sigmoid()
        self.tanh = nn.Tanh()
    def forward(self, x):
        out = self.fc1(x)
        out = self.tanh(out)
        out = self.fc2(out)
        out = self.tanh(out)
        out = self.fc3(out)
        out = self.tanh(out)
        out = self.fc4(out)
        out = self.sigmoid(out)
        return out

In [32]:
model = Model(x_train.shape[1])
#In Binary Cross Entropy: the input and output should have the same shape 
#size_average = True --> the losses are averaged over observations for each minibatch
fn_loss = torch.nn.BCELoss(reduction='mean')   
# We will use SGD with momentum with a learning rate of 0.1
optimizer = torch.optim.Adam(model.parameters(), lr=0.1)

In [34]:

# Train the network 
num_epochs = 200
for epoch in range(num_epochs):
    for inputs,labels in train_loader:

        inputs = inputs.float()
        labels = labels.float()

        # Feed Forward
        output = model(inputs)
        
        # Loss Calculation
        loss = fn_loss(output, labels)
    
        # Clear the gradient buffer (we don't want to accumulate gradients)
        optimizer.zero_grad()
        
        # Backpropagation 
        loss.backward()
        # Weight Update: w <-- w - lr * gradient
        optimizer.step()
        #print("Epoch {}/{}, Loss: {:.3f}".format(epoch,num_epochs, loss))

In [58]:
print(x_test[6],y_test[6])
with torch.no_grad():
    for i in range(x_test.shape[0]):
        feature = x_test[i].float()
        output = model(feature)
        if output.item()<1e-5:
            output=0
        elif output>0.999:
            output = 1
        print(output,' ',y_test[i].item())

tensor([-0.7337, -0.7337, -0.7337, -0.7337, -0.7337, -0.7337, -0.7337, -0.7337,
        -0.7337], dtype=torch.float64) tensor([0])
0   0
0   0
0   0
0   0
1   1
0   0
0   0
1   1
1   1
1   1
1   1
0   0
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
1   1
1   1
0   0
0   0
0   0
1   1
0   0
1   1
0   0
1   1
1   1
1   1
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
1   1
1   1
0   0
0   0
1   1
0   0
1   1
1   1
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
1   1
0   0
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
1   1
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
1   1
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
0   0
1   1
0   0
0   0
0   0
0   0
1   1
1   1
1   1
