In [1]:
from sklearn.datasets import make_classification
import torch

In [2]:
X, y = make_classification(
    n_samples=10,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    n_classes=2,
    random_state=42
)

In [4]:
X

array([[ 1.06833894, -0.97007347],
       [-1.14021544, -0.83879234],
       [-2.8953973 ,  1.97686236],
       [-0.72063436, -0.96059253],
       [-1.96287438, -0.99225135],
       [-0.9382051 , -0.54304815],
       [ 1.72725924, -1.18582677],
       [ 1.77736657,  1.51157598],
       [ 1.89969252,  0.83444483],
       [-0.58723065, -1.97171753]])

In [5]:
y

array([1, 0, 0, 0, 0, 1, 1, 1, 1, 0])

In [6]:
x = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)

In [7]:
from torch.utils.data import Dataset, DataLoader

In [14]:
class CustomDataset(Dataset):
  def __init__(self, features, labels):
    self.features = features
    self.labels = labels

  def __len__(self):
    return self.features.shape[0]

  def __getitem__(self, idx):
    return self.features[idx], self.labels[idx]



In [15]:
dataset = CustomDataset(x, y)

In [16]:
len(dataset)

10

In [17]:
dataset[0]

(tensor([ 1.0683, -0.9701]), tensor(1))

In [20]:
dataloader = DataLoader(dataset, batch_size=2, shuffle=True)

In [21]:
for batch_features, batch_labels in dataloader:
  print(batch_features)
  print(batch_labels)
  print("-"*50)

tensor([[-0.9382, -0.5430],
        [ 1.8997,  0.8344]])
tensor([1, 1])
--------------------------------------------------
tensor([[-0.7206, -0.9606],
        [-1.1402, -0.8388]])
tensor([0, 0])
--------------------------------------------------
tensor([[-2.8954,  1.9769],
        [ 1.0683, -0.9701]])
tensor([0, 1])
--------------------------------------------------
tensor([[-1.9629, -0.9923],
        [-0.5872, -1.9717]])
tensor([0, 0])
--------------------------------------------------
tensor([[ 1.7774,  1.5116],
        [ 1.7273, -1.1858]])
tensor([1, 1])
--------------------------------------------------


In [22]:
import numpy as np
import pandas as pd
import torch
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder

In [23]:
from google.colab import files

# This will open a file picker on Colab to select file(s) from your desktop
uploaded = files.upload()

# Save uploaded files into the Colab directory
for filename in uploaded.keys():
    print(f'Saved file: {filename}')


Saving breast-cancer.csv to breast-cancer.csv
Saved file: breast-cancer.csv


In [24]:
df = pd.read_csv("breast-cancer.csv")

In [25]:
df.drop(columns=['id'], inplace = True)

In [26]:
X = df.iloc[:, 1:]               # all numeric
y = df.iloc[:, 0]                # 'diagnosis' column (object)

In [27]:
le = LabelEncoder()
y_label = le.fit_transform(y)

In [28]:
X_train, X_test, y_train, y_test = train_test_split(X, y_label, test_size=0.2, random_state=42)

In [29]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(X_train)
x_test_scaled = scaler.transform(X_test)


In [40]:
x_train_tensor = torch.from_numpy(x_train_scaled).float()
y_train_tensor = torch.from_numpy(y_train).float()
x_test_tensor = torch.from_numpy(x_test_scaled).float()
y_test_tensor = torch.from_numpy(y_test).float()

In [41]:
from torch.utils.data import Dataset, DataLoader

class CustomDataset(Dataset):

  def __init__(self, features, labels):

    self.features = features
    self.labels = labels

  def __len__(self):

    return len(self.features)

  def __getitem__(self, idx):

    return self.features[idx], self.labels[idx]



In [42]:
train_dataset = CustomDataset(x_train_tensor, y_train_tensor)
test_dataset = CustomDataset(x_test_tensor, y_test_tensor)

In [43]:
train_dataset[10]

(tensor([-0.4976,  0.6137, -0.4981, -0.5310, -0.5769, -0.1749, -0.3622, -0.2849,
          0.4335,  0.1782, -0.3684,  0.5531, -0.3167, -0.4052,  0.0403, -0.0380,
         -0.1804,  0.1648, -0.1217,  0.2308, -0.5004,  0.8194, -0.4692, -0.5331,
         -0.0491, -0.0416, -0.1491,  0.0968,  0.1062,  0.4904]),
 tensor(0.))

In [44]:
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=True)

In [45]:
import torch.nn as nn


class MySimpleNN(nn.Module):

  def __init__(self, num_features):

    super().__init__()
    self.linear = nn.Linear(num_features, 1)
    self.sigmoid = nn.Sigmoid()

  def forward(self, features):

    out = self.linear(features)
    out = self.sigmoid(out)

    return out

In [46]:
learning_rate = 0.1
epochs = 25

In [47]:
#define lossfunction
loss_function = nn.BCELoss()

In [48]:
model = MySimpleNN(x_train_tensor.shape[1])

optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

for epoch in range(epochs):
  for batch_features, batch_labels in train_loader:

    y_pred = model(batch_features)

    loss = loss_function(y_pred, batch_labels.view(-1, 1))

    optimizer.zero_grad()

    loss.backward()

    optimizer.step()

  # print loss in each epoch
  print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')




Epoch: 1, Loss: 0.1817120611667633
Epoch: 2, Loss: 0.10303515940904617
Epoch: 3, Loss: 0.14077694714069366
Epoch: 4, Loss: 0.14630459249019623
Epoch: 5, Loss: 0.27691617608070374
Epoch: 6, Loss: 0.033886149525642395
Epoch: 7, Loss: 0.08010715246200562
Epoch: 8, Loss: 0.2859290540218353
Epoch: 9, Loss: 0.1062282845377922
Epoch: 10, Loss: 0.06700427085161209
Epoch: 11, Loss: 0.10596829652786255
Epoch: 12, Loss: 0.20085836946964264
Epoch: 13, Loss: 0.08329953998327255
Epoch: 14, Loss: 0.01881241239607334
Epoch: 15, Loss: 0.17649798095226288
Epoch: 16, Loss: 0.10028830915689468
Epoch: 17, Loss: 0.7809843420982361
Epoch: 18, Loss: 0.10422071069478989
Epoch: 19, Loss: 0.1039661318063736
Epoch: 20, Loss: 0.023208264261484146
Epoch: 21, Loss: 0.026988742873072624
Epoch: 22, Loss: 0.02715689316391945
Epoch: 23, Loss: 0.023730145767331123
Epoch: 24, Loss: 0.2730981111526489
Epoch: 25, Loss: 0.07798261940479279
