<a href="https://colab.research.google.com/github/Dhruvit-Agrawal/PyTorch/blob/main/2_NN_WorkFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, LabelEncoder

In [None]:
import pandas as pd

In [None]:
# Source CSV for the breast-cancer diagnosis data, hosted on GitHub.
DATA_URL="https://raw.githubusercontent.com/gscdit/Breast-Cancer-Detection/refs/heads/master/data.csv"

# Load into a DataFrame and preview the first rows.
df=pd.read_csv(DATA_URL)
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [None]:
# Remove the record identifier and the trailing "Unnamed: 32" column (blank in
# the preview above); neither is a usable feature.
# Reassigning with errors="ignore" instead of inplace=True keeps this cell
# idempotent: re-running it is a no-op rather than a KeyError.
df=df.drop(columns=["id",'Unnamed: 32'],errors="ignore")

In [None]:
# Sanity check after the drop: column dtypes and non-null counts.
# `diagnosis` is the only object (string) column; the rest are float64.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 569 entries, 0 to 568
Data columns (total 31 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   diagnosis                569 non-null    object 
 1   radius_mean              569 non-null    float64
 2   texture_mean             569 non-null    float64
 3   perimeter_mean           569 non-null    float64
 4   area_mean                569 non-null    float64
 5   smoothness_mean          569 non-null    float64
 6   compactness_mean         569 non-null    float64
 7   concavity_mean           569 non-null    float64
 8   concave points_mean      569 non-null    float64
 9   symmetry_mean            569 non-null    float64
 10  fractal_dimension_mean   569 non-null    float64
 11  radius_se                569 non-null    float64
 12  texture_se               569 non-null    float64
 13  perimeter_se             569 non-null    float64
 14  area_se                  5

In [None]:
# Distinct values per column; `diagnosis` has exactly 2, i.e. a binary target.
df.nunique()

Unnamed: 0,0
diagnosis,2
radius_mean,456
texture_mean,479
perimeter_mean,522
area_mean,539
smoothness_mean,474
compactness_mean,537
concavity_mean,537
concave points_mean,542
symmetry_mean,432


# Train/test split

In [None]:
# Separate the feature matrix from the target column, then hold out 20% of the
# rows for testing (fixed random_state so the split is repeatable).
y=df["diagnosis"]
x=df.drop(columns=["diagnosis"])

x_train,x_test,y_train,y_test=train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# One shape per line, in the order: x_train, x_test, y_train, y_test.
print(*(part.shape for part in (x_train,x_test,y_train,y_test)),sep="\n")

(455, 30)
(114, 30)
(455,)
(114,)


# Feature scaling

In [None]:
# Learn the scaling statistics from the training split only, then apply the
# same fitted transform to both splits so no test information enters the fit.
scaler=StandardScaler().fit(x_train)
x_train,x_test=scaler.transform(x_train),scaler.transform(x_test)

# Label encoding

In [None]:
# Map the string diagnosis labels to integer codes. The encoder is fitted on
# the training labels and reused for the test labels so both share one mapping.
# NOTE(review): presumably B -> 0 and M -> 1 - confirm via encoder.classes_.
encoder=LabelEncoder().fit(y_train)
y_train,y_test=encoder.transform(y_train),encoder.transform(y_test)

# NumPy to tensor

In [None]:
# Convert every split to a float32 torch tensor (features and labels alike);
# the cast happens on the NumPy side before handing the buffer to torch.
x_train_tensor,x_test_tensor,y_train_tensor,y_test_tensor=(
    torch.from_numpy(split.astype(np.float32))
    for split in (x_train,x_test,y_train,y_test)
)

In [None]:
# Shapes of the four tensors, one per line: x_train, x_test, y_train, y_test.
print(
    *(t.shape for t in (x_train_tensor,x_test_tensor,y_train_tensor,y_test_tensor)),
    sep="\n",
)

torch.Size([455, 30])
torch.Size([114, 30])
torch.Size([455])
torch.Size([114])


# Model

In [None]:
class MyModel():
  """Single-neuron logistic-regression classifier trained by manual gradient descent.

  The parameters ``w`` and ``b`` are plain tensors with ``requires_grad=True``;
  ``train`` updates them by hand from the gradients autograd computes.
  """

  def __init__(self,x):
    """Create randomly initialised parameters sized from ``x``.

    Args:
      x: 2-D tensor; only ``x.shape[1]`` (the number of feature columns) is read.
    """
    n_features=x.shape[1]
    self.w=torch.randn(n_features,1,requires_grad=True)   # one weight per column of x
    self.b=torch.randn(1,requires_grad=True)              # one bias for the single output neuron

  def forward(self,x):
    """Return ``sigmoid(x @ w + b)``; for 2-D ``x`` the result has shape (rows, 1)."""
    z=torch.matmul(x,self.w)+self.b
    y_hat=torch.sigmoid(z)
    return y_hat

  def loss(self,y_hat,y):
    """Mean binary cross-entropy between predicted probabilities and labels.

    Args:
      y_hat: probabilities as returned by ``forward``.
      y: labels with the same number of elements as ``y_hat``. Both a flat
        ``(n,)`` tensor and an already column-shaped ``(n, 1)`` tensor are
        accepted, as are integer labels.

    Returns:
      Scalar loss tensor attached to the autograd graph of ``y_hat``.
    """
    # Align the labels to y_hat's shape and dtype. A bare unsqueeze(1) would
    # turn (n, 1) labels into (n, 1, 1), and integer labels are rejected by
    # binary_cross_entropy.
    target=y.reshape(y_hat.shape).to(y_hat.dtype)
    return torch.nn.functional.binary_cross_entropy(y_hat,target)

  def train(self,x,y,epochs=25,lr=0.01):
    """Run full-batch gradient descent, printing the loss at every epoch.

    Args:
      x: feature tensor passed to ``forward``.
      y: label tensor passed to ``loss``.
      epochs: number of passes over the whole batch.
      lr: learning rate (step size) of each update.
    """
    for epoch in range(epochs):
      #forward
      y_hat=self.forward(x)

      #loss
      loss=self.loss(y_hat,y)
      print("epoch:",epoch,"loss:",loss.item())

      #backward
      loss.backward()

      #parameter update
      with torch.no_grad():           # the update itself must not be tracked by autograd
        self.w-=lr*self.w.grad
        self.b-=lr*self.b.grad

      #zeroing gradients: backward() accumulates, so clear before the next epoch
      self.w.grad.zero_()
      self.b.grad.zero_()

  def predict(self,x):
    """Return hard 0/1 predictions with the same shape as ``forward(x)``.

    A probability strictly greater than 0.5 maps to 1, anything else to 0.
    """
    with torch.no_grad():             # inference only: skip building the autograd graph
      y_prob=self.forward(x)
    y_pred=torch.where(y_prob>0.5,1,0)
    return y_pred


In [None]:
#creating instance
# Seed torch's RNG first: the model draws its initial weights with torch.randn,
# so without a fixed seed every run starts from different parameters and the
# losses/accuracy below cannot be reproduced.
torch.manual_seed(42)
model=MyModel(x_train_tensor)

In [None]:
#train
# Full-batch gradient descent on the training tensors. The hyper-parameters are
# spelled out (same values as the method defaults) so they are easy to tune.
model.train(x=x_train_tensor,y=y_train_tensor,epochs=25,lr=0.01)

epoch: 0 loss: 2.322831869125366
epoch: 1 loss: 2.3143138885498047
epoch: 2 loss: 2.3060481548309326
epoch: 3 loss: 2.297900915145874
epoch: 4 loss: 2.289658546447754
epoch: 5 loss: 2.2816646099090576
epoch: 6 loss: 2.2737467288970947
epoch: 7 loss: 2.264230251312256
epoch: 8 loss: 2.256490707397461
epoch: 9 loss: 2.248683452606201
epoch: 10 loss: 2.2410876750946045
epoch: 11 loss: 2.048707962036133
epoch: 12 loss: 2.0411486625671387
epoch: 13 loss: 2.0336806774139404
epoch: 14 loss: 2.02614688873291
epoch: 15 loss: 2.018704414367676
epoch: 16 loss: 2.0114593505859375
epoch: 17 loss: 2.004161834716797
epoch: 18 loss: 1.9970778226852417
epoch: 19 loss: 1.989938735961914
epoch: 20 loss: 1.982981562614441
epoch: 21 loss: 1.9759870767593384
epoch: 22 loss: 1.9690673351287842
epoch: 23 loss: 1.962329387664795
epoch: 24 loss: 1.9555448293685913


In [None]:
#predict
y_pred=model.predict(x_test_tensor)

#accuracy
# predict() returns a column of shape (n, 1) while y_test_tensor is flat (n,).
# Comparing them directly broadcasts to an (n, n) matrix and averages over
# every prediction/label pair, which is not an accuracy. Flatten both sides so
# the comparison is strictly element-wise.
y_pred_flat=y_pred.reshape(-1).float()
y_true_flat=y_test_tensor.reshape(-1).float()
assert y_pred_flat.shape==y_true_flat.shape, "prediction/label count mismatch"

accuracy=torch.mean((y_pred_flat==y_true_flat).float())
print("accuracy:",accuracy)

accuracy: tensor(0.5194)
