<h1 align="center"><font color="yellow">Logistic Regression Classifier with Pytorch</font></h1>

<font color="yellow">Data Scientist: PhD. Eddy Giusepe Chirinos Isidro</font>

In [3]:
# Check the versions of the installed libraries

%load_ext watermark 
%watermark -v -p numpy,pandas,matplotlib,torch

The watermark extension is already loaded. To reload it, use:
  %reload_ext watermark
Python implementation: CPython
Python version       : 3.9.13
IPython version      : 8.13.2

numpy     : 1.24.3
pandas    : 2.0.1
matplotlib: 3.7.1
torch     : 2.0.1



# Nosso Dataset

In [13]:
import pandas as pd


# Read the tab-separated toy dataset and preview its first rows.
df = pd.read_csv(
    '../perceptron/perceptron_toydata-truncated.txt',
    sep='\t',
)
df.head(n=5)


Unnamed: 0,x1,x2,label
0,0.77,-1.14,0
1,-0.33,1.44,0
2,0.91,-3.07,0
3,-0.37,-1.91,0
4,-0.63,-1.53,0


In [14]:
# Split the frame into a two-column feature matrix and a label vector,
# both as NumPy arrays.
y_train = df["label"].to_numpy()
X_train = df[["x1", "x2"]].to_numpy()

In [15]:
# Show the raw (not yet standardized) feature matrix.
X_train

array([[ 0.77, -1.14],
       [-0.33,  1.44],
       [ 0.91, -3.07],
       [-0.37, -1.91],
       [-0.63, -1.53],
       [ 0.39, -1.99],
       [-0.49, -2.74],
       [-0.68, -1.52],
       [-0.1 , -3.43],
       [-0.05, -1.95],
       [ 3.88,  0.65],
       [ 0.73,  2.97],
       [ 0.83,  3.94],
       [ 1.59,  1.25],
       [ 1.14,  3.91],
       [ 1.73,  2.8 ],
       [ 1.31,  1.85],
       [ 1.56,  3.85],
       [ 1.23,  2.54],
       [ 1.33,  2.03]])

In [18]:
X_train.mean(axis=0) # axis=0 --> mean of each column (one value per feature)

array([0.7375, 0.3975])

In [19]:
# Standardize each feature to zero mean and unit variance.
# The statistics are computed from the raw columns of `df` (not from `X_train`
# itself) so that re-running this cell always yields the same result, and they
# are kept in `train_mean` / `train_std` so that any new input can later be
# scaled exactly like the training data.
X_raw = df[["x1", "x2"]].values
train_mean = X_raw.mean(axis=0)
train_std = X_raw.std(axis=0)
X_train = (X_raw - train_mean) / train_std


In [20]:
# Show the feature matrix after standardization.
X_train

array([[ 0.0305863 , -0.61935683],
       [-1.00464248,  0.41995414],
       [ 0.1623427 , -1.39682589],
       [-1.04228716, -0.92953879],
       [-1.2869776 , -0.77646198],
       [-0.32703818, -0.96176548],
       [-1.15522121, -1.26389077],
       [-1.33403345, -0.77243364],
       [-0.78818555, -1.54184603],
       [-0.7411297 , -0.94565213],
       [ 2.95746041,  0.10171551],
       [-0.00705838,  1.03628972],
       [ 0.08705333,  1.42703842],
       [ 0.80230231,  0.34341574],
       [ 0.37879962,  1.41495341],
       [ 0.9340587 ,  0.96780799],
       [ 0.53878953,  0.58511596],
       [ 0.77406879,  1.39078338],
       [ 0.46350016,  0.86307122],
       [ 0.55761187,  0.65762603]])

# Implementamos o Modelo

In [21]:
import torch

class LogisticRegression(torch.nn.Module):
    """Binary logistic regression: one linear layer followed by a sigmoid."""

    def __init__(self, num_features):
        """Create the linear layer mapping `num_features` inputs to one output."""
        super().__init__()
        self.linear = torch.nn.Linear(in_features=num_features, out_features=1)

    def forward(self, x):
        """Return the sigmoid of the linear layer's output for `x`."""
        return torch.sigmoid(self.linear(x))


In [22]:
# Seed PyTorch's global RNG before building the model so the randomly
# initialized weights are the same on every run.
torch.manual_seed(1)


# Two input features (x1, x2).
model = LogisticRegression(num_features=2)


In [23]:
# Sanity-check the still-untrained model on one hand-written 2-feature example.
x = torch.tensor([1.1, 2.1])

# Forward pass only, so gradient tracking is switched off.
with torch.no_grad():
    proba = model(x)
    
print(proba)

tensor([0.4033])


# Definindo nosso Dataloader

In [24]:
from torch.utils.data import Dataset, DataLoader


class MyDataset(Dataset):
    """In-memory dataset pairing each feature row with its label.

    Both inputs are converted to float32 tensors once, at construction time.
    """

    def __init__(self, X, y):
        """Store `X` (features) and `y` (labels) as float32 tensors."""
        as_float32 = {"dtype": torch.float32}
        self.features = torch.tensor(X, **as_float32)
        self.labels = torch.tensor(y, **as_float32)

    def __len__(self):
        """Return the number of examples (first dimension of the labels)."""
        n_examples = self.labels.shape[0]
        return n_examples

    def __getitem__(self, index):
        """Return the `(features, label)` pair stored at `index`."""
        return self.features[index], self.labels[index]
    


# Wrap the standardized features and labels in the dataset defined above.
train_ds = MyDataset(X_train, y_train)


# Serve shuffled mini-batches of 10 examples.
# num_workers=0 loads batches in the main process. The data is already fully
# in memory, so worker processes add no throughput; they would only be
# re-spawned at the start of every epoch, and they can fail on platforms that
# start workers with "spawn" when the Dataset class is defined in a notebook.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=10,
    shuffle=True,
    num_workers=0,
)


# O Loop de Treinamento

In [26]:
import torch.nn.functional as F


# Re-seed and rebuild the model so training always starts from the same weights.
torch.manual_seed(1)
model = LogisticRegression(num_features=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.05)

num_epochs = 200

for epoch in range(num_epochs):

    # train() returns the module itself, so no reassignment is needed.
    model.train()
    for batch_idx, (features, class_labels) in enumerate(train_loader):

        # Forward pass: predicted probabilities for this mini-batch.
        probas = model(features)

        # Give the labels the same shape as the predictions before
        # computing the binary cross-entropy.
        targets = class_labels.view(probas.shape)
        loss = F.binary_cross_entropy(probas, targets)

        # Clear old gradients, backpropagate, then update the parameters.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        ### LOGGING
        log_line = (
            f'Epoch: {epoch+1:03d}/{num_epochs:03d}'
            f' | Batch {batch_idx:03d}/{len(train_loader):03d}'
            f' | Loss: {loss:.2f}'
        )
        print(log_line)

Epoch: 001/200 | Batch 000/002 | Loss: 0.67
Epoch: 001/200 | Batch 001/002 | Loss: 0.73
Epoch: 002/200 | Batch 000/002 | Loss: 0.67
Epoch: 002/200 | Batch 001/002 | Loss: 0.67
Epoch: 003/200 | Batch 000/002 | Loss: 0.60
Epoch: 003/200 | Batch 001/002 | Loss: 0.68
Epoch: 004/200 | Batch 000/002 | Loss: 0.69
Epoch: 004/200 | Batch 001/002 | Loss: 0.54
Epoch: 005/200 | Batch 000/002 | Loss: 0.61
Epoch: 005/200 | Batch 001/002 | Loss: 0.57
Epoch: 006/200 | Batch 000/002 | Loss: 0.59
Epoch: 006/200 | Batch 001/002 | Loss: 0.54
Epoch: 007/200 | Batch 000/002 | Loss: 0.51
Epoch: 007/200 | Batch 001/002 | Loss: 0.58
Epoch: 008/200 | Batch 000/002 | Loss: 0.51
Epoch: 008/200 | Batch 001/002 | Loss: 0.54
Epoch: 009/200 | Batch 000/002 | Loss: 0.51
Epoch: 009/200 | Batch 001/002 | Loss: 0.49
Epoch: 010/200 | Batch 000/002 | Loss: 0.53
Epoch: 010/200 | Batch 001/002 | Loss: 0.44
Epoch: 011/200 | Batch 000/002 | Loss: 0.42
Epoch: 011/200 | Batch 001/002 | Loss: 0.52
Epoch: 012/200 | Batch 000/002 |

# Avaliando nossos Resultados

In [27]:
# NOTE: `probas` is whatever the training loop assigned last, i.e. the model
# outputs for the final mini-batch of the final epoch (still attached to the
# autograd graph, hence the grad_fn in the output).
probas

tensor([[0.0080],
        [0.0163],
        [0.0150],
        [0.0092],
        [0.0191],
        [0.0716],
        [0.9852],
        [0.8961],
        [0.8798],
        [0.9704]], grad_fn=<SigmoidBackward0>)

In [30]:
# If probas > 0.5 --> class = 1; otherwise --> class = 0
pred = torch.where(probas > 0.5, 1, 0) # Threshold=0.5. 1 and 0 --> are our classes
pred

tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [1],
        [1]])

In [31]:
# Reshape the labels of that same last batch to match `pred` and cast them to
# its dtype, so the two can be compared element by element.
class_labels.view(pred.shape).to(pred.dtype)


tensor([[0],
        [0],
        [0],
        [0],
        [0],
        [0],
        [1],
        [1],
        [1],
        [1]])

In [32]:
def compute_accuracy(model, dataloader, threshold=0.5):
    """Return the fraction of examples in `dataloader` that `model` classifies correctly.

    The model is expected to output one probability per example; an example is
    assigned class 1 when its probability is greater than `threshold`, and
    class 0 otherwise.

    Args:
        model: Module called as `model(features)` on each batch.
        dataloader: Iterable yielding `(features, class_labels)` batches.
        threshold: Decision threshold applied to the probabilities
            (default 0.5).

    Returns:
        A 0-dim tensor holding `correct / total_examples`.

    Raises:
        ZeroDivisionError: If `dataloader` yields no batches.
    """
    # Remember the current mode so evaluation does not silently leave the
    # model in eval mode for code that keeps training afterwards.
    was_training = model.training
    model.eval()

    correct = 0.0
    total_examples = 0

    try:
        with torch.no_grad():
            for features, class_labels in dataloader:
                probas = model(features)

                pred = torch.where(probas > threshold, 1, 0)
                # Labels get the predictions' shape and dtype so that `==`
                # compares element by element instead of broadcasting.
                lab = class_labels.view(pred.shape).to(pred.dtype)

                compare = lab == pred
                correct += torch.sum(compare)
                total_examples += len(compare)
    finally:
        model.train(was_training)

    return correct / total_examples

In [33]:
# Evaluated on the same loader used for training, so this is training-set accuracy.
train_acc = compute_accuracy(model, train_loader)

In [34]:

# Report the accuracy as a percentage.
print(f"Accuracy: {train_acc*100}%")

Accuracy: 100.0%
