# Logistic Regression Using PyTorch

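In this tutorial, we will build a logistic regression model in PyTorch and train it as a binary classifier that predicts the `Outcome` column of a diabetes dataset from eight numeric features.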

### Step 1: Import Libraries

In [1]:
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

### Step 2: Load the Dataset

In [2]:
# Read the diabetes CSV directly from its hosted location into a DataFrame
data = pd.read_csv(
    "https://raw.githubusercontent.com/yangliuiuk/data/main/diabetes.csv"
)

# Preview the top rows to confirm the columns loaded as expected
data.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


### Step 3: Preprocess the Data

In [3]:
# Target (y) is the 'Outcome' column; features (X) are all remaining columns
y = data['Outcome']
X = data.drop(columns=['Outcome'])
X.shape

(768, 8)

In [4]:
# Hold out 20% of the rows for testing; the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test-set information leaks in
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)
X_train  # standardized features (z-scores), now a NumPy array

array([[-0.52639686, -1.15139792, -3.75268255, ..., -4.13525578,
        -0.49073479, -1.03594038],
       [ 1.58804586, -0.27664283,  0.68034485, ..., -0.48916881,
         2.41502991,  1.48710085],
       [-0.82846011,  0.56687102, -1.2658623 , ..., -0.42452187,
         0.54916055, -0.94893896],
       ...,
       [ 1.8901091 , -0.62029661,  0.89659009, ...,  1.76054443,
         1.981245  ,  0.44308379],
       [-1.13052335,  0.62935353, -3.75268255, ...,  1.34680407,
        -0.78487662, -0.33992901],
       [-1.13052335,  0.12949347,  1.43720319, ..., -1.22614383,
        -0.61552223, -1.03594038]])

In [5]:
# Wrap the scaled NumPy feature matrices as float32 PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

X_train_tensor.shape

torch.Size([614, 8])

In [6]:
# Inspect the container types before converting the labels to tensors.
# Only the last expression of a cell is displayed, so the results of the
# earlier lines are recorded in their trailing comments.
type(X_train) # numpy.ndarray (returned by the scaler)
type(y_train) # pandas.core.series.Series
type(y_train.values) # numpy.ndarray
y_train.shape # 1-D: one label per training row

(614,)

In [7]:
# The NumPy array underlying the Series has the same 1-D shape as the Series itself
y_train.values.shape

(614,)

In [8]:
# Compare the label tensor's shape before and after unsqueeze(1).
# unsqueeze(1) inserts a second axis, turning the flat label vector into a
# single-column matrix. Only the last expression's result is displayed.
torch.tensor(y_train.values.astype('float32')).shape #torch.Size([614])
torch.tensor(y_train.values.astype('float32')).unsqueeze(1).shape #torch.Size([614, 1])

torch.Size([614, 1])

In [9]:
# Labels as float32 column vectors, shaped (N, 1) like the model's output.
# `.values` hands torch the Series' underlying NumPy array rather than the
# label-indexed pandas object; X_train / X_test need no such step because the
# scaler already returned NumPy arrays.
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

### Step 4: Define the Logistic Regression Model

In [10]:
class LogisticRegressionModel(nn.Module):
    """Logistic regression: a single linear layer followed by a sigmoid.

    Args:
        input_dim: Number of input features per sample.
    """

    def __init__(self, input_dim):
        super().__init__()
        # One output unit: a single score per sample
        self.linear = nn.Linear(input_dim, 1)
        # Squashes that score into the open interval (0, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Map inputs of shape (..., input_dim) to sigmoid outputs of shape (..., 1)."""
        return self.sigmoid(self.linear(x))

# Seed PyTorch's random number generator before the model is built, so the
# randomly initialised weights (and therefore the training losses and the
# final accuracy) are reproducible across kernel restarts
torch.manual_seed(42)

# Initialize the model with one input per feature column
input_dim = X_train.shape[1]
model = LogisticRegressionModel(input_dim)

### Step 5: Define Loss Function and Optimizer

In [11]:
# Binary cross-entropy, applied to the probabilities produced by the model's sigmoid
criterion = nn.BCELoss()

# Plain stochastic gradient descent over every trainable parameter of the model
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

### Step 6: Train the Model

In [12]:
# Full-batch training: every epoch runs the whole training set through the model once
num_epochs = 1000
for epoch in range(num_epochs):
    # Clear the gradients accumulated by the previous iteration
    optimizer.zero_grad()

    # Forward pass: predicted probabilities and their loss against the labels
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass, followed by a single parameter update
    loss.backward()
    optimizer.step()

    # Report the loss on every 100th epoch only
    if (epoch + 1) % 100 != 0:
        continue
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [100/1000], Loss: 0.5987
Epoch [200/1000], Loss: 0.5576
Epoch [300/1000], Loss: 0.5322
Epoch [400/1000], Loss: 0.5156
Epoch [500/1000], Loss: 0.5043
Epoch [600/1000], Loss: 0.4962
Epoch [700/1000], Loss: 0.4902
Epoch [800/1000], Loss: 0.4858
Epoch [900/1000], Loss: 0.4823
Epoch [1000/1000], Loss: 0.4797


### Step 7: Evaluate the Model

In [13]:
# Score the held-out test set; gradients are not needed for inference
with torch.no_grad():
    outputs = model(X_test_tensor)
    # Threshold the probabilities at 0.5 to obtain hard 0/1 predictions
    predicted = torch.ge(outputs, 0.5).float()
    # Accuracy = fraction of predictions that match the true labels
    matches = predicted.eq(y_test_tensor)
    accuracy = matches.sum().item() / len(y_test_tensor)
    print(f'Accuracy: {accuracy:.2f}')

Accuracy: 0.77


In [14]:
# Standalone demo, independent of the model above:
# a 5x3 tensor of samples drawn uniformly from [0, 1)
x = torch.rand(5, 3)
print(x)

tensor([[0.1745, 0.5751, 0.9501],
        [0.0629, 0.2154, 0.2607],
        [0.8669, 0.1852, 0.1086],
        [0.4996, 0.2926, 0.9888],
        [0.8556, 0.6951, 0.0362]])
