## Logistic Regression using Tensorflow

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
# import matplotlib.pyplot as plt

In [2]:
cols = ["fLength", "fWidth", "fSize", "fConc", "fConc1", "fAsym", "fM3Long", "fM3Trans", "fAlpha", "fDist", "class"]
df = pd.read_csv("magic04.data", names=cols)
df["class"] = (df["class"] == "g").astype(int)

In [3]:
df.head()

Unnamed: 0,fLength,fWidth,fSize,fConc,fConc1,fAsym,fM3Long,fM3Trans,fAlpha,fDist,class
0,28.7967,16.0021,2.6449,0.3918,0.1982,27.7004,22.011,-8.2027,40.092,81.8828,1
1,31.6036,11.7235,2.5185,0.5303,0.3773,26.2722,23.8238,-9.9574,6.3609,205.261,1
2,162.052,136.031,4.0612,0.0374,0.0187,116.741,-64.858,-45.216,76.96,256.788,1
3,23.8172,9.5728,2.3385,0.6147,0.3922,27.2107,-6.4633,-7.1513,10.449,116.737,1
4,75.1362,30.9205,3.1611,0.3168,0.1832,-5.5277,28.5525,21.8393,4.648,356.462,1


In [4]:
tf.random.set_seed(42)
np.random.seed(42)

In [5]:
# Shuffle the data
df = df.sample(frac=1).reset_index(drop=True)

In [6]:
# Split the data into features and target
features = df.drop('class', axis=1).values
target = df["class"].values

Split the data into training and validation sets

In [7]:
# Split the data into training and validation sets
train_size = int(0.8 * len(features))
X_train, X_val = features[:train_size], features[train_size:]
y_train, y_val = target[:train_size], target[train_size:]

In [8]:
print(y_train)

[0 1 1 ... 1 0 0]


Model Hyperparameters

In [9]:
learning_rate = 0.01
num_epochs = 100
display_step = 100
batch_size = 32
num_features = X_train.shape[1]
num_classes = 2

In [10]:
# Reshape the target labels
y_train = np.eye(num_classes)[y_train.reshape(-1)]
y_val = np.eye(num_classes)[y_val.reshape(-1)]

In [11]:
y_train

array([[1., 0.],
       [0., 1.],
       [0., 1.],
       ...,
       [0., 1.],
       [1., 0.],
       [1., 0.]])

In [12]:
# Create the weight and bias variables
W = tf.Variable(tf.zeros([num_features, num_classes], dtype=tf.float64))
b = tf.Variable(tf.zeros([num_classes], dtype=tf.float64))

In [13]:
# Define the logistic regression model
@tf.function
def logistic_regression(inputs):
    logits = tf.matmul(tf.cast(inputs, dtype=tf.float64), W) + b
    return tf.nn.softmax(logits)

In [14]:
# Define the loss function (cross-entropy)
@tf.function
def loss_fn(inputs, labels):
    logits = logistic_regression(inputs)
    loss_value = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=labels, logits=logits))
    return loss_value

In [15]:
# Define the optimizer
optimizer = tf.keras.optimizers.SGD(learning_rate)

In [16]:
# Define the training step
@tf.function
def train_step(inputs, labels):
    with tf.GradientTape() as tape:
        loss_value = loss_fn(inputs, labels)
    gradients = tape.gradient(loss_value, [W, b])
    optimizer.apply_gradients(zip(gradients, [W, b]))
    return loss_value

In [17]:
# Start the training loop
for epoch in range(num_epochs):
    num_batches = len(X_train) // batch_size

    for batch in range(num_batches):
        batch_indices = np.random.choice(len(X_train), size=batch_size, replace=False)
        batch_features = X_train[batch_indices]
        batch_target = y_train[batch_indices]

        loss_value = train_step(batch_features, batch_target)

    if (epoch + 1) % 10 == 0:
        print(f'Epoch {epoch + 1}/{num_epochs}, Loss: {loss_value:.5f}')

print("Training completed!")

Epoch 10/100, Loss: 0.75076
Epoch 20/100, Loss: 0.55948
Epoch 30/100, Loss: 0.65703
Epoch 40/100, Loss: 0.67499
Epoch 50/100, Loss: 0.59766
Epoch 60/100, Loss: 0.43874
Epoch 70/100, Loss: 0.53795
Epoch 80/100, Loss: 0.60797
Epoch 90/100, Loss: 0.53095
Epoch 100/100, Loss: 0.50076
Training completed!


In [18]:
# Calculate the validation accuracy
val_pred = logistic_regression(X_val).numpy()
val_pred_labels = np.argmax(val_pred, axis=1)
val_true_labels = np.argmax(y_val, axis=1)
val_accuracy = np.mean(val_pred_labels == val_true_labels)
print(f'Validation Accuracy: {val_accuracy:.5f}')

Validation Accuracy: 0.73764


In [19]:
val_pred_labels

array([1, 0, 0, ..., 1, 0, 0], dtype=int64)

In [20]:
val_true_labels

array([1, 0, 0, ..., 1, 1, 0], dtype=int64)