In [50]:
#Imports
import tensorflow as tf
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, precision_score, recall_score

Data Defining

In [48]:
# Load the CSV file into a Pandas DataFrame
csv_file = './winequality/winequality-red.csv'
df = pd.read_csv(csv_file, delimiter=';')

# Print the column names to debug
print("Column names:", df.columns)


# Assuming df is your data frame and 'quality' is the column you want to predict
target = df.pop('quality')
# Convert quality into binary classification (1 for quality >= 6, otherwise 0)
target = target.apply(lambda x: 1 if x >= 6 else 0)

#Pop top 5 rows for testing

print("Target:", target.head())
print("Data:", df.head())


# Convert the DataFrame and the target column to TensorFlow tensors
y_train = tf.convert_to_tensor(target.values, dtype=tf.float32, name='y_train')
X_train = tf.convert_to_tensor(df.values, dtype=tf.float32, name='X_train')

y_train = tf.reshape(y_train, (-1, 1))

print(X_train.shape)
print(y_train.shape)

Column names: Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')
Target: 0    0
1    0
2    0
3    1
4    0
Name: quality, dtype: int64
Data:    fixed acidity  volatile acidity  citric acid  residual sugar  chlorides  \
0            7.4              0.70         0.00             1.9      0.076   
1            7.8              0.88         0.00             2.6      0.098   
2            7.8              0.76         0.04             2.3      0.092   
3           11.2              0.28         0.56             1.9      0.075   
4            7.4              0.70         0.00             1.9      0.076   

   free sulfur dioxide  total sulfur dioxide  density    pH  sulphates  \
0                 11.0                  34.0   0.9978  3.51       0.56   
1                 25.0                  67.0   0.9968  3.20       

In [36]:
# Define the logistic regression model
class LogisticRegressionModel(tf.Module):
    def __init__(self, input_dim):
        # Initialize weights and bias
        self.w = tf.Variable(tf.random.normal([input_dim, 1]), name='weights')
        self.b = tf.Variable(tf.zeros([1]), name='bias')

    def __call__(self, X):
        # Linear combination with sigmoid activation
        logits = tf.matmul(X, self.w) + self.b
        return tf.sigmoid(logits)

In [37]:
#Define Loss function Binary Cross Entropy
def loss_fn(model, X, y):
    y_pred = model(X)
    return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(logits=y_pred, labels=y))

In [42]:
# Define the training step
def train_step(model, X, y, learning_rate=0.01):
    with tf.GradientTape() as tape:
        loss = loss_fn(model, X, y)
    gradients = tape.gradient(loss, [model.w, model.b])
    model.w.assign_sub(learning_rate * gradients[0])
    model.b.assign_sub(learning_rate * gradients[1])
    return loss

In [44]:
# Training the model
model = LogisticRegressionModel(input_dim=X_train.shape[1])
epochs = 10000
for epoch in range(epochs):
    loss = train_step(model, X_train, y_train, learning_rate=0.01)
    if epoch % 1000 == 0:
        print(f'Epoch {epoch}, Loss: {loss.numpy()}')

Epoch 0, Loss: 0.7785525321960449
Epoch 1000, Loss: 0.7785525321960449
Epoch 2000, Loss: 0.7785525321960449
Epoch 3000, Loss: 0.7785525321960449
Epoch 4000, Loss: 0.7785525321960449
Epoch 5000, Loss: 0.7785525321960449
Epoch 6000, Loss: 0.7785525321960449
Epoch 7000, Loss: 0.7785525321960449
Epoch 8000, Loss: 0.7785525321960449
Epoch 9000, Loss: 0.7785525321960449


In [51]:
# Testing the model
X_test = np.array([[7,0.30,0.55,1,0.040,6,12,0.97,2.66,0.69,11.7]])
X_test = tf.convert_to_tensor(X_test, dtype=tf.float32)
y_test_pred = model(X_test)
print("Predictions:", y_test_pred.numpy())

# Predict using the trained model (binary output)
y_train_pred = model(X_train)
y_train_pred = tf.round(y_train_pred)  # Convert probabilities to binary outcomes (0 or 1)

# Convert tensors to numpy arrays for metric calculation
y_train_true_np = y_train.numpy()
y_train_pred_np = y_train_pred.numpy()

# Calculate accuracy, precision, and recall
accuracy = accuracy_score(y_train_true_np, y_train_pred_np)
precision = precision_score(y_train_true_np, y_train_pred_np)
recall = recall_score(y_train_true_np, y_train_pred_np)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")

Predictions: [[1.]]
Accuracy: 0.5347091932457786
Precision: 0.5347091932457786
Recall: 1.0
