In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MultiLabelBinarizer

# Load the dataset
data = pd.read_csv('advertisement.csv')

# Split features and labels
X = data.drop(columns='labels')
y = data['labels'].str.get_dummies(sep=' ')  # Create a binary indicator for each class

# Split into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)
print(y[:10])



   beauty  books  clothing  electronics  food  furniture  home  sports
0       0      0         1            1     0          0     0       1
1       1      0         0            0     0          1     0       0
2       0      0         1            1     1          0     0       1
3       0      0         0            0     1          0     0       0
4       0      0         0            0     0          0     1       0
5       0      1         0            1     0          0     0       1
6       1      0         1            0     0          1     0       0
7       1      1         0            0     0          0     0       0
8       0      0         0            1     1          0     1       1
9       0      0         0            0     1          1     0       0


In [1]:
# import numpy as np

# class MLP:
#     def __init__(self,input_size,output_size,lr,epoch,act_fn,opt_fn,n_hidden,n_neuron,batch_size,
#                  type = 'classification'
#                  ):
#         self.lr = lr
#         self.epoch = epoch
#         self.act_fn = act_fn
#         self.opt_fn = opt_fn
#         self.n_hidden = n_hidden
#         self.n_neuron = n_neuron
#         self.batch_size = batch_size
#         self.layer_sizes = input_size + n_neuron + output_size 
#         self.weights,self.bias = self.init_params()
#         self.type = type
    
#     def init_params(self):
#         # initialise the params for all layers of weights and bias:
#         weights = []
#         bias = []
#         for i in range(1,len(self.layer_sizes)):
#             weights.append(np.random.rand(self.n_neuron[i-1],self.n_neuron[i]))
#             bias.append(np.zeros((self.n_neuron[i],1)))
#         return weights,bias

        

#     def activation_fn(self,act_fn,z):
#         if act_fn == 'linear':
#             return z
#         elif act_fn == 'sigmoid':
#             return 1/(1+np.exp(-z))
#         elif act_fn == 'relu':
#             return np.maximum(z,0)
#         elif act_fn == 'tanh':
#             return np.tanh(z)

#     def output_cal(self,z):
#         if self.type == 'classification':
#             return self.activation_fn('sigmoid',z)
#         elif self.type == 'regression':
#             return z


#     def forward(self,X):
#         a_list = []
#         z_list = []
#         a = X.T
#         for i in range(self.n_hidden):
#             z = np.dot(self.weights[i],a) + self.bias[i]
#             a = self.activation_fn(self.act_fn,z)
#             a_list.append(a)
#             z_list.append(z)

#         z = np.dot(self.weights[-1],a) + self.bias[-1]
#         # output layer
#         y_hat = self.output_cal(z)
#         a_list.append(y_hat)
#         z_list.append(z)
#         return z_list,a_list


#     def backpropagation(self,X,y,z_list,a_list,loss):
#         # assume L is the loss:
#         gradient_w = []
#         gradient_b = []

#         # output layer
#         delta = self.output_act_derivative(z_list[-1])*loss
#         dE_dw = np.dot(delta,a_list[-2].T)/X.shape[0]
#         gradient_w.append(dE_dw)
#         dE_db = np.sum(delta,axis=0,keepdims=True)/X.shape[0]
#         gradient_b.append(dE_db)



#         #other layers:
#         for i in range(self.n_hidden-1,-1,-1):
#             # i -> n_hidden-1 to 0
#             delta = self.activation_fn_derivative(z_list[i])*np.dot(self.weights[i+1],delta)
#             if i > 0:
#                 dE_dw = np.dot(delta,a_list[i-1].T)
#             else:
#                 dE_dw = np.dot(delta,X.T)
#             dE_dw /= X.shape[0]
#             gradient_w.append(dE_dw)
#             dE_db = np.sum(delta,axis=1,keepdims=True)/X.shape[0]
#             gradient_b.append(dE_db)

#         return gradient_w.reverse(),gradient_b.reverse()


#     def optimizer(self):
#         # update weights and bias
#         if self.opt == 'SGD':
#             self.sdg_optimizer()
#         elif self.opt_fn == 'BGD':
#             self.batch_optimizer()
#         else:
#             self.mini_batch_optimizer()

#     def batch_optimizer(self,X,y):
#         # batch gradient descent
#         z,a = self.forward(X,y)
#         loss = self.loss_fn(y,a[-1])
#         gradient_w,gradient_b = self.backpropagation(X,y,z,a,loss)

#     def sdg_optimizer(self,X,y):
#         # stochastic gradient descent
#         for i in range(X.shape[0]):
#             z,a = self.forward_pass(X[i],y[i])
#             loss = self.loss_fn(y[i],z[-1])
#             gradient_w,gradient_b = self.backpropagation(X[i],y[i],z,a,loss)
            
#     def mini_batch_optimizer(self,X,y):
#         # mini batch gradient descent
#         for i in range(0,X.shape[0],self.batch_size):
#             end = min(X.shape[0]-1,i+self.batch_size)
#             X_batch = X[i:end]
#             y_batch = y[i:end]
#             z,a = self.forward(X_batch,y_batch)
#             loss = self.loss_fn(y_batch,a[-1])
#             gradient_w,gradient_b = self.backpropagation(X_batch,y_batch,z,a,loss)
    

#     def loss_fn(self):
#         pass

#     def fit(self):
#         pass

#     def predict(self):
#         pass

In [2]:
import numpy as np

class MLP:
    def __init__(self, input_size, output_size, lr=0.01, num_epoch=1000, act_fn='relu', opt_fn='SGD', 
                 n_hidden=1, n_neuron=None, batch_size=32, task_type='classification'):
        self.lr = lr
        self.num_epoch = num_epoch
        self.act_fn = act_fn
        self.opt_fn = opt_fn
        self.n_hidden = n_hidden
        self.n_neuron = n_neuron if n_neuron is not None else [64] * n_hidden  # Default to [64] neurons per hidden layer
        self.batch_size = batch_size
        self.task_type = task_type

        # Initialize layer sizes (input, hidden layers, output)
        self.layer_sizes = [input_size] + self.n_neuron + [output_size]
        self.weights, self.biases = self.init_params()

    def init_params(self):
        # Initialize weights and biases for all layers
        weights = []
        biases = []
        for i in range(len(self.layer_sizes) - 1):
            weights.append(np.random.randn(self.layer_sizes[i], self.layer_sizes[i + 1]))
            biases.append(np.zeros((1, self.layer_sizes[i + 1])))
        return weights, biases

    def activation_fn(self, z):
        if self.act_fn == 'linear':
            return z
        elif self.act_fn == 'sigmoid':
            return 1 / (1 + np.exp(-z))
        elif self.act_fn == 'relu':
            return np.maximum(0, z)
        elif self.act_fn == 'tanh':
            return np.tanh(z)

    def activation_fn_derivative(self, z):
        if self.act_fn == 'linear':
            return 1
        elif self.act_fn == 'sigmoid':
            sig = 1 / (1 + np.exp(-z))
            return sig * (1 - sig)
        elif self.act_fn == 'relu':
            return (z > 0).astype(float)
        elif self.act_fn == 'tanh':
            return 1 - np.tanh(z) ** 2

    def output_cal(self, z):
        if self.task_type == 'classification':
            return self.activation_fn('sigmoid')(z)
        elif self.task_type == 'regression':
            return z

    def forward(self, X):
        self.a_list = []
        self.z_list = []
        a = X

        for i in range(len(self.weights) - 1):
            z = np.dot(a, self.weights[i]) + self.biases[i]
            a = self.activation_fn(z)
            self.z_list.append(z)
            self.a_list.append(a)

        # Output layer
        z = np.dot(a, self.weights[-1]) + self.biases[-1]
        if self.task_type == 'classification':
            a = self.activation_fn(z)
        else:
            a = z  # For regression, no activation on the output
        self.z_list.append(z)
        self.a_list.append(a)

        return self.a_list[-1]

    def backward(self, X, y):
        m = X.shape[0]
        gradients_w = []
        gradients_b = []

        # Output layer
        if self.task_type == 'classification':
            # delta = self.bce_loss(y,self.a_list[-1]) # Derivative of BCE loss for classification
            delta = (self.a_list[-1] - y)  # Derivative of MSE loss for regression

        else:
            delta = (self.a_list[-1] - y) / m  # Derivative of MSE loss for regression

        for i in reversed(range(len(self.weights))):
            dW = np.dot(self.a_list[i - 1].T, delta) / m if i != 0 else np.dot(X.T, delta) / m
            dB = np.sum(delta, axis=0, keepdims=True) / m
            delta = np.dot(delta, self.weights[i].T) * self.activation_fn_derivative(self.z_list[i - 1])
            gradients_w.append(dW)
            gradients_b.append(dB)

        gradients_w.reverse()
        gradients_b.reverse()
        return gradients_w, gradients_b

    def update_params(self, gradients_w, gradients_b):
        for i in range(len(self.weights)):
            self.weights[i] -= self.lr * gradients_w[i]
            self.biases[i] -= self.lr * gradients_b[i]

    def optimizer(self, X, y):
        if self.opt_fn == 'SGD':
            self.sgd_optimizer(X, y)
        elif self.opt_fn == 'BGD':
            self.batch_optimizer(X, y)
        elif self.opt_fn == 'MBGD':
            self.mini_batch_optimizer(X, y)

    def sgd_optimizer(self, X, y):
        net_loss = 0
        for i in range(X.shape[0]):
            X_i = X[i:i+1]
            y_i = y[i:i+1]
            y_hat = self.forward(X_i)
            loss = self.bce_loss(y_i, y_hat) if self.task_type == 'classification' else self.mse_loss(y_i, y_hat)
            net_loss += loss
            gradients_w, gradients_b = self.backward(X_i, y_i)
            self.update_params(gradients_w, gradients_b)

        print(f'Epoch Loss: {net_loss}')

    def mini_batch_optimizer(self, X, y):
        for i in range(0, X.shape[0], self.batch_size):
            X_batch = X[i:i + self.batch_size]
            y_batch = y[i:i + self.batch_size]
            y_hat = self.forward(X_batch)
            loss = self.bce_loss(y_batch, y_hat) if self.task_type == 'classification' else self.mse_loss(y_batch, y_hat)
            gradients_w, gradients_b = self.backward(X_batch, y_batch)
            self.update_params(gradients_w, gradients_b)

    def batch_optimizer(self, X, y):
        y_hat = self.forward(X)
        loss = self.bce_loss(y, y_hat) if self.task_type == 'classification' else self.mse_loss(y, y_hat)
        gradients_w, gradients_b = self.backward(X, y)
        self.update_params(gradients_w, gradients_b)

    def bce_loss(self, y, y_hat):
        m = y.shape[0]
        y_hat = np.clip(y_hat, 1e-9, 1 - 1e-9)
        return -1/m * np.sum(y * np.log(y_hat) + (1 - y) * np.log(1 - y_hat))

    def mse_loss(self, y, y_hat):
        m = y.shape[0]
        return np.sum((y_hat - y) ** 2) / (2 * m)

    def fit(self, X, y):
        for epoch in range(self.num_epoch):
            self.optimizer(X, y)

    def predict(self, X):
        y_pred = self.forward(X)
        if self.task_type == 'classification':
            return np.where(y_pred > 0.5, 1, 0)
        else:
            return y_pred


In [3]:
import torch
import torch.nn as nn
import torch.optim as optim

class MLP_py(nn.Module):  # Inheriting from nn.Module
    def __init__(self, input_size, output_size, lr=0.01, num_epoch=1000,
                 act_fn='relu', opt_fn='SGD', n_hidden=1, n_neuron=64,
                 batch_size=32, task_type='classification'):
        super(MLP_py, self).__init__()  # Correct usage of super for nn.Module
        self.lr = lr
        self.num_epoch = num_epoch
        self.batch_size = batch_size
        self.layers = nn.ModuleList()
        self.layers.append(nn.Linear(input_size, n_neuron))

        # Adding hidden layers
        for _ in range(n_hidden - 1):
            self.layers.append(nn.Linear(n_neuron, n_neuron))

        self.layers.append(nn.Linear(n_neuron, output_size))  # Output layer

        # Activation function
        self.activation = nn.ReLU() if act_fn == 'relu' else nn.Sigmoid()  # Change according to your needs

        # Optimizer initialization
        self.optimizer = optim.SGD(self.parameters(), lr=self.lr)

    def forward(self, x):
        for layer in self.layers[:-1]:
            x = self.activation(layer(x))
        return self.layers[-1](x)  # Output layer without activation

    def fit(self, X_train, y_train):
        self.train()  # Set model to training mode
        for epoch in range(self.num_epoch):
            inputs = torch.tensor(X_train, dtype=torch.float32)
            targets = torch.tensor(y_train, dtype=torch.float32)

            self.optimizer.zero_grad()  # Zero the gradients
            outputs = self.forward(inputs)  # Forward pass
            loss = nn.BCEWithLogitsLoss()(outputs, targets)  # Binary classification loss
            print(loss)
            loss.backward()  # Backpropagation
            self.optimizer.step()  # Update weights

    def predict(self, X):
        self.eval()  # Set model to evaluation mode
        with torch.no_grad():
            inputs = torch.tensor(X, dtype=torch.float32)
            outputs = self.forward(inputs)
            return torch.sigmoid(outputs)  # Apply sigmoid for probabilities


In [4]:
import pandas as pd
from sklearn.preprocessing import StandardScaler

def DataPreprocessing(file_name):
    data = pd.read_csv(file_name, index_col=0)
    data.dropna(inplace=True)
    
    # Remove duplicates
    data.drop_duplicates(inplace=True)
    
    # Select numeric columns and remove 'quality'
    numeric_columns = data.select_dtypes(include=['float64', 'int64']).columns.tolist()
    numeric_columns.remove('quality')  # Remove 'quality' from the list
    print(numeric_columns)  # Print remaining numeric columns
    
    # Data normalization using StandardScaler (normal standardization)
    scaler = StandardScaler()
    data[numeric_columns] = scaler.fit_transform(data[numeric_columns])
    
    return data




In [5]:
# import numpy as np

# class MLP:
#     def __init__(self, input_size, output_size, hidden_layers, 
#                  neurons_per_layer, learning_rate, epochs, batch_size, 
#                  activation='relu', optimizer='sgd', task='classification'):
#         self.input_size = input_size
#         self.output_size = output_size
#         self.hidden_layers = hidden_layers
#         self.neurons_per_layer = neurons_per_layer
#         self.learning_rate = learning_rate
#         self.epochs = epochs
#         self.batch_size = batch_size
#         self.activation = activation
#         self.optimizer = optimizer
#         self.task = task  # 'classification' or 'regression'
#         self.weights = []
#         self.biases = []
        
#         # Initialize the network
#         self._initialize_weights()

#     def _initialize_weights(self):
#         # Randomly initialize weights and biases for each layer
#         layer_sizes = [self.input_size] + self.neurons_per_layer + [self.output_size]
#         for i in range(len(layer_sizes) - 1):
#             self.weights.append(np.random.randn(layer_sizes[i], layer_sizes[i + 1]))
#             self.biases.append(np.zeros((1, layer_sizes[i + 1])))

#     def _activation_function(self, x):
#         if self.activation == 'relu':
#             return np.maximum(0, x)
#         elif self.activation == 'sigmoid':
#             return 1 / (1 + np.exp(-x))
#         elif self.activation == 'tanh':
#             return np.tanh(x)
#         elif self.activation == 'linear':
#             return x

#     def _activation_derivative(self, x):
#         if self.activation == 'relu':
#             return np.where(x > 0, 1, 0)
#         elif self.activation == 'sigmoid':
#             return x * (1 - x)
#         elif self.activation == 'tanh':
#             return 1 - x ** 2
#         elif self.activation == 'linear':
#             return 1

#     def _bce_loss(self, y_true, y_pred):
#         # Binary Cross-Entropy loss (for classification)
#         m = y_true.shape[0]
#         y_pred = np.clip(y_pred, 1e-9, 1 - 1e-9)  # To avoid log(0)
#         loss = -1/m * np.sum(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
#         return loss

#     def _mse_loss(self, y_true, y_pred):
#         # Mean Squared Error loss (for regression)
#         m = y_true.shape[0]
#         loss = np.sum((y_pred - y_true) ** 2) / (2 * m)
#         return loss

#     def forward(self, X):
#         self.z_values = []  # To store linear combinations
#         self.a_values = [X]  # To store activations

#         for i in range(len(self.weights)):
#             z = np.dot(self.a_values[-1], self.weights[i]) + self.biases[i]
#             self.z_values.append(z)
#             # For regression task, the output layer uses a linear activation
#             if i == len(self.weights) - 1 and self.task == 'regression':
#                 a = z  # No activation for the output layer in regression
#             else:
#                 a = self._activation_function(z)
#             self.a_values.append(a)
        
#         return self.a_values[-1]

#     def backward(self, X, y):
#         # Compute gradients using backpropagation
#         m = X.shape[0]
        
#         if self.task == 'classification':
#             dz = self.a_values[-1] - y  # For BCE, derivative of the loss w.r.t the output
#         else:  # regression
#             dz = (self.a_values[-1] - y) / m  # For MSE, derivative of the loss w.r.t the output
        
#         for i in reversed(range(len(self.weights))):
#             dw = np.dot(self.a_values[i].T, dz) / m
#             db = np.sum(dz, axis=0, keepdims=True) / m
#             dz = np.dot(dz, self.weights[i].T) * self._activation_derivative(self.a_values[i])
            
#             # Update weights and biases
#             self.weights[i] -= self.learning_rate * dw
#             self.biases[i] -= self.learning_rate * db

#     def fit(self, X_train, y_train):
#         # Main training loop
#         for epoch in range(self.epochs):
#             epoch_loss = 0
#             for i in range(0, len(X_train), self.batch_size):
#                 X_batch = X_train[i:i + self.batch_size]
#                 y_batch = y_train[i:i + self.batch_size]
                
#                 # Forward pass
#                 y_pred = self.forward(X_batch)
                
#                 # Compute loss
#                 if self.task == 'classification':
#                     loss = self._bce_loss(y_batch, y_pred)
#                 else:  # regression
#                     loss = self._mse_loss(y_batch, y_pred)
                
#                 epoch_loss += loss
                
#                 # Backward pass and update
#                 self.backward(X_batch, y_batch)
                
#             # Average loss per epoch
#             epoch_loss /= len(X_train) // self.batch_size
#             print(f'Epoch {epoch+1}/{self.epochs}, Loss: {epoch_loss:.4f}')

#     def predict(self, X_test):
#         # Forward pass to get predictions
#         y_pred = self.forward(X_test)
        
#         if self.task == 'classification':
#             # Convert probabilities to binary class predictions
#             return np.where(y_pred > 0.5, 1, 0)
#         else:
#             # For regression, return the continuous value
#             return y_pred


In [6]:
# Your existing imports remain the same
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# mlp = MLP(input_size=11, output_size=1, hidden_layers=2, neurons_per_layer=[64, 32], learning_rate=0.01, epochs=500, batch_size=32)

# Assuming the MLP and DataPreprocessing class/method are already defined as per your code

# Step 1: Data Preprocessing
data = pd.read_csv('WineQT.csv', index_col=0)
data['quality'] = (data['quality'] >= 5).astype(int)  # Binary
data.to_csv('WineQT1.csv')

df = DataPreprocessing('WineQT1.csv')
df.to_csv('final.csv')

# Step 2: Split data into train/test sets
X = df.drop(columns=['quality'])
y = df['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to numpy arrays as our MLP class expects numpy inputs
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy().reshape(-1, 1)  # Reshape to (n_samples, 1)
y_test = y_test.to_numpy().reshape(-1, 1)    # Reshape to (n_samples, 1)

# Step 3: Initialize and train the MLP model
input_size = X_train.shape[1]  # Number of features
output_size = 1  # Binary classification
mlp1 = MLP(input_size=input_size, output_size=output_size, lr=0.01, num_epoch=500, act_fn='relu', 
          opt_fn='MBGD', n_hidden=3, n_neuron=[64,64,16], batch_size=32, task_type='classification')

# # Train the model
# mlp.fit(X_train, y_train)

# # Step 4: Predict and Output Metrics
# y_train_pred = mlp.predict(X_train)
# y_test_pred = mlp.predict(X_test)

# # Assuming y_train_pred and y_test_pred contain probabilities
# threshold = 0.5
# y_train_pred_binary = (y_train_pred >= threshold).astype(int)  # Convert to binary
# y_test_pred_binary = (y_test_pred >= threshold).astype(int)

# # Calculate metrics
# train_accuracy = accuracy_score(y_train.flatten(), y_train_pred_binary.flatten())
# test_accuracy = accuracy_score(y_test.flatten(), y_test_pred_binary.flatten())

# print(f"Train Accuracy: {train_accuracy:.4f}")
# print(f"Test Accuracy: {test_accuracy:.4f}")



['volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'Id']


In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import torch

# Assuming the MLP class is already defined as in the previous response

# Data Preprocessing
data = pd.read_csv('WineQT.csv', index_col=0)
data['quality'] = (data['quality'] >= 5).astype(int)  # Binary
data.to_csv('WineQT1.csv')

df = DataPreprocessing('WineQT1.csv')
df.to_csv('final.csv')

# Split data into train/test sets
X = df.drop(columns=['quality'])
y = df['quality']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert to numpy arrays as our MLP class expects numpy inputs
X_train = X_train.to_numpy()
X_test = X_test.to_numpy()
y_train = y_train.to_numpy().reshape(-1, 1)  # Reshape to (n_samples, 1)
y_test = y_test.to_numpy().reshape(-1, 1)    # Reshape to (n_samples, 1)

# Initialize and train the MLP model
input_size = X_train.shape[1]  # Number of features
output_size = 1  # Binary classification
mlp = MLP_py(input_size=input_size, output_size=output_size, lr=0.01, num_epoch=100, 
          act_fn='relu', opt_fn='SGD', n_hidden=3, n_neuron=64, batch_size=32, task_type='classification')

# Train the model
mlp.fit(X_train, y_train)

# Predict and Output Metrics
y_train_pred = mlp.predict(X_train)
y_test_pred = mlp.predict(X_test)

# Convert predictions to binary
threshold = 0.5
y_train_pred_binary = (y_train_pred.numpy() >= threshold).astype(int)  # Convert to binary
y_test_pred_binary = (y_test_pred.numpy() >= threshold).astype(int)

# Calculate metrics
train_accuracy = accuracy_score(y_train.flatten(), y_train_pred_binary.flatten())
test_accuracy = accuracy_score(y_test.flatten(), y_test_pred_binary.flatten())

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


mlp1.fit(X_train, y_train)

# Step 4: Predict and Output Metrics
y_train_pred = mlp1.predict(X_train)
y_test_pred = mlp1.predict(X_test)

# Assuming y_train_pred and y_test_pred contain probabilities
threshold = 0.5
y_train_pred_binary = (y_train_pred >= threshold).astype(int)  # Convert to binary
y_test_pred_binary = (y_test_pred >= threshold).astype(int)

# Calculate metrics
train_accuracy = accuracy_score(y_train.flatten(), y_train_pred_binary.flatten())
test_accuracy = accuracy_score(y_test.flatten(), y_test_pred_binary.flatten())

print(f"Train Accuracy: {train_accuracy:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


['volatile acidity', 'citric acid', 'residual sugar', 'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density', 'pH', 'sulphates', 'alcohol', 'Id']
tensor(0.6130, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6103, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6076, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6050, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.6023, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5997, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5971, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5945, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5919, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5893, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5867, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5842, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(0.5816, grad_fn=<BinaryCrossEntropyWithLogitsBackward0>)
tensor(