<a href="https://colab.research.google.com/github/dvsmihir1/ML-Lab/blob/main/ML7.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install VitalDB


Collecting VitalDB
  Downloading vitaldb-1.4.9-py3-none-any.whl.metadata (520 bytes)
Collecting wfdb (from VitalDB)
  Downloading wfdb-4.1.2-py3-none-any.whl.metadata (4.3 kB)
Downloading vitaldb-1.4.9-py3-none-any.whl (57 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.5/57.5 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading wfdb-4.1.2-py3-none-any.whl (159 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m160.0/160.0 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: wfdb, VitalDB
Successfully installed VitalDB-1.4.9 wfdb-4.1.2


In [24]:
import pandas as pd
import numpy as np
from sklearn.model_selection import RandomizedSearchCV
from sklearn.neural_network import MLPClassifier
from sklearn.linear_model import Perceptron
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from catboost import CatBoostClassifier
from sklearn.ensemble import AdaBoostClassifier
from xgboost import XGBClassifier
from sklearn.naive_bayes import GaussianNB

In [25]:
dfcases = pd.read_csv("https://api.vitaldb.net/cases")

In [50]:
import numpy as np

class NeuralNetwork:
    def __init__(self, input_size, hidden_size, output_size):
        # Initialize the sizes of input, hidden, and output layers
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.output_size = output_size

        # Initialize weights with random values for the connections between layers
        self.weights_input_hidden = np.random.randn(self.input_size, self.hidden_size)
        self.weights_hidden_output = np.random.randn(self.hidden_size, self.output_size)

        # Initialize biases for hidden and output layers to zero
        self.bias_hidden = np.zeros((1, self.hidden_size))
        self.bias_output = np.zeros((1, self.output_size))

    def sigmoid(self, x):
        # Sigmoid activation function to introduce non-linearity
        return 1 / (1 + np.exp(-x))

    def sigmoid_derivative(self, x):
        # Derivative of the sigmoid function for backpropagation
        return x * (1 - x)

    def feedforward(self, X):
        # Compute the activation of the hidden layer
        self.hidden_activation = np.dot(X, self.weights_input_hidden) + self.bias_hidden
        self.hidden_output = self.sigmoid(self.hidden_activation)

        # Compute the activation of the output layer
        self.output_activation = np.dot(self.hidden_output, self.weights_hidden_output) + self.bias_output
        self.predicted_output = self.sigmoid(self.output_activation)

        # Return the predicted output from the network
        return self.predicted_output

    def backward(self, X, y, learning_rate):
        # Compute the error at the output layer
        output_error = y.reshape(-1, 1) - self.predicted_output  # Ensure y matches the output dimensions
        output_delta = output_error * self.sigmoid_derivative(self.predicted_output)

        # Compute the error at the hidden layer
        hidden_error = np.dot(output_delta, self.weights_hidden_output.T)
        hidden_delta = hidden_error * self.sigmoid_derivative(self.hidden_output)

        # Update weights and biases for the hidden to output layer connections
        self.weights_hidden_output += np.dot(self.hidden_output.T, output_delta) * learning_rate
        self.bias_output += np.sum(output_delta, axis=0, keepdims=True) * learning_rate

        # Update weights and biases for the input to hidden layer connections
        self.weights_input_hidden += np.dot(X.T, hidden_delta) * learning_rate
        self.bias_hidden += np.sum(hidden_delta, axis=0, keepdims=True) * learning_rate

    def train(self, X, y, epochs, learning_rate, conv=0.2):
        # Train the neural network over a specified number of epochs
        for epoch in range(epochs):
            # Perform a feedforward pass to get predictions
            output = self.feedforward(X)
            # Perform a backward pass to update weights and biases
            self.backward(X, y, learning_rate)
            # Calculate loss as mean squared error between actual and predicted outputs
            loss = np.mean(np.square(y - output))
            # Check for convergence; stop training if loss falls below threshold
            if conv > loss:
                break  # Exit training loop if convergence is achieved


In [51]:
# Extract height, weight, and bmi values from the DataFrame
height = dfcases["height"].values  # Get height values as a NumPy array
weight = dfcases["weight"].values  # Get weight values as a NumPy array
bmi = dfcases["bmi"].values        # Get BMI values as a NumPy array

# Create feature matrix X by combining height and weight into pairs
X = np.array([[y, x] for x, y in zip(height, weight)])  # Each row contains [height, weight]

# Create target vector y containing BMI values
y = np.array([i for i in bmi])  # Convert BMI values into a NumPy array

# Initialize the neural network with specified input size, hidden size, and output size
nn = NeuralNetwork(input_size=2, hidden_size=4, output_size=1)

# Train the neural network using the feature matrix X and target vector y
nn.train(X, y, epochs=1000, learning_rate=0.1, conv=0.002)

# Perform a feedforward pass to get predictions on the training data
output = nn.feedforward(X)

# Print the predicted output (BMI) from the neural network
print(output)

[[1.]
 [1.]
 [1.]
 ...
 [1.]
 [1.]
 [1.]]


In [52]:
# Define a dictionary of hyperparameter distributions for tuning the MLPClassifier
param_distributions_mlp = {
    'hidden_layer_sizes': [(50,)],  # Tuple specifying the number of neurons in hidden layers
    'activation': ['relu', 'tanh', 'logistic'],  # Activation functions to choose from
    'solver': ['lbfgs', 'sgd', 'adam'],  # Optimization algorithms for weight updates
    'alpha': [0.002, 0.001, 0.01],  # Regularization parameter to prevent overfitting
    'learning_rate': ['constant', 'invscaling', 'adaptive'],  # Learning rate strategies
    'max_iter': [100, 500, 1000],  # Maximum number of iterations for training
    'tol': [1e-3, 1e-4, 1e-5]  # Tolerance for stopping criteria; training stops if loss improvement is below this threshold
}

# Initialize the MLPClassifier with specified parameters
mlp = MLPClassifier(
    hidden_layer_sizes=(6,),  # Set a hidden layer with 6 neurons
    activation='logistic',     # Use logistic activation function for neurons
    solver='lbfgs',           # Use the LBFGS optimization algorithm
    max_iter=1000             # Set maximum iterations for training to 1000
)

In [53]:
# Set up RandomizedSearchCV for hyperparameter tuning of the MLPClassifier
random_search_mlp = RandomizedSearchCV(
    estimator=mlp,                        # The MLPClassifier instance to be tuned
    param_distributions=param_distributions_mlp,  # The hyperparameter space to explore
    n_iter=10,                            # Number of different combinations to try
    cv=5,                                 # Number of cross-validation folds
    scoring='accuracy',                   # Metric to evaluate the performance of each combination
    verbose=2,                            # Level of verbosity; higher values show more details during the search
    random_state=0                        # Seed for reproducibility of results
)

In [39]:
# Prepare features and labels
height = dfcases["height"].values
weight = dfcases["weight"].values
bmi = dfcases["bmi"].values


In [54]:
# Create feature matrix X by combining height and weight into pairs
X = np.array([[h, w] for h, w in zip(height, weight)])  # Each row contains [height, weight]

# Convert BMI to categorical labels using digitization
# The bins define the thresholds for categorizing BMI:
# - 0 for underweight (BMI < 18.5)
# - 1 for normal weight (18.5 <= BMI < 24.9)
# - 2 for overweight (BMI >= 24.9)
y = np.digitize(bmi, bins=[18.5, 24.9])  # Assigns categories based on defined bins

# Fit the MLP model using RandomizedSearchCV
random_search_mlp.fit(X, y)  # Train the MLP model with the feature matrix X and labels y

# Initialize Perceptron model
perceptron = Perceptron()  # Create an instance of the Perceptron classifier

# Define parameter distributions for Perceptron hyperparameter tuning
param_distributions_perceptron = {
    'penalty': ['l2', 'l1', 'elasticnet', None],  # Regularization penalties to apply
    'alpha': [0.0001, 0.001, 0.01],               # Learning rate parameter
    'max_iter': [1000, 2000, 3000],               # Maximum number of iterations for training
    'tol': [1e-3, 1e-4, 1e-5]                     # Tolerance for stopping criteria; training stops if improvement is below this threshold
}

Fitting 5 folds for each of 10 candidates, totalling 50 fits




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   7.0s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   7.0s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   9.2s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   7.3s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   5.7s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.1s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.9s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.1s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.1s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   4.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   3.3s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   5.1s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   2.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   4.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   2.8s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   3.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   3.1s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   4.7s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   4.4s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   3.8s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   1.5s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   6.3s


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   7.1s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   5.7s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   3.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   9.0s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   1.4s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   4.0s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   2.9s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.2s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.3s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   2.9s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   2.9s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   7.3s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   2.8s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   0.4s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   3.3s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   4.0s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.9s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.5s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.2s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.2s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.2s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.5s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.8s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.6s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.4s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


In [55]:
# Initialize RandomizedSearchCV for Perceptron to tune hyperparameters
random_search_perceptron = RandomizedSearchCV(
    estimator=perceptron,                        # The Perceptron instance to be tuned
    param_distributions=param_distributions_perceptron,  # The hyperparameter space to explore
    n_iter=10,                                   # Number of different combinations to try
    cv=5,                                        # Number of cross-validation folds to use
    scoring='accuracy',                          # Metric to evaluate the performance of each combination
    verbose=2,                                   # Level of verbosity; higher values show more details during the search
    random_state=0                               # Seed for reproducibility of results
)

# Fit the Perceptron model using RandomizedSearchCV
random_search_perceptron.fit(X, y)             # Train the Perceptron model with the feature matrix X and labels y

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END ...alpha=0.01, max_iter=2000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=2000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=2000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=2000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=2000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END alpha=0.0001, max_iter=1000, penalty=None, tol=0.0001; total time=   0.0s
[CV] END alpha=0.0001, max_iter=1000, penalty=None, tol=0.0001; total time=   0.0s
[CV] END alpha=0.0001, max_iter=1000, penalty=None, tol=0.0001; total time=   0.0s
[CV] END alpha=0.0001, max_iter=1000, penalty=None, tol=0.0001; total time=   0.0s
[CV] END alpha=0.0001, max_iter=1000, penalty=None, tol=0.0001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=1000, penalty=l1, tol=0.001; total time=   0.0s
[CV] END ...alpha=0.01, max_iter=1000,

In [56]:
print("Best Perceptron Hyperparameters:", random_search_perceptron.best_params_)
print("Best Perceptron Accuracy:", random_search_perceptron.best_score_)
print("\n")

# Print best hyperparameters and scores for MLP
print("Best MLP Hyperparameters:", random_search_mlp.best_params_)
print("Best MLP Accuracy:", random_search_mlp.best_score_)

Best Perceptron Hyperparameters: {'tol': 0.0001, 'penalty': 'l1', 'max_iter': 2000, 'alpha': 0.0001}
Best Perceptron Accuracy: 0.8141740900462378


Best MLP Hyperparameters: {'tol': 0.001, 'solver': 'lbfgs', 'max_iter': 500, 'learning_rate': 'invscaling', 'hidden_layer_sizes': (50,), 'alpha': 0.002, 'activation': 'logistic'}
Best MLP Accuracy: 0.9466085296255038


In [57]:
# Extract height, weight, and BMI values from the DataFrame
height = dfcases["height"].values  # Get height values as a NumPy array
weight = dfcases["weight"].values  # Get weight values as a NumPy array
bmi = dfcases["bmi"].values        # Get BMI values as a NumPy array

# Create feature matrix X by combining height and weight into pairs
X = np.array([[y, x] for x, y in zip(height, weight)])  # Each row contains [height, weight]

# Create target vector y containing BMI values
y = np.array([i for i in bmi])  # Convert BMI values into a NumPy array

# Initialize the MLPClassifier with specified parameters
mlp = MLPClassifier(hidden_layer_sizes=(10,), activation='logistic', solver='lbfgs', max_iter=10000000000000000)

# Convert target vector y to integers (if necessary)
y = y.astype('int')  # Ensure that y is of integer type for classification

# Fit the MLP model using the feature matrix X and target vector y
mlp.fit(X, y)  # Train the MLP model on the data

# Make predictions and print results for the first few samples (up to 100)
for i in range(len(X)//100):  # Loop through every 100th sample
    prediction = mlp.predict([X[i]])[0]  # Predict the output for the current input sample
    print(f"Input: {X[i]}, Target: {y[i]}, Prediction: {prediction}")  # Print input, target, and predicted output

Input: [ 67.5 160.2], Target: 26, Prediction: 23
Input: [ 54.8 167.3], Target: 19, Prediction: 23
Input: [ 69.7 169.1], Target: 24, Prediction: 23
Input: [ 53.  160.6], Target: 20, Prediction: 23
Input: [ 59.7 171. ], Target: 20, Prediction: 23
Input: [ 54.6 150. ], Target: 24, Prediction: 23
Input: [ 62.3 167.7], Target: 22, Prediction: 23
Input: [ 67.25 156.7 ], Target: 27, Prediction: 23
Input: [ 50.9 157.9], Target: 20, Prediction: 23
Input: [ 62.75 162.5 ], Target: 23, Prediction: 23
Input: [ 81.45 175.4 ], Target: 26, Prediction: 23
Input: [ 81.4 169.2], Target: 28, Prediction: 23
Input: [ 64.9 153. ], Target: 27, Prediction: 23
Input: [ 80.  177.9], Target: 25, Prediction: 23
Input: [ 48.3 158. ], Target: 19, Prediction: 23
Input: [ 68.9 162.3], Target: 26, Prediction: 23
Input: [ 53.  164.2], Target: 19, Prediction: 23
Input: [ 56.9 155. ], Target: 23, Prediction: 23
Input: [ 66.2 171.3], Target: 22, Prediction: 23
Input: [ 61.3 173.6], Target: 20, Prediction: 23
Input: [ 61.6 

In [58]:
def perceptron_tuning():
    # Define parameter distributions for tuning the Perceptron model
    param_distributions_perceptron = {
        'penalty': ['l2'],  # Perceptron only supports 'l2' regularization
        'alpha': [0.001, 0.01, 0.1],  # Learning rate values to test
        'max_iter': [100, 500, 1000],  # Maximum iterations for training
        'tol': [1e-3, 1e-4, 1e-5]  # Tolerance for stopping criteria
    }

    # Initialize the Perceptron model with a random state for reproducibility
    perceptron = Perceptron(random_state=0)

    # Set up RandomizedSearchCV for hyperparameter tuning of the Perceptron model
    random_search_perceptron = RandomizedSearchCV(
        estimator=perceptron,  # The Perceptron instance to be tuned
        param_distributions=param_distributions_perceptron,  # Hyperparameter space to explore
        n_iter=10,  # Number of different combinations to sample
        cv=5,  # Number of cross-validation folds to use
        scoring='accuracy',  # Metric to evaluate the performance of each combination
        verbose=2,  # Level of verbosity; higher values show more details during the search
        random_state=0  # Seed for reproducibility of results
    )

    # Prepare your data - Ensure y is classification-friendly (binary/multi-class labels)
    height = dfcases["height"].values  # Get height values as a NumPy array
    weight = dfcases["weight"].values  # Get weight values as a NumPy array
    bmi = dfcases["bmi"].values        # Get BMI values as a NumPy array

    # Create feature matrix X by combining height and weight into pairs
    X = np.array([[y, x] for x, y in zip(height, weight)])  # Each row contains [height, weight]

    # Convert BMI to categorical labels using digitization
    y = np.digitize(bmi, bins=[18.5, 24.9])  # Assign categories based on defined bins

    # Fit the models using RandomizedSearchCV
    random_search_perceptron.fit(X, y)  # Train the Perceptron model with the feature matrix X and labels y

    # Return best parameters and best score from the search
    return random_search_perceptron.best_params_, random_search_perceptron.best_score_

In [59]:
def mlp_tuning():
    # Define parameter distributions for tuning the MLPClassifier
    param_distributions_for_mlp = {
        'hidden_layer_sizes': [(50,)],  # Tuple specifying the number of neurons in hidden layers
        'activation': ['relu', 'tanh', 'logistic'],  # Activation functions to choose from
        'solver': ['lbfgs', 'sgd', 'adam'],  # Optimization algorithms for weight updates
        'alpha': [0.002, 0.001, 0.01],  # Regularization parameter to prevent overfitting
        'learning_rate': ['constant', 'invscaling', 'adaptive'],  # Learning rate strategies
        'max_iter': [100, 500, 1000],  # Maximum number of iterations for training
        'tol': [1e-3, 1e-4, 1e-5]  # Tolerance for stopping criteria; training stops if loss improvement is below this threshold
    }

    # Initialize the MLPClassifier with default parameters (to be tuned)
    mlp = MLPClassifier(hidden_layer_sizes=(6,), activation='logistic', solver='lbfgs', max_iter=1000)

    # Set up RandomizedSearchCV for hyperparameter tuning of the MLP model
    random_search_mlp = RandomizedSearchCV(
        estimator=mlp,  # The MLPClassifier instance to be tuned
        param_distributions=param_distributions_for_mlp,  # Hyperparameter space to explore
        n_iter=10,  # Number of different combinations to sample
        cv=5,  # Number of cross-validation folds to use
        scoring='accuracy',  # Metric to evaluate the performance of each combination
        verbose=2,  # Level of verbosity; higher values show more details during the search
        random_state=0  # Seed for reproducibility of results
    )

    # Prepare your data - Ensure y is classification-friendly (binary/multi-class labels)
    height = dfcases["height"].values  # Get height values as a NumPy array
    weight = dfcases["weight"].values  # Get weight values as a NumPy array
    bmi = dfcases["bmi"].values        # Get BMI values as a NumPy array

    # Create feature matrix X by combining height and weight into pairs
    X = np.array([[y, x] for x, y in zip(height, weight)])  # Each row contains [height, weight]

    # Convert BMI to categorical labels using digitization
    y = np.digitize(bmi, bins=[18.5, 24.9])  # Assign categories based on defined bins

    # Fit the models using RandomizedSearchCV
    random_search_mlp.fit(X, y)  # Train the MLP model with the feature matrix X and labels y

    # Return best parameters and best score from the search
    return random_search_mlp.best_params_, random_search_mlp.best_score_

In [60]:
def A3():
    # Extract height, weight, and BMI values from the DataFrame
    height = dfcases["height"].values  # Get height values as a NumPy array
    weight = dfcases["weight"].values  # Get weight values as a NumPy array
    bmi = dfcases["bmi"].values        # Get BMI values as a NumPy array

    # Create feature matrix X by combining height and weight into pairs
    X = np.array([[y, x] for x, y in zip(height, weight)])  # Each row contains [height, weight]

    # Convert BMI to categorical labels using digitization
    y = np.digitize(bmi, bins=[18.5, 24.9])  # Assign categories based on defined bins

    # Split data into training and testing sets (80% train, 20% test)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Set up classifiers with their respective best parameters from previous tuning
    model_collection = {
        'Perceptron': Perceptron(**random_search_perceptron.best_params_, random_state=42),
        'MLP': MLPClassifier(**random_search_mlp.best_params_, random_state=42),
        'SVM': SVC(random_state=42),
        'Decision Tree': DecisionTreeClassifier(random_state=42),
        'Random Forest': RandomForestClassifier(random_state=42),
        'CatBoost': CatBoostClassifier(random_state=42, verbose=False),
        'AdaBoost': AdaBoostClassifier(random_state=42),
        'XGBoost': XGBClassifier(random_state=42, objective='multi:softprob'),
        'Naive Bayes': GaussianNB()
    }

    # Define evaluation metrics to be calculated for each model
    eval_metrics = ['Accuracy', 'Precision', 'Recall', 'F1-score']

    # Create a DataFrame to store results of each model's performance
    results_df = pd.DataFrame(columns=['Model'] + eval_metrics)

    # Training every model and evaluating performance
    for model_name, model in model_collection.items():
        model.fit(X_train, y_train)  # Train the model on the training data

        y_pred = model.predict(X_test)  # Make predictions on the test data

        # Calculate performance statistics
        acc = accuracy_score(y_test, y_pred)  # Calculate accuracy
        prec = precision_score(y_test, y_pred, average='macro')  # Calculate precision
        rec = recall_score(y_test, y_pred, average='macro')  # Calculate recall
        f1 = f1_score(y_test, y_pred, average='macro')  # Calculate F1 score

        # Create a DataFrame for the current model's results
        fresh_results = pd.DataFrame({'Model': [model_name],
                                      'Accuracy': [acc],
                                      'Precision': [prec],
                                      'Recall': [rec],
                                      'F1-score': [f1]})

        # Append the current model's results to the results DataFrame
        results_df = pd.concat([results_df, fresh_results], ignore_index=True)

    return results_df  # Return the DataFrame containing all models' performance metrics

In [61]:
def main():
    best_params, score = perceptron_tuning()
    print("Best Perceptron Hyperparameters:", best_params)
    print("Best Perceptron Accuracy:", score)

    best_params, score = mlp_tuning()
    print("Best MLP Hyperparameters:", best_params)
    print("Best MLP Accuracy:", score)
    results = A3()
    print(results)

In [62]:
main()

Fitting 5 folds for each of 10 candidates, totalling 50 fits
[CV] END ...alpha=0.001, max_iter=100, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ...alpha=0.001, max_iter=100, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ...alpha=0.001, max_iter=100, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ...alpha=0.001, max_iter=100, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ...alpha=0.001, max_iter=100, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ....alpha=0.1, max_iter=1000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ....alpha=0.1, max_iter=1000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ....alpha=0.1, max_iter=1000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ....alpha=0.1, max_iter=1000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ....alpha=0.1, max_iter=1000, penalty=l2, tol=0.001; total time=   0.0s
[CV] END ....alpha=0.01, max_iter=500, penalty=l2, tol=1e-05; total time=   0.0s
[CV] END ....alpha=0.01, max_iter=500, penalty=l



[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   7.2s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   5.9s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   7.1s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   5.7s




[CV] END activation=relu, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=1e-05; total time=   6.8s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.2s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.0s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.0s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.4s




[CV] END activation=tanh, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=adam, tol=1e-05; total time=   3.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   1.6s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   0.4s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   4.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   3.0s
[CV] END activation=tanh, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   2.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   3.5s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   3.3s


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   4.6s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   4.4s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=lbfgs, tol=0.0001; total time=   3.9s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   3.8s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=  12.5s
[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   3.2s


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   6.1s


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.001, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=1000, solver=lbfgs, tol=1e-05; total time=   2.4s


ABNORMAL_TERMINATION_IN_LNSRCH.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   3.8s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   5.9s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   4.6s
[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=   5.0s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


[CV] END activation=logistic, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=lbfgs, tol=0.001; total time=  10.1s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.0s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.3s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.3s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.0s




[CV] END activation=tanh, alpha=0.01, hidden_layer_sizes=(50,), learning_rate=adaptive, max_iter=100, solver=sgd, tol=0.001; total time=   3.0s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   3.3s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   0.7s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   2.3s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   4.2s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=500, solver=sgd, tol=0.0001; total time=   3.9s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd,



[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.2s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.6s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.9s




[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=invscaling, max_iter=100, solver=sgd, tol=0.0001; total time=   1.3s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.5s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.5s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.4s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.5s
[CV] END activation=relu, alpha=0.002, hidden_layer_sizes=(50,), learning_rate=constant, max_iter=500, solver=sgd, tol=1e-05; total time=   0.3s


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)
  results_df = pd.concat([results_df, fresh_results], ignore_index=True)


Best MLP Hyperparameters: {'tol': 1e-05, 'solver': 'lbfgs', 'max_iter': 1000, 'learning_rate': 'constant', 'hidden_layer_sizes': (50,), 'alpha': 0.001, 'activation': 'logistic'}
Best MLP Accuracy: 0.9680616370282952


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
  self.n_iter_ = _check_optimize_result("lbfgs", opt_res, self.max_iter)


           Model  Accuracy  Precision    Recall  F1-score
0     Perceptron  0.715962   0.808979  0.468420  0.485217
1            MLP  0.946009   0.883765  0.935388  0.905400
2            SVM  0.970266   0.972376  0.916647  0.941421
3  Decision Tree  0.987480   0.984362  0.982701  0.983518
4  Random Forest  0.992175   0.994932  0.979788  0.987137
5       CatBoost  0.989045   0.980866  0.986077  0.983438
6       AdaBoost  0.718310   0.862928  0.542068  0.583505
7        XGBoost  0.982786   0.972937  0.965440  0.969112
8    Naive Bayes  0.748044   0.720717  0.590909  0.628476
