In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score, f1_score, precision_score
from sklearn.model_selection import train_test_split
from ucimlrepo import fetch_ucirepo

pd.set_option('display.width', 2000)

In [2]:
# fetch dataset
heart_disease = fetch_ucirepo(id=45)

X = heart_disease.data.features
Y = heart_disease.data.targets

# Convert target to binary
Y = Y['num'].apply(lambda x: 1 if x > 0 else 0)

# Join features and target
X['target'] = Y
# Remove records with missing values
X = X[~np.isnan(X).any(axis=1)]

In [3]:
def one_hot_encode(df, column, column_names):
    dummies = pd.get_dummies(df[column], prefix=column)
    column_names = [column + '_' + str(name) for name in column_names]
    dummies.columns = column_names
    dummies = dummies.astype('int64')
    df = pd.concat([df, dummies], axis=1)
    df.drop(column, axis=1, inplace=True)
    return df

In [4]:
X =  one_hot_encode(X, 'cp', ['typical_angina', 'atypical_angina', 'non-anginal_pain', 'asymptomatic'])
X = one_hot_encode(X, 'thal', ['normal', 'ST-T_wave_abnormality', 'left_ventricular_hypertrophy'])
X = one_hot_encode(X, 'slope', ['upsloping', 'flat', 'downsloping'])
X = one_hot_encode(X, 'restecg', ['normal', 'fixed_defect', 'reversable_defect'])

In [5]:
X.info()

<class 'pandas.core.frame.DataFrame'>
Index: 297 entries, 0 to 301
Data columns (total 23 columns):
 #   Column                             Non-Null Count  Dtype  
---  ------                             --------------  -----  
 0   age                                297 non-null    int64  
 1   sex                                297 non-null    int64  
 2   trestbps                           297 non-null    int64  
 3   chol                               297 non-null    int64  
 4   fbs                                297 non-null    int64  
 5   thalach                            297 non-null    int64  
 6   exang                              297 non-null    int64  
 7   oldpeak                            297 non-null    float64
 8   ca                                 297 non-null    float64
 9   target                             297 non-null    int64  
 10  cp_typical_angina                  297 non-null    int64  
 11  cp_atypical_angina                 297 non-null    int64  
 12 

In [6]:
# Extract target without missing values
Y = X['target']
X = X.drop('target', axis=1)

# Normalize data to [0, 1] range
X_norm = (X - X.min()) / (X.max() - X.min())

In [17]:
class Neuron():
    def __init__(self, num_of_weights, stand_dev=1):
        self.weights = np.random.normal(scale=stand_dev, size=num_of_weights)
        self.bias = np.random.normal(scale=stand_dev)
        self.X = None
        self.derivative = None

    @staticmethod
    def sigmoid(X):
        return np.exp(-np.logaddexp(0, -X))

    def sigmoid_derivative(self, X):
        return self.sigmoid(X) * (1 - self.sigmoid(X))

    def forward(self, inputs):
        self.X = inputs
        return self.sigmoid(np.dot(inputs, self.weights) + self.bias)

    def backward(self, error, weights_next_layer=None):
        if weights_next_layer is not None:
            error = error.T @ weights_next_layer
        self.derivative = error * self.sigmoid_derivative(np.dot(self.X,self.weights) + self.bias)
        return self.derivative
    
    def update(self, learning_rate):
        self.weights -=  learning_rate * np.dot(self.X.T, self.derivative)
        self.bias -= learning_rate * np.sum(self.derivative)
        self.X = None
        self.derivative = None

class NeuralNetwork():
    def __init__(self, num_of_inputs, hidden_layers, num_of_outputs, stand_dev=1.0):
        self.layers = []
        self.num_of_inputs = num_of_inputs
        self.hidden_layers = hidden_layers
        self.num_of_outputs = num_of_outputs
        self.num_of_hidden_layers = len(hidden_layers)
        self.stand_dev = stand_dev
        
        self.layers.append([Neuron(num_of_inputs, self.stand_dev) for _ in range(hidden_layers[0])])
        for i in range(1, self.num_of_hidden_layers):
            self.layers.append([Neuron(hidden_layers[i-1], self.stand_dev) for _ in range(hidden_layers[i])])
        if self.num_of_outputs == 1:
            self.layers.append([Neuron(hidden_layers[-1], self.stand_dev)])

    def cross_entropy(self, y, y_pred):
        return -np.sum(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred), axis=1)

    def cross_entropy_derivative(self, y, y_pred):
        return -np.sum(y / y_pred - (1 - y) / (1 - y_pred), axis=1)

    def forward(self, X):
        for layer in self.layers:
            X = np.array([neuron.forward(X) for neuron in layer]).T
        return X        

    def backward(self, output_error):
        # Iterate over the hidden layers in reverse order and calculate their gradients
        output_layer = self.layers[-1]
        output_error = np.array([neuron.backward(output_error) for neuron in output_layer])
        weights_next_layer = np.array([neuron.weights for neuron in output_layer]).T

        for layer in reversed(self.layers[:-1]):
            output_error = np.array([neuron.backward(output_error, weights_next_layer[index]) for index, neuron in enumerate(layer)])
            weights_next_layer = np.array([neuron.weights for neuron in layer]).T
        
        # Return the final gradients
        return output_error

    def update(self, learning_rate):
        for layer in self.layers:
            for neuron in layer:
                neuron.update(learning_rate)

    def fit(self, X, y, epochs=1000, batch_size=10, learning_rate=0.005):
        n_samples, n_features = X.shape
        # Jeśli batch_size jest większy niż liczba próbek, to ustawiamy go na liczbę próbek -> zwyczajnie uczymy na całym zbiorze GD
        if batch_size > n_samples:
            batch_size = n_samples
        cost_list = []
        for epoch in range(epochs):
            random_order = np.random.permutation(n_samples)
            X_shuffled = X.values[random_order]
            y_shuffled = y.values[random_order]
            # lista kosztów w paczkach dla każdej epoki
            cost_list_in_batch = []

            for batch_index in range(0, n_samples, batch_size):

                X_batch = X_shuffled[batch_index:batch_index + batch_size]
                y_batch = y_shuffled[batch_index:batch_index + batch_size].reshape(-1, 1)
                predictions = self.forward(X_batch)
                output_error = self.cross_entropy_derivative(y_batch, predictions)
                self.backward(output_error)
                self.update(learning_rate)
                
            cost = self.cross_entropy(y_batch, predictions)
            cost_list.append(np.mean(cost))

            if epoch % (epochs // 8) == 0:
                print(f'Epoch: {epoch}, Loss: {np.mean(cost)}')

            #jeśli koszt jest mniejszy niż epsilon to stop
            # if(i > 0 and abs(cost - self.cost_list[-1]) < self.epsilon ):
                # break
            # self.cost_list.append(cost)
            # iteration_cost = np.mean(cost_list_in_batch)
            # self.mean_cost_list.append(iteration_cost)
            # self.epoch_list.append(i)
        print(f'Epoch: {epoch}, Loss: {np.mean(cost)}')
        return self, cost_list


    def predict(self, X):
        return self.forward(X)

In [18]:
seed = 42
np.random.seed(seed)
X_train, X_test, Y_train, Y_test = train_test_split(X_norm, Y, test_size=0.2)
mlp = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=[8, 4], num_of_outputs=1)
mlp.fit(X_train, Y_train, epochs=1000, batch_size=10, learning_rate=0.005)
predictions = mlp.predict(X_test)
predictions = np.where(predictions > 0.5, 1, 0)
print(f'Accuracy: {accuracy_score(Y_test, predictions)}')
print(f'Precision: {precision_score(Y_test, predictions)}')
print(f'F1: {f1_score(Y_test, predictions)}')


Epoch: 0, Loss: 0.8366587713720809
Epoch: 125, Loss: 0.17555607988440508
Epoch: 250, Loss: 0.155472601691947
Epoch: 375, Loss: 0.19044257397183223
Epoch: 500, Loss: 0.08761761573339792
Epoch: 625, Loss: 0.26155733617304106
Epoch: 750, Loss: 0.02290420071616795
Epoch: 875, Loss: 0.030357829459238338
Epoch: 999, Loss: 0.11920780047209564
Accuracy: 0.85
Precision: 0.8
F1: 0.816326530612245


In [14]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=seed)
X_train_n, X_test_n, Y_train_n, Y_test_n = train_test_split(X_norm, Y, test_size=0.2, random_state=seed)

# domyslnie 
default_hidden_layers = [10, 5]

#1.Różnej wymiarowości warstwy ukrytej
dim_hidd_1 = [5, 5]
dim_hidd_2 = [20, 10]
dim_hidd_3 = [50, 25]
#2. Różnej wartości współczynnika uczenia
learning_rate_1 = 0.001
learning_rate_2 = 0.01
#3. Różnej wartości parametru standaryzacji
stand_dev_1 = 0.1
stand_dev_2 = 5
#4.danych znormalizownaych i nieznormalizowanych
unnorm = X_train, Y_train
#5. Różnej liczby watstw ukrytych
hidden_layers_size_1 = [10]
hidden_layers_size_2 = [10,5,2]
hidden_layers_size_3 = [10,5,3,2]

default_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1)
dim_hidd_1_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=dim_hidd_1, num_of_outputs=1)
dim_hidd_2_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=dim_hidd_2, num_of_outputs=1)
dim_hidd_3_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=dim_hidd_3, num_of_outputs=1)
learning_rate_1_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1)
learning_rate_2_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1)
stand_dev_1_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1, stand_dev=stand_dev_1)
stand_dev_2_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1, stand_dev=stand_dev_2)
unnorm_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=default_hidden_layers, num_of_outputs=1)
hidden_layers_size_1_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=hidden_layers_size_1, num_of_outputs=1)
hidden_layers_size_2_nn = NeuralNetwork(num_of_inputs=X_train.shape[1], hidden_layers=hidden_layers_size_2, num_of_outputs=1)

print('Default')
default_mlp, loss_history_1 = default_nn.fit(X_train, Y_train)
print('----------------')
print('Dim hidd 1')
dim_hidd_1_mlp, loss_history_2 = dim_hidd_1_nn.fit(X_train, Y_train)
print('----------------')
print('Dim hidd 2')
dim_hidd_2_mlp, loss_history_3 = dim_hidd_2_nn.fit(X_train, Y_train)
print('----------------')
print('Dim hidd 3')
dim_hidd_3_mlp, loss_history_4 = dim_hidd_3_nn.fit(X_train, Y_train)
print('----------------')
print('Learning rate 1')
learning_rate_1_mlp, loss_history_5 = learning_rate_1_nn.fit(X_train, Y_train, learning_rate=learning_rate_1)
print('----------------')
print('Learning rate 2')
learning_rate_2_mlp, loss_history_6 = learning_rate_2_nn.fit(X_train, Y_train, learning_rate=learning_rate_2)
print('----------------')
print('Stand dev 1')
stand_dev_1_mlp, loss_history_7 = stand_dev_1_nn.fit(X_train, Y_train)
print('----------------')
print('Stand dev 2')
stand_dev_2_mlp, loss_history_8 = stand_dev_2_nn.fit(X_train, Y_train)
print('----------------')
print('Unnorm')
unnorm_mlp, loss_history_9 = unnorm_nn.fit(X_train_n, Y_train_n)
print('----------------')
print('Hidden layers size 1')
hidden_layers_size_1_mlp, loss_history_10 = hidden_layers_size_1_nn.fit(X_train, Y_train)
print('----------------')
print('Hidden layers size 2')
hidden_layers_size_2_mlp, loss_history_11 = hidden_layers_size_2_nn.fit(X_train, Y_train)
print('----------------')


Default
Epoch: 0, Loss: 0.6849658572744125
Epoch: 250, Loss: 0.7035028164177634
Epoch: 500, Loss: 0.6874969108823227
Epoch: 750, Loss: 0.7217730533432238
Epoch: 999, Loss: 0.6976617381853722
----------------
Dim hidd 1
Epoch: 0, Loss: 0.6049334741042519
Epoch: 250, Loss: 0.7178726077799862
Epoch: 500, Loss: 0.6891453650748194
Epoch: 750, Loss: 0.7178087018408507
Epoch: 999, Loss: 0.7053654936786187
----------------
Dim hidd 2
Epoch: 0, Loss: 0.6848365447275377
Epoch: 250, Loss: 0.6522894052369967
Epoch: 500, Loss: 0.6865157910570941
Epoch: 750, Loss: 0.7255300875332213
Epoch: 999, Loss: 0.6868601172322689
----------------
Dim hidd 3
Epoch: 0, Loss: 0.5606495424589736
Epoch: 250, Loss: 0.7097434756326858
Epoch: 500, Loss: 0.6862328437975597
Epoch: 750, Loss: 0.7003487158700853
Epoch: 999, Loss: 0.6875165321268286
----------------
Learning rate 1
Epoch: 0, Loss: 0.8177304834642489
Epoch: 250, Loss: 0.7153391101819008
Epoch: 500, Loss: 0.6032681239541169
Epoch: 750, Loss: 0.35341291434788

TypeError: NeuralNetwork.fit() got an unexpected keyword argument 'stand_dev'

In [None]:

predictions = default_mlp.predict(X_test)
predictions = np.where(predictions > 0.5, 1, 0)
print(f'Accuracy: {accuracy_score(Y_test, predictions)}')
print(f'Precision: {precision_score(Y_test, predictions)}')
print(f'F1: {f1_score(Y_test, predictions)}')




XD