In [57]:
import numpy as np
import matplotlib.pyplot as plt
import mlp # Import the mlp module
import layer # Import the layer module
from mlp import MLP # Still import the MLP class if needed for direct use
from utils import generate_data, plot_classification, accuracy, mlp_networkx_view

import importlib

# Patch the `relu` function in layer.py on disk, then reload the project
# modules so the rest of the notebook uses the corrected implementation.
LAYER_PATH = '/content/layer.py'


def _replace_relu_body(lines):
    """Return ``(patched_lines, found)`` with the body of ``relu`` rewritten.

    The first line whose stripped text starts with ``def relu(`` is taken as
    the function header (this also matches a misspelled first parameter such
    as ``seld``). Every following line that is blank or indented deeper than
    the header is treated as the old body and replaced by a single
    ``return np.maximum(0, x)`` statement, indented four spaces deeper than
    the header. Blank lines separating ``relu`` from the next definition are
    preserved.

    Assumptions (not verifiable from this notebook -- confirm against
    layer.py): the signature fits on one line, the input parameter is named
    ``x``, and layer.py imports numpy as ``np``.

    Parameters
    ----------
    lines : list[str]
        Lines of the source file, each including its trailing newline.

    Returns
    -------
    tuple[list[str], bool]
        The (possibly modified) list of lines and whether a ``relu``
        definition was found. The input list is never mutated.
    """
    header_index = next(
        (i for i, text in enumerate(lines) if text.lstrip().startswith("def relu(")),
        None,
    )
    if header_index is None:
        return list(lines), False

    header = lines[header_index]
    header_indent = len(header) - len(header.lstrip())

    # The body ends at the first non-blank line indented no deeper than the header.
    body_end = header_index + 1
    while body_end < len(lines):
        text = lines[body_end]
        if text.strip() and (len(text) - len(text.lstrip())) <= header_indent:
            break
        body_end += 1

    # Give back trailing blank lines so spacing before the next definition is kept.
    while body_end > header_index + 1 and not lines[body_end - 1].strip():
        body_end -= 1

    new_body = [" " * (header_indent + 4) + "return np.maximum(0, x)\n"]
    return lines[:header_index + 1] + new_body + lines[body_end:], True


with open(LAYER_PATH, 'r') as f:
    original_lines = f.readlines()

patched_lines, relu_found = _replace_relu_body(original_lines)

if not relu_found:
    # Make the miss visible instead of silently doing nothing.
    print(f"Warning: no 'def relu(' found in {LAYER_PATH}; file left unchanged.")
elif patched_lines != original_lines:
    # Only touch the file when the patch actually changes something,
    # so re-running this cell is a no-op.
    with open(LAYER_PATH, 'w') as f:
        f.writelines(patched_lines)

# Reload the dependency first: reloading `mlp` before `layer` would let `mlp`
# re-import whatever it needs from the stale, unpatched `layer` module.
importlib.reload(layer)
importlib.reload(mlp)

# `from mlp import MLP` bound the class object that existed before the reload;
# re-bind it so later cells instantiate the reloaded class.
MLP = mlp.MLP


<module 'layer' from '/content/layer.py'>

In [53]:
import pandas as pd

# Read the dataset from its published Google Sheets CSV export into a DataFrame.
csv_url = 'https://docs.google.com/spreadsheets/d/e/2PACX-1vTRfoQAojPwNsw33zUSpBg32DuK2z0WvLPptGOncIXa38tN3RWk9puvelcmsIIhFb_XF12RD7PdqCwf/pub?output=csv'
df = pd.read_csv(csv_url)

In [54]:
# Boolean masks flagging rows whose measurement is exactly 0
# (presumably a placeholder for a missing value -- confirm with the data source).
no_Cholesterol = df['Cholesterol'].eq(0)
no_RestingBP = df['RestingBP'].eq(0)

# class_df_clean keeps only the rows where neither of the two columns is 0.
class_df_clean = df.loc[~no_Cholesterol & ~no_RestingBP]

# Summary statistics of the cleaned frame; the result is not displayed here
# because this is not the last expression of the cell.
class_df_clean.describe()

from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.compose import make_column_selector, make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

## Features (X) and target (y)
target = 'HeartDisease'

y = class_df_clean[target].copy()
X = class_df_clean.drop(columns=target).copy()

# Train/test split: no test_size is given, so scikit-learn's default proportion
# applies; the fixed random_state makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Column selectors: numeric columns and object-dtype (categorical) columns
num_feature = make_column_selector(dtype_include='number')
cat_feature = make_column_selector(dtype_include='object')

# Transformers
impute_num = SimpleImputer(strategy='median')         # kept simple; a more refined imputation was considered
impute_cat = SimpleImputer(strategy='most_frequent')  # kept simple; forward-fill was considered
scaler = StandardScaler()
# Nominal categories have no order, hence one-hot encoding; categories unseen
# during fit are ignored at transform time.
ohe_encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')

# One pipeline per column type: impute first, then scale / encode
pip_num = make_pipeline(impute_num, scaler)
pip_cat = make_pipeline(impute_cat, ohe_encoder)

# (name, pipeline, column selector) triples consumed by the ColumnTransformer
numerical_tup = ('Numerical', pip_num, num_feature)
categorical_tup = ('Categorical', pip_cat, cat_feature)

# Numeric block first, categorical block second in the output matrix
col_transformer = ColumnTransformer(
    transformers=[numerical_tup, categorical_tup],
    verbose_feature_names_out=False,
)

# Fit on the training data only, then apply the fitted transform to the test data
X_train_processed = col_transformer.fit_transform(X_train)
X_test_processed = col_transformer.transform(X_test)

In [58]:
# NOTE(review): the saved run of this cell ended with
# "ValueError: The truth value of an array with more than one element is ambiguous";
# the traceback is not preserved, so which call raised it cannot be told from here.

# Numerical setting
Nt = 100  # passed as the third argument of mlp.train below (original note: "number of predictions" -- TODO confirm meaning)

# Problem constants: one input per processed feature, three hidden layers, one output
input_size = X_train_processed.shape[1]
hide_layer_size1, activation_fonction1 = 8, 'relu'
hide_layer_size2, activation_fonction2 = 8, 'relu'
hide_layer_size3, activation_fonction3 = 4, 'sigmoid'
# Single output unit (original note: "above or below g" -- possibly left over
# from another exercise; TODO confirm).
output_size, activation_fonction4 = 1, 'sigmoid'

# Build the simple neural network (MLP)
layer_sizes = [input_size, hide_layer_size1, hide_layer_size2, hide_layer_size3, output_size]
activations = [activation_fonction1, activation_fonction2, activation_fonction3, activation_fonction4]
mlp = MLP(layer_sizes, activations)

# Train the MLP; the targets are converted to a NumPy column vector of shape (n, 1)
y_train_column = y_train.values.reshape(-1, 1)
mlp.train(X_train_processed, y_train_column, Nt)

# Final predictions, one processed test row at a time
z_pred = np.array([mlp.forward(sample) for sample in X_test_processed])  # raw outputs (presumably in (0, 1) given the sigmoid output layer)
z_pred_int = np.array([mlp.predict_label(sample) for sample in X_test_processed])  # presumably hard 0/1 labels -- confirm in mlp.py

# Accuracy of the MLP on the test set
print(f"Précision : {accuracy(y_test.values, z_pred_int) * 100:.2f}%")

mlp_networkx_view(mlp)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()