<a href="https://colab.research.google.com/github/Tanushree-233/Machine_Learning/blob/main/ML_MultilayerPerceptron.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from google.colab import files
uploaded = files.upload()

Saving kidney_disease.csv to kidney_disease.csv


In [6]:
import pandas as pd
import numpy as np

# ------------------------------
# Load dataset
# ------------------------------
df = pd.read_csv('kidney_disease.csv')

# ------------------------------
# Data Cleaning and Preprocessing
# ------------------------------

# Clean up non-numeric characters and convert to numeric
cols_to_numeric = ['pcv', 'wc', 'rc']
for col in cols_to_numeric:
    df[col] = df[col].astype(str).str.replace('\t', '', regex=False).str.replace('?', '', regex=False)
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Clean up and map categorical features
df['dm'] = df['dm'].astype(str).str.replace(' ', '', regex=False).str.replace('\t', '', regex=False)
df['dm'] = df['dm'].map({'yes': 1, 'no': 0})

df['cad'] = df['cad'].astype(str).str.replace('\t', '', regex=False)
df['cad'] = df['cad'].map({'yes': 1, 'no': 0})

df['classification'] = df['classification'].astype(str).str.replace('\t', '', regex=False)
df['classification'] = df['classification'].map({'ckd': 1, 'notckd': 0})

# Convert other categorical columns to numerical using one-hot encoding or similar if needed,
# but for simplicity and to match the original MLP structure, let's drop them for now
# as the original code only normalized numerical features.
# In a real scenario, proper encoding/handling of these columns would be crucial.
categorical_cols_to_drop = ['rbc', 'pc', 'pcc', 'ba', 'htn', 'appet', 'pe', 'ane']
df = df.drop(columns=categorical_cols_to_drop)


# Handle missing values by imputing with the mean for numeric columns
for col in df.select_dtypes(include=np.number).columns:
    df[col] = df[col].fillna(df[col].mean())


# Drop the 'id' column as it is not a feature
df = df.drop(columns=['id'])


# Assuming last column is target
X = df.iloc[:, :-1].values
y = df.iloc[:, -1].values.reshape(-1, 1)

# Normalize input features
# Ensure X is of a numeric type before normalization
X = X.astype(float)
X = X / X.max(axis=0)


# ------------------------------
# Initialize MLP parameters
# ------------------------------
input_size = X.shape[1]
hidden_size = 3      # Number of neurons in hidden layer
output_size = 1
lr = 0.5            # Learning rate
epochs = 5000

# Initialize weights and biases randomly
np.random.seed(42)
W1 = np.random.randn(input_size, hidden_size)
b1 = np.zeros((1, hidden_size))
W2 = np.random.randn(hidden_size, output_size)
b2 = np.zeros((1, output_size))

# Activation function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def sigmoid_derivative(x):
    return x * (1 - x)

# ------------------------------
# Training MLP with Backpropagation
# ------------------------------
for epoch in range(epochs):
    # Forward pass
    Z1 = np.dot(X, W1) + b1
    A1 = sigmoid(Z1)
    Z2 = np.dot(A1, W2) + b2
    A2 = sigmoid(Z2)

    # Compute error
    error = y - A2

    # Backward pass
    dA2 = error * sigmoid_derivative(A2)
    dW2 = np.dot(A1.T, dA2)
    db2 = np.sum(dA2, axis=0, keepdims=True)

    dA1 = np.dot(dA2, W2.T) * sigmoid_derivative(A1)
    dW1 = np.dot(X.T, dA1)
    db1 = np.sum(dA1, axis=0)

    # Update weights and biases
    W2 += lr * dW2
    b2 += lr * db2
    W1 += lr * dW1
    b1 += lr * db1

    # Print weight updates every 1000 epochs
    if (epoch+1) % 1000 == 0:
        print(f"Epoch {epoch+1}")
        print("W1:", W1)
        print("b1:", b1)
        print("W2:", W2)
        print("b2:", b2)
        print("-"*50)

# ------------------------------
# Final Output
# ------------------------------
predictions = A2 > 0.5
accuracy = np.mean(predictions == y)
print(f"Final Accuracy: {accuracy*100:.2f}%")

Epoch 1000
W1: [[ 1.12994062 -3.55809456  0.80778966]
 [ 1.97638409 -2.67402212 -0.1162592 ]
 [ 2.58688496 -4.62126505 -0.21156115]
 [ 0.91647244 -2.37702632 -0.36743663]
 [ 0.41616927 -2.82459845 -1.70598788]
 [-0.17504342 -3.02424188  0.39815321]
 [-0.70033007 -2.52245472  1.52528271]
 [-0.15730198 -0.28266094 -1.40565784]
 [ 0.29228357 -4.37521484 -0.93798875]
 [ 0.48006843 -1.15387598 -0.26519133]
 [-0.01069612 -1.35405985  0.12673234]
 [-0.43996696 -2.51567374 -1.07416604]
 [ 0.55425813 -3.81778574 -1.24480881]
 [ 0.72495622 -2.12306153  0.30138791]
 [ 0.56555916 -3.84173137 -1.38893425]
 [-0.52350535 -1.38036738  1.12557874]]
b1: [[ 1.01950664 -5.4469072   0.26102418]]
W2: [[11.27881841]
 [ 7.56644367]
 [ 1.23815539]]
b2: [[15.14451462]]
--------------------------------------------------
Epoch 2000
W1: [[ 1.12994062 -3.55809456  0.80778964]
 [ 1.97638408 -2.67402212 -0.11625921]
 [ 2.58688495 -4.62126505 -0.2115612 ]
 [ 0.91647244 -2.37702632 -0.36743663]
 [ 0.41616927 -2.8245984

In [5]:
for col in df.select_dtypes(include='object').columns:
    print(f"Unique values in '{col}': {df[col].unique()}")

Unique values in 'rbc': [nan 'normal' 'abnormal']
Unique values in 'pc': ['normal' 'abnormal' nan]
Unique values in 'pcc': ['notpresent' 'present' nan]
Unique values in 'ba': ['notpresent' 'present' nan]
Unique values in 'pcv': ['44' '38' '31' '32' '35' '39' '36' '33' '29' '28' nan '16' '24' '37' '30'
 '34' '40' '45' '27' '48' '\t?' '52' '14' '22' '18' '42' '17' '46' '23'
 '19' '25' '41' '26' '15' '21' '43' '20' '\t43' '47' '9' '49' '50' '53'
 '51' '54']
Unique values in 'wc': ['7800' '6000' '7500' '6700' '7300' nan '6900' '9600' '12100' '4500'
 '12200' '11000' '3800' '11400' '5300' '9200' '6200' '8300' '8400' '10300'
 '9800' '9100' '7900' '6400' '8600' '18900' '21600' '4300' '8500' '11300'
 '7200' '7700' '14600' '6300' '\t6200' '7100' '11800' '9400' '5500' '5800'
 '13200' '12500' '5600' '7000' '11900' '10400' '10700' '12700' '6800'
 '6500' '13600' '10200' '9000' '14900' '8200' '15200' '5000' '16300'
 '12400' '\t8400' '10500' '4200' '4700' '10900' '8100' '9500' '2200'
 '12800' '11200' 

In [4]:
display(df.info())

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 26 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   id              400 non-null    int64  
 1   age             391 non-null    float64
 2   bp              388 non-null    float64
 3   sg              353 non-null    float64
 4   al              354 non-null    float64
 5   su              351 non-null    float64
 6   rbc             248 non-null    object 
 7   pc              335 non-null    object 
 8   pcc             396 non-null    object 
 9   ba              396 non-null    object 
 10  bgr             356 non-null    float64
 11  bu              381 non-null    float64
 12  sc              383 non-null    float64
 13  sod             313 non-null    float64
 14  pot             312 non-null    float64
 15  hemo            348 non-null    float64
 16  pcv             330 non-null    object 
 17  wc              295 non-null    obj

None