In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Kidney-disease classification pipeline: load the CSV, impute and encode it,
# perturb features and labels with synthetic noise, then train and score a
# random forest.

# Tunable constants for reproducibility and for the synthetic perturbation.
SEED = 42
NOISE_STD = 0.5        # standard deviation of the Gaussian feature noise
FLIP_FRACTION = 0.1    # share of the labels that get reassigned

# Seed NumPy's global generator BEFORE the first random draw so that the
# feature noise as well as the label flips are identical on every run.
np.random.seed(SEED)

# Step 1: Load the dataset
df = pd.read_csv("/content/kidney_disease.csv")
df.columns = df.columns.str.strip()  # remove stray whitespace around header names

# Step 2: Handle missing values
num_cols = df.select_dtypes(include=['float64', 'int64']).columns
cat_cols = df.select_dtypes(include=['object']).columns

# Numeric gaps -> column mean; categorical gaps -> most frequent value.
# NOTE(review): these statistics are computed on the full frame, i.e. before
# the train/test split below — confirm that is acceptable for this analysis.
df[num_cols] = df[num_cols].fillna(df[num_cols].mean())
df[cat_cols] = df[cat_cols].fillna(df[cat_cols].mode().iloc[0])

# Step 3: Encode categorical columns
# The same encoder object is refit for every column, so once the loop ends it
# only holds the mapping of the last column in cat_cols.
encoder = LabelEncoder()
for col in cat_cols:
    df[col] = encoder.fit_transform(df[col])

# Step 4: Split into features (X) and target (y)
X = df.drop('classification', axis=1)
y = df['classification']

# Step 5: Add Gaussian noise (mean=0, std=NOISE_STD) to numeric columns
# After Step 3 the label-encoded categorical columns are numeric as well, so
# they are selected here and receive noise too.
numeric_cols = X.select_dtypes(include=[np.number]).columns
for col in numeric_cols:
    X[col] = X[col] + np.random.normal(0, NOISE_STD, size=len(X))

# Reassign FLIP_FRACTION of the labels: 0 becomes 2, every other label becomes 0.
# NOTE(review): only labels 0 and 2 are treated as valid here; presumably a
# stray third category exists in 'classification' — confirm and clean it at
# the source instead of folding it into 0.
flip_idx = np.random.choice(y.index, size=int(FLIP_FRACTION * len(y)), replace=False)
y_noisy = y.copy()
# flip_idx has no repeats (replace=False), so one vectorised assignment is
# equivalent to updating the labels one index at a time.
y_noisy.loc[flip_idx] = np.where(y.loc[flip_idx] == 0, 2, 0)

# Step 6: Train/test split
# NOTE(review): feature noise and label flips are applied before this split,
# so the held-out set is perturbed too and the accuracy below is measured
# against flipped labels. Stratification uses the clean labels `y` while the
# target is `y_noisy` — confirm both are intended.
X_train, X_test, y_train, y_test = train_test_split(
    X, y_noisy, test_size=0.2, random_state=SEED, stratify=y
)

# Step 7: Build and train the model
# random_state is pinned explicitly so the forest does not depend on whatever
# the global NumPy random state happens to be when this line runs.
model = RandomForestClassifier(random_state=SEED)
model.fit(X_train, y_train)

# Step 8: Make predictions
y_pred = model.predict(X_test)

# Step 9: Check the accuracy
print("Accuracy:", accuracy_score(y_test, y_pred))

# Step 10: Show actual vs predicted
results = pd.DataFrame({'Actual': y_test, 'Predicted': y_pred})
print(results.head(20))


Accuracy: 0.925
     Actual  Predicted
161       0          0
20        0          0
392       2          2
303       2          2
339       2          2
249       0          0
53        0          0
88        0          0
260       2          2
103       0          0
75        0          0
130       0          0
387       2          2
14        0          0
291       2          2
58        0          0
188       0          0
361       0          2
175       0          0
353       2          2


In [None]:
from sklearn.metrics import confusion_matrix, classification_report, precision_score, recall_score, f1_score

# Evaluation summary for the held-out predictions: confusion matrix, text
# report, then precision / recall / F1 per class and as macro / weighted means.

# The three scoring functions are always applied in this order.
scorers = (precision_score, recall_score, f1_score)

# Step 1: Confusion Matrix
conf_matrix = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:", conf_matrix, sep="\n")

# Step 2: Classification Report (precision, recall and f1-score in one table)
class_report = classification_report(y_test, y_pred)
print("\nClassification Report:", class_report, sep="\n")

# Step 3: Per-class scores (average=None returns one value per class)
precision, recall, f1 = [score(y_test, y_pred, average=None) for score in scorers]

for heading, per_class in (
    ("\nPrecision for each class:", precision),
    ("\nRecall for each class:", recall),
    ("\nF1-Score for each class:", f1),
):
    print(heading)
    print(per_class)

# Macro averages: unweighted mean over the classes
precision_macro, recall_macro, f1_macro = [
    score(y_test, y_pred, average='macro') for score in scorers
]

for label, value in (
    ("\nMacro Average Precision:", precision_macro),
    ("Macro Average Recall:", recall_macro),
    ("Macro Average F1-Score:", f1_macro),
):
    print(label, value)

# Weighted averages: mean over the classes weighted by their support
precision_weighted, recall_weighted, f1_weighted = [
    score(y_test, y_pred, average='weighted') for score in scorers
]

for label, value in (
    ("\nWeighted Average Precision:", precision_weighted),
    ("Weighted Average Recall:", recall_weighted),
    ("Weighted Average F1-Score:", f1_weighted),
):
    print(label, value)


Confusion Matrix:
[[46  2]
 [ 4 28]]

Classification Report:
              precision    recall  f1-score   support

           0       0.92      0.96      0.94        48
           2       0.93      0.88      0.90        32

    accuracy                           0.93        80
   macro avg       0.93      0.92      0.92        80
weighted avg       0.93      0.93      0.92        80


Precision for each class:
[0.92       0.93333333]

Recall for each class:
[0.95833333 0.875     ]

F1-Score for each class:
[0.93877551 0.90322581]

Macro Average Precision: 0.9266666666666667
Macro Average Recall: 0.9166666666666667
Macro Average F1-Score: 0.9210006583278473

Weighted Average Precision: 0.9253333333333333
Weighted Average Recall: 0.925
Weighted Average F1-Score: 0.9245556287030942
