In [None]:
# Step 1: Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score, RocCurveDisplay


In [None]:
# Step 2: Load Dataset
# Read the CSV into a DataFrame, then leave a five-row preview as the cell's output.
data = pd.read_csv(filepath_or_buffer='blood.csv')
data.head(n=5)

In [None]:
# Step 3: Data Exploration
# (rows, columns) of the loaded frame.
print("Dataset Shape:", data.shape)

# Summary statistics as produced by DataFrame.describe().
print(data.describe())

# Count of missing values in each column (isna is the canonical name for isnull).
print(data.isna().sum())


In [None]:
# Step 4: Visualizations
# Class balance: number of rows per value of the 'Class' column.
fig, ax = plt.subplots()
sns.countplot(x='Class', data=data, ax=ax)
ax.set_title('Class distribution')
plt.show()

# Pairwise correlations between columns.
# Restrict to numeric columns before calling corr(): from pandas 2.0 onward
# DataFrame.corr() no longer drops non-numeric columns silently and raises
# instead, so this keeps the cell working if the CSV ever gains a text column.
fig, ax = plt.subplots()
sns.heatmap(
    data.select_dtypes(include='number').corr(),
    annot=True,
    cmap='coolwarm',
    ax=ax,
)
ax.set_title('Feature correlation matrix')
plt.show()


In [None]:
# Step 5: Preprocessing
# Split the frame into the 'Class' target and the remaining predictor columns.
y = data['Class']
X = data.drop(columns='Class')

# Hold out 20% of the rows for evaluation; stratifying on y keeps the class
# proportions the same in both splits, and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both splits so no test information leaks into the fit.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)


In [None]:
# Step 6: Train Model
# Build a 100-tree random forest with a fixed seed and fit it on the scaled
# training features (fit() returns the estimator itself, so it can be chained).
rf = RandomForestClassifier(n_estimators=100, random_state=42).fit(X_train_scaled, y_train)

# Predict on the held-out split and report the standard classification metrics.
y_pred = rf.predict(X_test_scaled)
print("Accuracy:", accuracy_score(y_true=y_test, y_pred=y_pred))
print(confusion_matrix(y_true=y_test, y_pred=y_pred))
print(classification_report(y_true=y_test, y_pred=y_pred))

# ROC curve drawn from the fitted model evaluated on the test split.
RocCurveDisplay.from_estimator(estimator=rf, X=X_test_scaled, y=y_test)
plt.show()


In [None]:
# Step 7: Feature Importance
# Pair every predictor column with the forest's importance score and order
# the table from the highest score to the lowest.
importance = rf.feature_importances_
features = (
    pd.DataFrame({'Feature': X.columns, 'Importance': importance})
    .sort_values('Importance', ascending=False)
)
print(features)

# Plot importance on the x-axis against feature name on the y-axis.
sns.barplot(x='Importance', y='Feature', data=features)
plt.show()


In [None]:
# Step 8: Recommendation Function
def generate_recommendation(input_data, model=None, feature_scaler=None, feature_names=None):
    """Classify one sample and turn the predicted class into a short label.

    Parameters
    ----------
    input_data : sequence of numbers
        Raw (unscaled) feature values for a single sample, in the same order
        as ``feature_names``.
    model : object with ``predict``, optional
        Fitted classifier. Defaults to the notebook-level ``rf``.
    feature_scaler : object with ``transform``, optional
        Fitted scaler. Defaults to the notebook-level ``scaler``.
    feature_names : sequence of str, optional
        Column names the scaler was fitted with. Defaults to ``X.columns``.

    Returns
    -------
    str
        ``"Likely donor"`` when the predicted class equals 1, otherwise
        ``"Unlikely donor"``.

    Raises
    ------
    ValueError
        If ``input_data`` does not contain exactly one value per feature.
    """
    # Fall back to the objects built in the earlier cells only when the caller
    # did not supply their own; this keeps the one-argument call working.
    model = rf if model is None else model
    feature_scaler = scaler if feature_scaler is None else feature_scaler
    if feature_names is None:
        feature_names = X.columns
    feature_names = list(feature_names)

    values = list(input_data)
    if len(values) != len(feature_names):
        raise ValueError(
            f"Expected {len(feature_names)} feature values, got {len(values)}"
        )

    # The scaler was fitted on a DataFrame, so hand it a one-row DataFrame
    # carrying the same column names instead of a bare list; a bare list makes
    # scikit-learn warn that the input has no valid feature names.
    sample = pd.DataFrame([values], columns=feature_names)
    pred = model.predict(feature_scaler.transform(sample))[0]
    return "Likely donor" if pred == 1 else "Unlikely donor"

# One raw sample to classify; the values are presumably ordered like the
# predictor columns the scaler was fitted on — confirm against the dataset.
example = [2, 20, 5000, 45]

# Left as the final expression so the returned label is shown as the cell output.
generate_recommendation(input_data=example)
