<a href="https://colab.research.google.com/github/Faiz-ahmed-13/blood_donation_predictor/blob/main/blood_donation_predictor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [14]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score
from imblearn.over_sampling import SMOTE

# Load the dataset
data = pd.read_csv("/transfusion.csv")

# Rename columns for clarity
data.columns = ['Recency', 'Frequency', 'Monetary', 'Time', 'Donated']

# Split into features (X) and target (y)
X = data.drop('Donated', axis=1)
y = data['Donated']

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Split into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train a Random Forest model with class weights
model = RandomForestClassifier(random_state=42, class_weight={0: 1, 1: 3})
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
print(f"Accuracy: {accuracy_score(y_test, y_pred)}")
print("Classification Report:")
print(classification_report(y_test, y_pred))

# ROC-AUC Score
y_pred_proba = model.predict_proba(X_test)[:, 1]
roc_auc = roc_auc_score(y_test, y_pred_proba)
print("ROC-AUC Score:", roc_auc)

Accuracy: 0.7631578947368421
Classification Report:
              precision    recall  f1-score   support

           0       0.83      0.70      0.76       122
           1       0.71      0.83      0.77       106

    accuracy                           0.76       228
   macro avg       0.77      0.77      0.76       228
weighted avg       0.77      0.76      0.76       228

ROC-AUC Score: 0.8250463965357253


In [16]:
from sklearn.model_selection import cross_val_score
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import RandomForestClassifier

# Handle class imbalance using SMOTE
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X, y)

# Initialize the model
model = RandomForestClassifier(random_state=42, class_weight={0: 1, 1: 3})

# Perform 5-fold cross-validation
cv_scores = cross_val_score(model, X_resampled, y_resampled, cv=5, scoring='accuracy')
print("Cross-Validation Accuracy Scores:", cv_scores)
print("Mean CV Accuracy:", cv_scores.mean())

Cross-Validation Accuracy Scores: [0.49122807 0.70175439 0.82017544 0.73245614 0.88157895]
Mean CV Accuracy: 0.7254385964912281


# **saving the file**

In [17]:
import joblib

# Save the trained model
joblib.dump(model, 'transfusion_model.pkl')

['transfusion_model.pkl']

In [18]:
from google.colab import files
files.download('transfusion_model.pkl')

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>