In [None]:

import pandas as pd
from datetime import datetime
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report


df = pd.read_csv("synthetic_blood_donation_data.csv")

# Convert LastDonationDate to DaysSinceLastDonation
df['LastDonationDate'] = pd.to_datetime(df['LastDonationDate'])
today = pd.to_datetime(datetime.today().date())
df['DaysSinceLastDonation'] = (today - df['LastDonationDate']).dt.days
df.drop(columns=['LastDonationDate'], inplace=True)

# Encode all categorical features
label_encoders = {}
for column in df.columns:
    if df[column].dtype == 'object':
        le = LabelEncoder()
        df[column] = le.fit_transform(df[column])
        label_encoders[column] = le

# Split into features and target
X = df.drop(columns=['Eligible'])
y = df['Eligible']

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(random_state=42)
model.fit(X_train, y_train)

# Evaluate the model
y_pred = model.predict(X_test)
report = classification_report(y_test, y_pred, output_dict=True)
report


{'0': {'precision': 0.9952127659574468,
  'recall': 1.0,
  'f1-score': 0.9976006398293789,
  'support': 1871.0},
 '1': {'precision': 1.0,
  'recall': 0.9302325581395349,
  'f1-score': 0.963855421686747,
  'support': 129.0},
 'accuracy': 0.9955,
 'macro avg': {'precision': 0.9976063829787234,
  'recall': 0.9651162790697674,
  'f1-score': 0.9807280307580629,
  'support': 2000.0},
 'weighted avg': {'precision': 0.9955215425531915,
  'recall': 0.9955,
  'f1-score': 0.995424073259179,
  'support': 2000.0}}

Save

In [2]:
import joblib

# Save model
joblib.dump(model, 'BloodDonationEligibilityModel2.joblib')

# Save label encoders
joblib.dump(label_encoders, 'LabelEncoders2.joblib')

['LabelEncoders2.joblib']