In [1]:
!pip install faker
!pip install pandas scikit-learn imbalanced-learn streamlit joblib faker






 Step 1: Create Synthetic Dataset (CSV)


In [2]:
import pandas as pd
import random
from faker import Faker

fake = Faker()

blood_groups = ['A+', 'A-', 'B+', 'B-', 'AB+', 'AB-', 'O+', 'O-']
data = []

for i in range(1000):
    data.append({
        'Donor_ID': f'DR{i+1:04d}',
        'Name': fake.name(),
        'Blood_Group': random.choice(blood_groups),
        'Location': fake.city(),
        'Latitude': fake.latitude(),
        'Longitude': fake.longitude(),
        'Last_Donation_Days_Ago': random.randint(0, 180),
        'Availability': random.choice(['Yes', 'No']),
        'Screened': random.choice(['Yes', 'No']),
    })

df = pd.DataFrame(data)
df.to_csv('donor_dataset.csv', index=False)


Recommended Match (Based on Rules)rule based prediction

In [18]:
import pandas as pd

# Rule-Based Matching Function
def is_donor_match(blood_group, last_donated_days, availability, screened):
    if availability.lower() != "yes":
        return 0
    if screened.lower() != "yes":
        return 0
    if last_donated_days < 56:  # Donors are advised to wait 56 days
        return 0
    if blood_group.upper() not in ['A+', 'B+', 'O+', 'AB+']:
        return 0  # Only consider common compatible groups (basic rule)
    return 1

# Sample usage
sample_donors = pd.DataFrame({
    'BloodGroup': ['A+', 'B-', 'O+', 'AB+', 'A-'],
    'LastDonationDays': [60, 20, 90, 100, 70],
    'Availability': ['Yes', 'No', 'Yes', 'Yes', 'Yes'],
    'Screened': ['Yes', 'Yes', 'No', 'Yes', 'Yes']
})

# Apply rules to each row
sample_donors['Match'] = sample_donors.apply(
    lambda row: is_donor_match(row['BloodGroup'], row['LastDonationDays'], row['Availability'], row['Screened']),
    axis=1
)

print("Rules-Based Prediction:")
print(sample_donors)


Rules-Based Prediction:
  BloodGroup  LastDonationDays Availability Screened  Match
0         A+                60          Yes      Yes      1
1         B-                20           No      Yes      0
2         O+                90          Yes       No      0
3        AB+               100          Yes      Yes      1
4         A-                70          Yes      Yes      0


In [4]:
import pandas as pd

# Load your CSV
df = pd.read_csv('donor_dataset.csv')

# Define blood compatibility
blood_compatibility = {
    'A+': ['A+', 'A-', 'O+', 'O-'],
    'B+': ['B+', 'B-', 'O+', 'O-'],
    'AB+': ['A+', 'A-', 'B+', 'B-', 'AB+', 'AB-', 'O+', 'O-'],
    'O+': ['O+', 'O-'],
    'A-': ['A-', 'O-'],
    'B-': ['B-', 'O-'],
    'AB-': ['A-', 'B-', 'AB-', 'O-'],
    'O-': ['O-']
}

# Rule-based function
def is_match(donor, recipient_blood_group='A+'):
    return (donor['Blood_Group'] in blood_compatibility[recipient_blood_group]) and \
           (donor['Availability'] == 'Yes') and \
           (donor['Screened'] == 'Yes') and \
           (donor['Last_Donation_Days_Ago'] > 90)

# Apply function
df['Match'] = df.apply(lambda row: int(is_match(row, 'A+')), axis=1)

# Save updated CSV with 'Match' column
df.to_csv('donor_dataset_with_match.csv', index=False)

# Preview match count
print("Total Matched Donors:", df['Match'].sum())
print(df[['Donor_ID', 'Blood_Group', 'Availability', 'Screened', 'Last_Donation_Days_Ago', 'Match']].head())


Total Matched Donors: 60
  Donor_ID Blood_Group Availability Screened  Last_Donation_Days_Ago  Match
0   DR0001          A-          Yes      Yes                     160      1
1   DR0002          O+           No      Yes                     139      0
2   DR0003          A-          Yes      Yes                       8      0
3   DR0004          O-           No      Yes                      47      0
4   DR0005          B-          Yes      Yes                     137      0


Step 4: Build a Machine Learning Classifier

In [5]:
#1. Prepare labeled dataset (simulate Match column)
import numpy as np

df['Match'] = df.apply(
    lambda row: 1 if is_match(row, recipient_blood_group='A+') else 0,
    axis=1
)


sample model accuracy


In [8]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE

# Load data
df = pd.read_csv('donor_dataset_with_match.csv')

# Label encode categorical features
le = LabelEncoder()
for col in ['Blood_Group', 'Availability', 'Screened']:
    df[col] = le.fit_transform(df[col])

# Features and labels
X = df[['Blood_Group', 'Last_Donation_Days_Ago', 'Availability', 'Screened']]
y = df['Match']

# Check class balance
print("Original Match value counts:\n", y.value_counts())

# Apply SMOTE for balancing
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X, y)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(
    X_resampled, y_resampled, test_size=0.2, random_state=42
)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
import streamlit as st
import pandas as pd
import joblib
from sklearn.preprocessing import LabelEncoder

# Load the donor dataset
df = pd.read_csv("donor_dataset_with_match.csv")

# Load the trained model
model = joblib.load("donor_match_model.pkl")

# Streamlit App Title
st.title("HemoTrack AI - Donor Prediction System")

# Recipient blood group selection
blood_groups = ['A+', 'A-', 'B+', 'B-', 'AB+', 'AB-', 'O+', 'O-']
recipient_blood_group = st.selectbox("Select Recipient's Blood Group", blood_groups)

# Toggle mode
mode = st.radio("Choose Matching Method", ['Rule-Based', 'ML Model'])

# Define compatibility
blood_compatibility = {
    'A+': ['A+', 'A-', 'O+', 'O-'],
    'B+': ['B+', 'B-', 'O+', 'O-'],
    'AB+': blood_groups,
    'O+': ['O+', 'O-'],
    'A-': ['A-', 'O-'],
    'B-': ['B-', 'O-'],
    'AB-': ['AB-', 'A-', 'B-', 'O-'],
    'O-': ['O-']
}

# Rule-based function
def is_match(donor):
    return (donor['Blood_Group'] in blood_compatibility[recipient_blood_group]) and \
           (donor['Availability'] == 'Yes') and \
           (donor['Screened'] == 'Yes') and \
           (donor['Last_Donation_Days_Ago'] > 90)

# Process on button click
if st.button("Find Matching Donors"):
    if mode == 'Rule-Based':
        df['Rule_Match'] = df.apply(is_match, axis=1)
        matched = df[df['Rule_Match'] == True]
    else:
        # Prepare features
        le = LabelEncoder()
        temp_df = df.copy()
        for col in ['Blood_Group', 'Availability', 'Screened']:
            temp_df[col] = le.fit_transform(temp_df[col])

        X = temp_df[['Blood_Group', 'Last_Donation_Days_Ago', 'Availability', 'Screened']]
        df['ML_Predicted_Match'] = model.predict(X)
        matched = df[df['ML_Predicted_Match'] == 1]

    st.success(f"Found {len(matched)} matched donor(s)")
    st.dataframe(matched[['Donor_ID', 'Name', 'Blood_Group', 'Location', 'Last_Donation_Days_Ago']])

    # Optional: Download filtered results
    st.download_button(
        label="Download Matched Donors CSV",
        data=matched.to_csv(index=False).encode('utf-8'),
        file_name='matched_donors.csv',
        mime='text/csv'
    )


Original Match value counts:
 Match
0    940
1     60
Name: count, dtype: int64
Accuracy: 0.9707446808510638

Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97       182
           1       0.96      0.98      0.97       194

    accuracy                           0.97       376
   macro avg       0.97      0.97      0.97       376
weighted avg       0.97      0.97      0.97       376



2025-08-06 14:35:12.209 
  command:

    streamlit run /usr/local/lib/python3.11/dist-packages/colab_kernel_launcher.py [ARGUMENTS]
2025-08-06 14:35:12.223 Session state does not function when running a script without `streamlit run`


In [11]:
!pip install streamlit pyngrok




In [12]:
!pip install streamlit pyngrok --quiet


train_and_save_model.py(Random forest)| donor_ml_training.py (AI-Powered Match (Smarter Prediction)




In [13]:
# File: train_and_save_model.py
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from imblearn.over_sampling import SMOTE
import joblib

# Load data
df = pd.read_csv('donor_dataset_with_match.csv')

# Label encode categorical features
le_bg = LabelEncoder()
le_avail = LabelEncoder()
le_screened = LabelEncoder()

df['Blood_Group'] = le_bg.fit_transform(df['Blood_Group'])
df['Availability'] = le_avail.fit_transform(df['Availability'])
df['Screened'] = le_screened.fit_transform(df['Screened'])

# Save encoders
joblib.dump(le_bg, 'encoder_blood_group.pkl')
joblib.dump(le_avail, 'encoder_availability.pkl')
joblib.dump(le_screened, 'encoder_screened.pkl')

# Features and labels
X = df[['Blood_Group', 'Last_Donation_Days_Ago', 'Availability', 'Screened']]
y = df['Match']

# SMOTE for balancing
sm = SMOTE(random_state=42)
X_resampled, y_resampled = sm.fit_resample(X, y)

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

# Train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model
joblib.dump(model, 'donor_match_model.pkl')

# Evaluate
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))


Accuracy: 0.9707446808510638
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.96      0.97       182
           1       0.96      0.98      0.97       194

    accuracy                           0.97       376
   macro avg       0.97      0.97      0.97       376
weighted avg       0.97      0.97      0.97       376



APP CODE (app.py)

In [14]:
app_code = """import streamlit as st
import pandas as pd
import joblib

# Load the ML model and encoders
model = joblib.load("donor_match_model.pkl")
le_bg = joblib.load("encoder_blood_group.pkl")
le_avail = joblib.load("encoder_availability.pkl")
le_screened = joblib.load("encoder_screened.pkl")

# Load donor dataset
df = pd.read_csv("donor_dataset_with_match.csv")

# Streamlit UI
st.set_page_config(page_title="HemoTrack AI - Donor Matcher", layout="centered")
st.title("🩸 HemoTrack AI - Donor Match Prediction System")

# Select recipient blood group
blood_groups = le_bg.classes_
recipient_blood_group = st.selectbox("🧬 Select Recipient's Blood Group", blood_groups)

# Select matching method
mode = st.radio("🔎 Choose Match Type", [
    "Recommended Match (Based on Rules)",
    "AI-Powered Match (Smarter Prediction)"
])

# Blood group compatibility map
blood_compatibility = {
    'A+': ['A+', 'A-', 'O+', 'O-'],
    'B+': ['B+', 'B-', 'O+', 'O-'],
    'AB+': list(blood_groups),
    'O+': ['O+', 'O-'],
    'A-': ['A-', 'O-'],
    'B-': ['B-', 'O-'],
    'AB-': ['AB-', 'A-', 'B-', 'O-'],
    'O-': ['O-']
}

# Rule-based match logic
def is_match(donor):
    return (
        donor['Blood_Group'] in blood_compatibility[recipient_blood_group] and
        donor['Availability'] == 'Yes' and
        donor['Screened'] == 'Yes' and
        donor['Last_Donation_Days_Ago'] > 90
    )

# Match button
if st.button("🔍 Find Matching Donors"):
    if "Rule-Based" in mode:
        matches = df[df.apply(is_match, axis=1)]
    else:
        temp_df = df.copy()

        # Encode features
        temp_df['Blood_Group'] = le_bg.transform(temp_df['Blood_Group'])
        temp_df['Availability'] = le_avail.transform(temp_df['Availability'])
        temp_df['Screened'] = le_screened.transform(temp_df['Screened'])

        # Predict match
        X = temp_df[['Blood_Group', 'Last_Donation_Days_Ago', 'Availability', 'Screened']]
        df['ML_Predicted_Match'] = model.predict(X)
        matches = df[df['ML_Predicted_Match'] == 1]

    # Display results
    st.success(f"✅ Found {len(matches)} matching donor(s)")
    st.dataframe(matches[['Donor_ID', 'Name', 'Blood_Group', 'Location', 'Last_Donation_Days_Ago']])

    # Download results
    st.download_button(
        "📥 Download Matching Donors",
        matches.to_csv(index=False).encode('utf-8'),
        file_name="matched_donors.csv",
        mime="text/csv"
    )
"""

with open("app.py", "w") as f:
    f.write(app_code)


In [15]:
from pyngrok import ngrok

!ngrok authtoken 30uwePinxWfVCG4RUnUMcdcpwo9_6Ykw9j2kFqSeQZhFyQV4x

Authtoken saved to configuration file: /root/.config/ngrok/ngrok.yml


launch_streamlit_with_ngrok.py
---



In [16]:
import os
import time
import threading
from pyngrok import ngrok

# Install if not already
os.system("pip install streamlit pyngrok --quiet")

# Set your ngrok authtoken here (get yours from https://dashboard.ngrok.com/get-started/your-authtoken)
NGROK_AUTH_TOKEN = "30uwePinxWfVCG4RUnUMcdcpwo9_6Ykw9j2kFqSeQZhFyQV4x"
ngrok.set_auth_token(NGROK_AUTH_TOKEN)

# Start Streamlit app in a thread
def run_streamlit():
    os.system("streamlit run app.py")

thread = threading.Thread(target=run_streamlit)
thread.start()

# Wait a bit for the server to start
time.sleep(5)

# Connect ngrok HTTP tunnel to port 8501
public_url = ngrok.connect("http://localhost:8501", "http")
print(f"✅ Streamlit app is live at: {public_url}")

✅ Streamlit app is live at: NgrokTunnel: "https://0cd518bc5ec1.ngrok-free.app" -> "http://localhost:8501"
