In [None]:
import pandas as pd
import torch
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from torch.utils.data import Dataset
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
import numpy as np

# Step 1: Load and preprocess the data
# Replace with the path to your dataset files
past_medical_history_df = pd.read_csv('/content/Expanded_Past_Medical_History_Data.csv')  # Replace with correct path
critical_patients_df = pd.read_csv('/content/Expanded_Critical_Patients_Data.csv')  # Replace with correct path

# Fill NaN values and ensure all text columns are strings
past_medical_history_df.fillna("", inplace=True)
critical_patients_df.fillna("", inplace=True)

# Step 2: Encode the condition labels on the entire dataset
label_encoder = LabelEncoder()
critical_patients_df['Condition_Encoded'] = label_encoder.fit_transform(critical_patients_df['Condition'])

# Combine datasets on Patient_ID to merge medical history with current symptoms
combined_df = pd.merge(critical_patients_df, past_medical_history_df, on="Patient_ID", how="inner")

# Combine necessary columns for input (Symptoms, Medical History, and other relevant information)
combined_df['Combined_Input'] = (combined_df['Symptoms'] + " " + combined_df['Medical_History'] + " " +
                                 combined_df['Previous_Medications'] + " " + combined_df['Allergies'] + " " +
                                 combined_df['Family_History'])

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(combined_df['Combined_Input'], combined_df['Condition_Encoded'],
                                                    test_size=0.2, random_state=42)

# Step 3: Build a custom dataset for MedBERT

class MedBERTDataset(Dataset):
    def __init__(self, texts, labels, tokenizer):
        self.texts = texts
        self.labels = labels
        self.tokenizer = tokenizer

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        inputs = self.tokenizer(self.texts.iloc[idx], return_tensors="pt", truncation=True, padding="max_length", max_length=256)
        label = torch.tensor(self.labels.iloc[idx])
        return {'input_ids': inputs['input_ids'].squeeze(0), 'attention_mask': inputs['attention_mask'].squeeze(0), 'labels': label}

# Initialize the BERT tokenizer and model
bert_tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
bert_model = BertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=len(label_encoder.classes_))

# Prepare the datasets for MedBERT
train_dataset = MedBERTDataset(X_train, y_train, bert_tokenizer)
test_dataset = MedBERTDataset(X_test, y_test, bert_tokenizer)

# Step 4: Training the MedBERT model

# Define training arguments
training_args = TrainingArguments(
    output_dir='./results',             # Output directory
    num_train_epochs=3,                 # Number of training epochs
    per_device_train_batch_size=4,      # Batch size for training
    per_device_eval_batch_size=4,       # Batch size for evaluation
    evaluation_strategy="epoch",        # Evaluation strategy
    logging_dir='./logs',               # Directory for logging
    logging_steps=10,
)

# Trainer for MedBERT
trainer = Trainer(
    model=bert_model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=test_dataset,
)

# Train the model
trainer.train()

# Step 5: Evaluate the MedBERT model

# Predict on the test data using MedBERT
bert_model.eval()
predictions = []
true_labels = []

for idx in range(len(X_test)):
    inputs = bert_tokenizer(X_test.iloc[idx], return_tensors="pt", truncation=True, padding="max_length", max_length=256).to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))
    with torch.no_grad():
        outputs = bert_model(**inputs)
        predicted_label = torch.argmax(outputs.logits, dim=-1).item()
        predictions.append(predicted_label)
        true_labels.append(y_test.iloc[idx])

# Convert predictions and true labels to numpy arrays
predictions = np.array(predictions, dtype=int)
true_labels = np.array(true_labels, dtype=int)  # Ensure both are integers

# Decode true_labels and predictions back to their original string labels
true_labels_str = label_encoder.inverse_transform(true_labels)
predictions_str = label_encoder.inverse_transform(predictions)

# Generate a classification report using decoded labels
print("\nClassification Report:")
print(classification_report(true_labels_str, predictions_str, target_names=label_encoder.classes_))

# Calculate additional metrics
accuracy = accuracy_score(true_labels_str, predictions_str)
precision = precision_score(true_labels_str, predictions_str, average='weighted')
recall = recall_score(true_labels_str, predictions_str, average='weighted')
f1 = f1_score(true_labels_str, predictions_str, average='weighted')

print(f"\nAccuracy: {accuracy:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"F1 Score: {f1:.4f}")

# Generate a confusion matrix using decoded labels
conf_matrix = confusion_matrix(true_labels_str, predictions_str)

print("\nConfusion Matrix:")
print(conf_matrix)


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]



model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Epoch,Training Loss,Validation Loss
1,1.9312,1.778222
2,0.7906,0.675134
3,0.6331,0.442077



Classification Report:
                      precision    recall  f1-score   support

  Anaphylactic Shock       1.00      0.31      0.47        13
    Brain Hemorrhage       0.82      1.00      0.90         9
        Heart Attack       1.00      1.00      1.00         8
         Heat Stroke       0.82      1.00      0.90         9
         Hypothermia       1.00      1.00      1.00         8
        Septic Shock       1.00      0.92      0.96        12
Severe Asthma Attack       1.00      1.00      1.00        15
              Stroke       1.00      1.00      1.00        11
    Traffic Accident       1.00      0.67      0.80         6
              Trauma       0.53      1.00      0.69         9

            accuracy                           0.88       100
           macro avg       0.92      0.89      0.87       100
        weighted avg       0.92      0.88      0.87       100


Accuracy: 0.8800
Precision: 0.9249
Recall: 0.8800
F1 Score: 0.8683

Confusion Matrix:
[[ 4  1  0  1  0  

In [None]:
from transformers import T5Tokenizer, T5ForConditionalGeneration
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from transformers import Trainer, TrainingArguments
import torch

t5_tokenizer = T5Tokenizer.from_pretrained('t5-base')
t5_model = T5ForConditionalGeneration.from_pretrained('t5-base')

# Custom dataset for T5 (Medical Report Generation)
class T5Dataset(Dataset):
    def __init__(self, inputs, targets, tokenizer, max_input_len=512, max_target_len=150):
        self.inputs = inputs
        self.targets = targets
        self.tokenizer = tokenizer
        self.max_input_len = max_input_len
        self.max_target_len = max_target_len

    def __len__(self):
        return len(self.inputs)

    def __getitem__(self, idx):
        input_text = self.inputs[idx]
        target_text = self.targets[idx]

        inputs = self.tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=self.max_input_len)
        targets = self.tokenizer(target_text, return_tensors="pt", truncation=True, padding="max_length", max_length=self.max_target_len)

        return {
            'input_ids': inputs['input_ids'].squeeze(0),
            'attention_mask': inputs['attention_mask'].squeeze(0),
            'labels': targets['input_ids'].squeeze(0)
        }

# Example: Creating input data for T5 (Predicted condition and patient data)
# Suppose MedBERT predicted 'Anaphylactic Shock', and we also have patient symptoms and vitals
medbert_prediction = 'Anaphylactic Shock'
patient_data = "Symptoms include hives, difficulty breathing, and swelling. The patient was administered epinephrine. Vital signs: BP 70/40, HR 120, SpO2 80%."

# Prepare T5 inputs for fine-tuning
t5_inputs = [f"Condition: {medbert_prediction}. {patient_data}"]
t5_targets = ["The patient is suffering from Anaphylactic Shock, indicated by severe allergic symptoms including hives, difficulty breathing, and swelling. Epinephrine was administered as first-line treatment. Vital signs suggest hypotension and tachycardia. Immediate hospitalization is recommended."]

# Create T5 dataset
t5_train_dataset = T5Dataset(t5_inputs, t5_targets, t5_tokenizer)

# Define training arguments for T5
training_args_t5 = TrainingArguments(
    output_dir='./t5_results',          # Output directory
    num_train_epochs=3,                 # Number of training epochs
    per_device_train_batch_size=2,      # Batch size for training
    per_device_eval_batch_size=2,       # Batch size for evaluation
    evaluation_strategy="epoch",        # Evaluate every epoch
    logging_dir='./t5_logs',            # Directory for logging
    logging_steps=10,
)

# Train the T5 model
t5_trainer = Trainer(
    model=t5_model,
    args=training_args_t5,
    train_dataset=t5_train_dataset,
    eval_dataset=t5_train_dataset  # For demonstration; typically you should have a separate eval set
)

# Train the model
t5_trainer.train()

# Step 3: Generate a medical report using T5
def generate_medical_report(t5_model, tokenizer, condition, patient_info):
    input_text = f"Condition: {condition}. {patient_info}"
    inputs = tokenizer(input_text, return_tensors="pt", truncation=True, padding="max_length", max_length=512).input_ids.to(t5_model.device)
    summary_ids = t5_model.generate(inputs, max_length=150, num_beams=4, early_stopping=True)
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)

# Example: Generate a medical report for a test case
generated_report = generate_medical_report(t5_model, t5_tokenizer, medbert_prediction, patient_data)
print(f"Generated Medical Report: {generated_report}")

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.39M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/1.21k [00:00<?, ?B/s]

You are using the default legacy behaviour of the <class 'transformers.models.t5.tokenization_t5.T5Tokenizer'>. This is expected, and simply means that the `legacy` (previous) behavior will be used so nothing changes for you. If you want to use the new behaviour, set `legacy=False`. This should only be set if you understand what it means, and thoroughly read the reason why this was added as explained in https://github.com/huggingface/transformers/pull/24565


model.safetensors:   0%|          | 0.00/892M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]



Epoch,Training Loss,Validation Loss
1,No log,9.946796
2,No log,8.541268
3,No log,7.94821


Generated Medical Report: Anaphylactic Shock. Symptoms include hives, difficulty breathing, and swelling.


In [None]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, sqrt, atan2

# Sample hospital dataset with doctor counts
hospital_data_with_count = {
    'Hospital_ID': [1, 2, 3, 4, 5],
    'Hospital_Name': [
        'City General Hospital',
        'Metro Health Center',
        'Green Valley Medical',
        'Riverbend Hospital',
        'Summit Care Hospital'
    ],
    'Longitude': [77.5946, 77.1025, 72.8777, 88.3639, 78.9629],
    'Latitude': [12.9716, 28.7041, 19.0760, 22.5726, 20.5937],
    'Cardiology_Doctors': [3, 0, 2, 2, 3],
    'Neurology_Doctors': [2, 1, 0, 3, 2],
    'Trauma_Doctors': [1, 0, 2, 0, 0],
    'Emergency_Doctors': [0, 3, 0, 1, 2],
    'Pediatrics_Doctors': [0, 2, 1, 0, 2]
}

# Create the DataFrame
hospital_df_with_count = pd.DataFrame(hospital_data_with_count)

# Now, use the hospital_df_with_count in the previous code
hospital_df = hospital_df_with_count.copy()

# Then you can proceed with the rest of your code

# Sample dataset of hospitals (assuming hospital_df_with_count already exists from the previous code)
hospital_df = hospital_df_with_count.copy()

# Function to calculate distance using Haversine formula
def haversine(lon1, lat1, lon2, lat2):
    # Convert latitude and longitude from degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])

    # Haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2
    c = 2 * atan2(sqrt(a), sqrt(1 - a))
    r = 6371  # Radius of earth in kilometers
    return r * c  # Distance in kilometers

# Example patient location (latitude, longitude)
patient_lat = 12.9716
patient_lon = 77.5946

# Condition: Heart Attack (handled by Cardiology doctors)
condition = 'Heart Attack'

# Map conditions to specialties
condition_specialty_map = {
    'Heart Attack': 'Cardiology_Doctors',
    'Septic Shock': ['Cardiology_Doctors', 'Emergency_Doctors'],
    'Stroke': 'Neurology_Doctors',
    'Brain Hemorrhage': 'Neurology_Doctors',
    'Severe Asthma Attack': ['Neurology_Doctors', 'Pediatrics_Doctors'],
    'Traffic Accident': 'Trauma_Doctors',
    'Trauma': 'Trauma_Doctors',
    'Anaphylactic Shock': ['Emergency_Doctors', 'Pediatrics_Doctors'],
    'Heat Stroke': 'Emergency_Doctors',
    'Hypothermia': 'Emergency_Doctors'
}

# Identify the relevant columns for the condition
specialties = condition_specialty_map[condition]

# Ensure specialties is a list
if not isinstance(specialties, list):
    specialties = [specialties]

# Function to score hospitals based on number of specialized doctors and distance
def score_hospitals(hospital_df, patient_lat, patient_lon, specialties):
    hospital_df['Distance_km'] = hospital_df.apply(lambda row: haversine(patient_lon, patient_lat, row['Longitude'], row['Latitude']), axis=1)
    hospital_df['Doctor_Count'] = hospital_df[specialties].sum(axis=1)

    # Remove hospitals with 0 doctors for the condition
    hospital_df = hospital_df[hospital_df['Doctor_Count'] > 0]

    # Calculate score (more doctors and less distance means higher score)
    max_doctors = hospital_df['Doctor_Count'].max()
    max_distance = hospital_df['Distance_km'].max()

    hospital_df['Score'] = (hospital_df['Doctor_Count'] / max_doctors) * 0.7 + (1 - hospital_df['Distance_km'] / max_distance) * 0.3

    # Sort hospitals by score (descending)
    hospital_df_sorted = hospital_df.sort_values(by='Score', ascending=False)

    return hospital_df_sorted

# Apply the scoring function
ranked_hospitals = score_hospitals(hospital_df, patient_lat, patient_lon, specialties)

# Apply the scoring function to rank hospitals for the selected patient
ranked_hospitals = score_hospitals(hospital_df_with_count, patient_lat, patient_lon, specialties)

# Use print to display the ranked hospitals DataFrame
print(ranked_hospitals)

   Hospital_ID          Hospital_Name  Longitude  Latitude  \
0            1  City General Hospital    77.5946   12.9716   
4            5   Summit Care Hospital    78.9629   20.5937   
2            3   Green Valley Medical    72.8777   19.0760   
3            4     Riverbend Hospital    88.3639   22.5726   

   Cardiology_Doctors  Neurology_Doctors  Trauma_Doctors  Emergency_Doctors  \
0                   3                  2               1                  0   
4                   3                  2               0                  2   
2                   2                  0               2                  0   
3                   2                  3               0                  1   

   Pediatrics_Doctors  Distance_km  Doctor_Count     Score  
0                   0     0.000000             3  1.000000  
4                   2   859.942560             3  0.834700  
2                   1   845.318386             2  0.604178  
3                   0  1560.696623             2 

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hospital_df['Score'] = (hospital_df['Doctor_Count'] / max_doctors) * 0.7 + (1 - hospital_df['Distance_km'] / max_distance) * 0.3
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  hospital_df['Score'] = (hospital_df['Doctor_Count'] / max_doctors) * 0.7 + (1 - hospital_df['Distance_km'] / max_distance) * 0.3


In [None]:
!pip install transformers
from transformers import BartTokenizer, BartForConditionalGeneration

bart_tokenizer = BartTokenizer.from_pretrained('facebook/bart-large')
bart_model = BartForConditionalGeneration.from_pretrained('facebook/bart-large').to(torch.device("cuda" if torch.cuda.is_available() else "cpu"))





In [110]:
!pip install fastapi uvicorn pyngrok



In [111]:
@app.get("/")
async def root():
    return {"message": "Hello World from FastAPI!"}


In [112]:
from fastapi import FastAPI
import uvicorn
from threading import Thread

# Create FastAPI app
app = FastAPI()

# Define a simple route to check if the app is working
@app.get("/")
async def root():
    return {"message": "Hello from FastAPI!"}

# Function to run the FastAPI app on port 8000
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)

# Start FastAPI in a separate thread
thread = Thread(target=run_app)
thread.start()


INFO:     Started server process [10500]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


In [113]:
!mkdir frontend

mkdir: cannot create directory ‘frontend’: File exists


In [114]:
from google.colab import files

# Upload the HTML, CSS, and JS files
uploaded = files.upload()

# Save files in the frontend directory
for filename in uploaded.keys():
    with open(f"frontend/{filename}", "wb") as f:
        f.write(uploaded[filename])

Saving demo (1).js to demo (1) (10).js
Saving Demo (1).css to Demo (1) (9).css
Saving FrontEndDemo (1).html to FrontEndDemo (1) (9).html


In [115]:
!ls frontend

'demo (1) (10).js'  'Demo (1) (4).css'	'demo (1) (7).js'	     'FrontEndDemo (1) (3).html'
'Demo (1) (1).css'  'demo (1) (4).js'	'Demo (1) (8).css'	     'FrontEndDemo (1) (4).html'
'demo (1) (1).js'   'Demo (1) (5).css'	'demo (1) (8).js'	     'FrontEndDemo (1) (5).html'
'Demo (1) (2).css'  'demo (1) (5).js'	'Demo (1) (9).css'	     'FrontEndDemo (1) (6).html'
'demo (1) (2).js'   'Demo (1) (6).css'	'demo (1) (9).js'	     'FrontEndDemo (1) (7).html'
'Demo (1) (3).css'  'demo (1) (6).js'	'FrontEndDemo (1) (1).html'  'FrontEndDemo (1) (8).html'
'demo (1) (3).js'   'Demo (1) (7).css'	'FrontEndDemo (1) (2).html'  'FrontEndDemo (1) (9).html'


In [116]:
from fastapi import FastAPI
from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel
import uvicorn
from pyngrok import ngrok
from threading import Thread

# Initialize the FastAPI app
app = FastAPI()

# Serve static files (HTML, CSS, JS)
app.mount("/static", StaticFiles(directory="frontend"), name="static")


# Define request models for the API endpoints
class ConditionPredictionRequest(BaseModel):
    patient_data: str


class ReportGenerationRequest(BaseModel):
    condition: str
    patient_info: str


class HospitalRankingRequest(BaseModel):
    latitude: float
    longitude: float
    condition: str


# API endpoint for predicting the condition based on patient symptoms
@app.post("/predict-condition/")
async def predict_condition(request: ConditionPredictionRequest):
    patient_data = request.patient_data

    # Replace with actual AI model logic for condition prediction
    condition = "Cardiac Arrest" if "chest pain" in patient_data.lower() else "Unknown Condition"

    return {"predicted_condition": condition}


# API endpoint for generating a medical report based on the condition and patient information
@app.post("/generate-report/")
async def generate_report(request: ReportGenerationRequest):
    condition = request.condition
    patient_info = request.patient_info

    # Generate a report (this can be customized further)
    report = f"Medical Report for condition '{condition}':\nPatient Info: {patient_info}"

    return {"generated_report": report}


# API endpoint for ranking nearby hospitals based on location and condition
@app.post("/rank-hospitals/")
async def rank_hospitals(request: HospitalRankingRequest):
    latitude = request.latitude
    longitude = request.longitude
    condition = request.condition

    # Example hospital ranking (replace with actual ranking logic)
    hospitals = [
        {"Hospital_Name": "City Hospital", "Distance_km": 2.3},
        {"Hospital_Name": "Town Clinic", "Distance_km": 1.8}
    ]

    return {"ranked_hospitals": hospitals}


# Function to run the FastAPI app using uvicorn
def run_app():
    uvicorn.run(app, host="0.0.0.0", port=8000)


# Start ngrok to expose the FastAPI app
public_url = ngrok.connect(8000)
print(f"Public URL: {public_url}")

# Start FastAPI in a background thread
thread = Thread(target=run_app)
thread.start()


Public URL: NgrokTunnel: "https://3656-34-75-74-240.ngrok-free.app" -> "http://localhost:8000"


INFO:     Started server process [10500]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
ERROR:    [Errno 98] error while attempting to bind on address ('0.0.0.0', 8000): address already in use
INFO:     Waiting for application shutdown.
INFO:     Application shutdown complete.


In [119]:
!curl -X GET https://3656-34-75-74-240.ngrok-free.app

INFO:     34.75.74.240:0 - "GET / HTTP/1.1" 200 OK
{"message":"Hello from FastAPI!"}

In [120]:
"http://3656-34-75-74-240.ngrok-free.app.ngrok.io/static/index.html"

'http://3656-34-75-74-240.ngrok-free.app.ngrok.io/static/index.html'