In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack
import pandas as pd

In [None]:
# Open the Excel workbook once so individual sheets can be parsed from it in later cells.
file_path = 'sepsis_diab_pt_all.xlsx'  # Path to the workbook; update if the file lives elsewhere
# pd.ExcelFile only opens the workbook; the sheets themselves are parsed on demand below.
sheets = pd.ExcelFile(file_path)
sheet_names = sheets.sheet_names
print("Available Sheets:", sheet_names)

In [None]:
# Parse each worksheet used by this notebook into its own DataFrame (parsed in the order listed).
# NOTE(review): 'prescriptoin' is spelled exactly as in the original call — presumably it mirrors
# the sheet name inside the workbook; confirm against the sheet names printed earlier.
admission_data, lab_events, microbiology_events, prescription_data = (
    sheets.parse(sheet_name)
    for sheet_name in (
        'sepsis_pt_all_admission details',
        'sepsis_lab_events',
        'microbiology events',
        'prescriptoin',
    )
)
# The 'poe' and 'poe_detail' sheets are currently not loaded:
#poe_data = sheets.parse('poe')
#poe_details = sheets.parse('poe_detail')

In [None]:
# Print a structural summary of the admissions sheet; verbose=True requests the full
# per-column listing rather than a shortened summary.
admission_data.info(verbose=True)

In [None]:
# Columns kept from each sheet; the same lists are used for selection and for null filtering.
admission_columns = ['hadm_id', 'admission_type', 'drg_code', 'dx_1_code', 'edhours']
prescription_columns = ['hadm_id', 'drug']

# De-duplicate first, then discard rows with a missing value in any kept column.
admission_data = (
    admission_data[admission_columns]
    .drop_duplicates()
    .dropna(subset=admission_columns)
)

# reset_index() (without drop=True) keeps the pre-reset row label as an 'index' column,
# which therefore also appears in the merged frame below.
prescription_data = (
    prescription_data[prescription_columns]
    .drop_duplicates()
    .reset_index()
    .dropna(subset=prescription_columns)
)

# One row per (prescription, admission) pair that shares a hadm_id.
admission_drug_data = prescription_data.merge(admission_data, on='hadm_id', how='inner')

# Dummy-encode the admission descriptors; drop_first=True omits the first level of each
# encoded column.
categorical_features = pd.get_dummies(
    admission_drug_data[['admission_type', 'drg_code', 'dx_1_code']],
    drop_first=True,
)

# Standardise ED hours; the fitted scaler is reused later to scale query values.
scaler = StandardScaler()
numerical_features = pd.DataFrame(
    scaler.fit_transform(admission_drug_data[['edhours']]),
    columns=['scaled_edhours'],
)

# TF-IDF representation of the drug names (computed here but not part of combined_features).
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
drug_tfidf_matrix = tfidf_vectorizer.fit_transform(admission_drug_data['drug'])

# Place the encoded categorical and scaled numerical features side by side on a fresh
# 0..n-1 index.
combined_features = pd.concat(
    [frame.reset_index(drop=True) for frame in (categorical_features, numerical_features)],
    axis=1,
).reset_index(drop=True)

print("Combined Features Shape:", combined_features.shape)

In [None]:
import numpy as np
def recommend_content_based_dynamic(query_dict, combined_features, admission_drug_data, scaler, top_n=5):
    """
    Recommend drugs for a query admission using cosine similarity over encoded features.

    Candidate rows are the rows of ``admission_drug_data`` that match ANY of the
    supplied query values (logical OR). The query is then projected into the column
    space of ``combined_features`` and compared with the candidates; the drugs of the
    ``top_n`` most similar candidate rows are counted and returned.

    Args:
        query_dict (dict): Feature name -> query value. Keys that are columns of
            ``admission_drug_data`` and have a non-None value drive the filtering.
            The key 'scaled_edhours' holds the value that is passed through
            ``scaler`` before it is compared.
        combined_features (pd.DataFrame): Encoded feature matrix. It must share its
            row index with ``admission_drug_data`` because rows are looked up by label.
        admission_drug_data (pd.DataFrame): Merged admission/prescription rows;
            must contain a 'drug' column.
        scaler: Fitted scaler exposing ``transform``; only used when the query
            provides 'scaled_edhours'.
        top_n (int): Number of most similar rows to use, and the maximum number of
            drugs returned.

    Returns:
        pd.Series: Drug counts among the most similar rows. A ``str`` message is
        returned instead when no query key is usable or when no row matches.
    """
    # Step 1: keep every row that matches at least one of the usable query values.
    filters = [
        admission_drug_data[key] == value
        for key, value in query_dict.items()
        if key in admission_drug_data.columns and value is not None
    ]
    if not filters:
        return "No valid query features provided."

    filtered_data = admission_drug_data[np.logical_or.reduce(filters)]
    if filtered_data.empty:
        # Read the values from the query itself; the previous code referenced
        # undefined names here and raised NameError instead of returning the message.
        drg = query_dict.get("drg_code")
        dx_code = query_dict.get("dx_1_code")
        return f"No recommendations available for DRG: {drg} and DX_1_CODE: {dx_code}"

    # Feature rows are selected by label, hence the shared-index requirement.
    filtered_features = combined_features.loc[filtered_data.index]

    # Step 2: build the query vector directly in the feature-column space.
    query_vector = pd.Series(0.0, index=filtered_features.columns)
    for key, value in query_dict.items():
        if value is None or key == "scaled_edhours":
            continue
        dummy_column = f"{key}_{value}"  # naming used by pd.get_dummies: <column>_<level>
        if dummy_column in query_vector.index:
            query_vector[dummy_column] = 1.0
        elif (
            key in query_vector.index
            and isinstance(value, (int, float, np.number))
            and not isinstance(value, bool)
        ):
            # Numeric columns are passed through get_dummies unchanged, so use the raw value.
            query_vector[key] = float(value)
        # Otherwise the level has no column (e.g. the level removed by drop_first=True,
        # or an unseen level); it stays encoded as all zeros.

    edhours_value = query_dict.get("scaled_edhours")
    if edhours_value is not None and "scaled_edhours" in query_vector.index:
        # Write the scaled value into its own column. Previously it was appended as
        # an extra column and then sliced away, so it never affected the similarity.
        feature_names = getattr(scaler, "feature_names_in_", None)
        if feature_names is None:
            raw_edhours = [[edhours_value]]
        else:
            # Reuse the fitted feature names to avoid sklearn's feature-name warning.
            raw_edhours = pd.DataFrame([[edhours_value]], columns=feature_names)
        query_vector["scaled_edhours"] = float(np.asarray(scaler.transform(raw_edhours))[0, 0])

    # Step 3: rank candidates by cosine similarity to the query.
    similarity_scores = cosine_similarity(
        query_vector.to_numpy(dtype=float).reshape(1, -1),
        filtered_features.to_numpy(dtype=float),
    )
    # Descending order first, then slice: same ranking as before for top_n > 0 and
    # an empty selection (instead of every row) when top_n == 0.
    top_positions = similarity_scores.argsort()[0][::-1][:top_n]
    similar_admissions = filtered_data.iloc[top_positions]

    # Step 4: count how often each drug appears among the most similar rows.
    return similar_admissions['drug'].value_counts().head(top_n)

In [None]:
# Example query for the drug recommender. The value under "scaled_edhours" is the one the
# recommender passes through the fitted scaler.
# Optional extra key, currently disabled: "dx_1_code": "J18.9"
query_dict = dict(
    drg_code=870,
    admission_type="EW EMER.",
    scaled_edhours=5,
)

# Arguments follow the function's parameter order: query, features, data, scaler.
recommendations = recommend_content_based_dynamic(
    query_dict,
    combined_features,
    admission_drug_data,
    scaler,
    top_n=10,
)

print(recommendations)

In [None]:
# Lab test recommendations (microbiology specimen types)

In [None]:
# Preview the first two rows of the microbiology sheet.
microbiology_events.head(2)

In [None]:
# Columns kept from each source; the same lists drive selection and null filtering.
admission_columns_events = ['hadm_id', 'admission_type', 'drg_code', 'dx_1_code', 'edhours']
event_columns = ['hadm_id', 'spec_type_desc']

# De-duplicate, then drop rows with a missing value in any kept column.
admission_data_forevents = (
    admission_data[admission_columns_events]
    .drop_duplicates()
    .dropna(subset=admission_columns_events)
)

# reset_index() (without drop=True) keeps the pre-reset row label as an 'index' column,
# exactly as before, so the merged frame keeps the same columns.
microbiology_events = (
    microbiology_events[event_columns]
    .drop_duplicates()
    .reset_index()
    .dropna(subset=event_columns)
)

# Merge admissions and microbiology events: one row per (event, admission) pair.
admission_events_data = pd.merge(microbiology_events, admission_data_forevents, on='hadm_id', how='inner')

# Build the features from the MERGED frame. The recommender looks feature rows up by the
# row labels of admission_events_data, so both frames must have the same rows in the same
# order. Building them from admission_data_forevents (one row per admission) misaligned them.
categorical_features_events = pd.get_dummies(
    admission_events_data[['admission_type', 'drg_code', 'dx_1_code']],
    drop_first=True,
)

# Scale numerical features (EDHOURS) on the same merged rows.
scaler_events = StandardScaler()
numerical_features_events = pd.DataFrame(
    scaler_events.fit_transform(admission_events_data[['edhours']]),
    columns=['scaled_edhours'],
)

# Combine all features into a single DataFrame on a fresh 0..n-1 index.
combined_features_events = pd.concat(
    [categorical_features_events.reset_index(drop=True), numerical_features_events.reset_index(drop=True)],
    axis=1,
).reset_index(drop=True)

# Guard the alignment that the recommender depends on.
assert len(combined_features_events) == len(admission_events_data), "feature rows must match merged event rows"

print("Combined Features Shape:", combined_features_events.shape)

In [None]:
def recommend_content_based_dynamic_events(query_dict, combined_features_events, admission_events_data, scaler_events, top_n=5):
    """
    Recommend specimen types ('spec_type_desc') for a query admission using cosine
    similarity over encoded features.

    Candidate rows are the rows of ``admission_events_data`` that match ANY of the
    supplied query values (logical OR). The query is then projected into the column
    space of ``combined_features_events`` and compared with the candidates; the
    specimen types of the ``top_n`` most similar candidate rows are counted and returned.

    Args:
        query_dict (dict): Feature name -> query value. Keys that are columns of
            ``admission_events_data`` and have a non-None value drive the filtering.
            The key 'scaled_edhours' holds the value that is passed through
            ``scaler_events`` before it is compared.
        combined_features_events (pd.DataFrame): Encoded feature matrix. It must
            share its row index with ``admission_events_data`` because rows are
            looked up by label.
        admission_events_data (pd.DataFrame): Merged admission/microbiology rows;
            must contain a 'spec_type_desc' column.
        scaler_events: Fitted scaler exposing ``transform``; only used when the
            query provides 'scaled_edhours'.
        top_n (int): Number of most similar rows to use, and the maximum number of
            specimen types returned.

    Returns:
        pd.Series: Specimen-type counts among the most similar rows. A ``str``
        message is returned instead when no query key is usable or when no row matches.
    """
    # Step 1: keep every row that matches at least one of the usable query values.
    filters = [
        admission_events_data[key] == value
        for key, value in query_dict.items()
        if key in admission_events_data.columns and value is not None
    ]
    if not filters:
        return "No valid query features provided."

    filtered_data = admission_events_data[np.logical_or.reduce(filters)]
    if filtered_data.empty:
        # Read the values from the query itself; the previous code referenced
        # undefined names here and raised NameError instead of returning the message.
        drg = query_dict.get("drg_code")
        dx_code = query_dict.get("dx_1_code")
        return f"No recommendations available for DRG: {drg} and DX_1_CODE: {dx_code}"

    # Feature rows are selected by label, hence the shared-index requirement.
    filtered_features = combined_features_events.loc[filtered_data.index]

    # Step 2: build the query vector directly in the feature-column space.
    query_vector = pd.Series(0.0, index=filtered_features.columns)
    for key, value in query_dict.items():
        if value is None or key == "scaled_edhours":
            continue
        dummy_column = f"{key}_{value}"  # naming used by pd.get_dummies: <column>_<level>
        if dummy_column in query_vector.index:
            query_vector[dummy_column] = 1.0
        elif (
            key in query_vector.index
            and isinstance(value, (int, float, np.number))
            and not isinstance(value, bool)
        ):
            # Numeric columns are passed through get_dummies unchanged, so use the raw value.
            query_vector[key] = float(value)
        # Otherwise the level has no column (e.g. the level removed by drop_first=True,
        # or an unseen level); it stays encoded as all zeros.

    edhours_value = query_dict.get("scaled_edhours")
    if edhours_value is not None and "scaled_edhours" in query_vector.index:
        # Use the scaler passed in as scaler_events (the previous code silently used the
        # global `scaler` from the drug pipeline) and write the result into its own column
        # rather than appending it as an extra column that was then sliced away.
        feature_names = getattr(scaler_events, "feature_names_in_", None)
        if feature_names is None:
            raw_edhours = [[edhours_value]]
        else:
            # Reuse the fitted feature names to avoid sklearn's feature-name warning.
            raw_edhours = pd.DataFrame([[edhours_value]], columns=feature_names)
        query_vector["scaled_edhours"] = float(np.asarray(scaler_events.transform(raw_edhours))[0, 0])

    # Step 3: rank candidates by cosine similarity to the query.
    similarity_scores = cosine_similarity(
        query_vector.to_numpy(dtype=float).reshape(1, -1),
        filtered_features.to_numpy(dtype=float),
    )
    # Descending order first, then slice: same ranking as before for top_n > 0 and
    # an empty selection (instead of every row) when top_n == 0.
    top_positions = similarity_scores.argsort()[0][::-1][:top_n]
    similar_admissions = filtered_data.iloc[top_positions]

    # Step 4: count how often each specimen type appears among the most similar rows.
    return similar_admissions['spec_type_desc'].value_counts().head(top_n)

In [None]:
# Example query for the specimen-type recommender. The value under "scaled_edhours" is the
# one the recommender passes through a fitted scaler.
query_dict = dict(
    drg_code=870,
    dx_1_code="J18.9",
    admission_type="EW EMER.",
    scaled_edhours=5,
)

# Arguments follow the function's parameter order: query, features, data, scaler.
recommendations = recommend_content_based_dynamic_events(
    query_dict,
    combined_features_events,
    admission_events_data,
    scaler_events,
    top_n=10,
)

print(recommendations)