In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from scipy.sparse import hstack
import pandas as pd

In [None]:
# Load the dataset
file_path = 'sepsis_diab_pt_all_v2.xlsx'  # Relative path to the Excel workbook; update if needed
# Open the workbook once; `sheets` is reused by later cells to parse individual worksheets
sheets = pd.ExcelFile(file_path)
sheet_names = sheets.sheet_names
# Show the worksheet names so the names passed to `sheets.parse(...)` can be checked
print("Available Sheets:", sheet_names)

In [None]:
# Parse the four worksheets used by this notebook into separate DataFrames.
# The names are passed to `sheets.parse` exactly as written (including the
# spelling 'prescriptoin'); presumably they match the workbook's sheet names.
admission_data, lab_events, microbiology_events, prescription_data = (
    sheets.parse(worksheet_name)
    for worksheet_name in (
        'sepsis_pt_all_admission details',
        'sepsis_lab_events',
        'microbiology events',
        'prescriptoin',
    )
)
#poe_data = sheets.parse('poe')
#poe_details = sheets.parse('poe_detail')

In [None]:
# Preview the first rows only: displaying the entire frame bloats the saved
# notebook output without adding information.
microbiology_events.head()

In [None]:
# Print the full per-column summary (column names, non-null counts, dtypes) of the admissions sheet
admission_data.info(verbose=True)

In [None]:
# Columns needed from each sheet
admission_cols = ['subject_id','hadm_id', 'admission_type', 'drg_code', 'dx_1_code', 'edhours','heartdisease_flag','kidneydisease_flag']
prescription_cols = ['subject_id','hadm_id', 'drug']

# Keep only the needed columns, then drop duplicate rows and rows with a missing
# value in any of them. Chained (non-inplace) calls give the same result when the
# cell is re-run.
admission_data = (
    admission_data[admission_cols]
    .drop_duplicates()
    .dropna(subset=admission_cols)
)
prescription_data = (
    prescription_data[prescription_cols]
    .drop_duplicates()
    .dropna(subset=prescription_cols)
    # drop=True: renumber the rows without keeping the old labels as a stray
    # 'index' column (which would otherwise leak into the merged frame).
    .reset_index(drop=True)
)

# Merge prescriptions with admissions on the admission id. Both frames carry
# 'subject_id', so pandas suffixes them ('_x' from prescriptions, '_y' from
# admissions); the prescription-side column is renamed back to 'subject_id'.
admission_drug_data = pd.merge(prescription_data, admission_data, on='hadm_id', how='inner')
admission_drug_data = admission_drug_data.rename(columns={'subject_id_x': 'subject_id'})

# Encode categorical variables
# NOTE(review): pd.get_dummies only expands object/string/category columns; if
# 'drg_code' or the *_flag columns are numeric they pass through unchanged -
# confirm their dtypes.
categorical_features = pd.get_dummies(
    admission_drug_data[['admission_type', 'drg_code', 'dx_1_code','heartdisease_flag','kidneydisease_flag']],
    drop_first=True
)

# Scale numerical features (EDHOURS)
scaler = StandardScaler()
numerical_features = pd.DataFrame(
    scaler.fit_transform(admission_drug_data[['edhours']]),
    columns=['scaled_edhours']
)

# TF-IDF for drug names (kept as a separate sparse matrix; it is not part of
# `combined_features` below)
tfidf_vectorizer = TfidfVectorizer(stop_words='english')
drug_tfidf_matrix = tfidf_vectorizer.fit_transform(admission_drug_data['drug'])

# Combine the encoded categorical and scaled numerical features column-wise.
# Both parts are renumbered first so the rows line up by position.
combined_features = pd.concat(
    [categorical_features.reset_index(drop=True), numerical_features.reset_index(drop=True)],
    axis=1
)

print("Combined Features Shape:", combined_features.shape)

In [None]:
# Preview the first rows of the combined (encoded categorical + scaled numerical) feature frame
combined_features.head()

In [None]:
# Step 1: Create Interaction Matrix
def create_user_interaction_matrix(admission_data, col, index_col='subject_id', value_col='drg_code'):
    """
    Build a patient-by-item count matrix from a long-format frame.

    Each cell holds the number of rows of ``admission_data`` for that
    (``index_col``, ``col``) pair whose ``value_col`` is non-null. Pairs that
    never occur are filled with 0. The cells are counts, not 0/1 flags.

    Args:
        admission_data (pd.DataFrame): Long-format data containing ``index_col``,
            ``col`` and ``value_col``.
        col (str): Column whose distinct values become the matrix columns
            (e.g. 'drug' or 'spec_type_desc').
        index_col (str): Column whose distinct values become the matrix rows.
            Defaults to 'subject_id'.
        value_col (str): Column whose non-null entries are counted.
            Defaults to 'drg_code'.

    Returns:
        pd.DataFrame: Interaction matrix with ``index_col`` values as rows and
        ``col`` values as columns.

    Raises:
        KeyError: If ``admission_data`` lacks any of the three columns.
    """
    # Fail early with a message naming the missing columns, instead of the less
    # specific KeyError raised from inside pivot_table.
    missing = [name for name in (index_col, col, value_col) if name not in admission_data.columns]
    if missing:
        raise KeyError(f"admission_data is missing required column(s): {missing}")

    interaction_matrix = pd.pivot_table(
        admission_data,
        values=value_col,   # Only used as something to count; its values do not matter.
        index=index_col,
        columns=col,
        aggfunc='count',    # Number of non-null rows per (row, column) pair.
        fill_value=0        # Pairs that never occur count as 0.
    )
    return interaction_matrix

# Step 2: Calculate User-User Similarity
def calculate_user_similarity(interaction_matrix):
    """
    Build the pairwise cosine-similarity table for the rows of an interaction matrix.

    Args:
        interaction_matrix (pd.DataFrame): Matrix with one row per user.

    Returns:
        pd.DataFrame: Square frame labelled on both axes with the index of
        ``interaction_matrix``, holding the cosine similarity of each pair of rows.
    """
    # The same labels are used for both axes so a similarity can be looked up by user id.
    user_ids = interaction_matrix.index
    return pd.DataFrame(
        cosine_similarity(interaction_matrix),
        index=user_ids,
        columns=user_ids,
    )


# Step 3: Recommend Events Based on User Similarity
def recommend_events_user_based(patient_id, interaction_matrix, user_similarity, top_n=5):
    """
    Recommend events (e.g. drugs or tests) for one patient from that patient's most similar patients.

    At most ``top_n`` other patients with a strictly positive similarity are used
    as neighbours. Each event is scored as the similarity-weighted sum of the
    neighbours' interaction counts. Events the patient already has, and events
    with a score of 0 (no neighbour has them), are never recommended.

    Args:
        patient_id (int): Patient ID for whom to recommend events.
        interaction_matrix (pd.DataFrame): Patient-event interaction matrix (patients as rows).
        user_similarity (pd.DataFrame): Patient-patient similarity matrix labelled by patient ID on both axes.
        top_n (int): Maximum number of neighbours used and maximum number of recommendations returned.

    Returns:
        dict | str | None:
            * ``{"recommended_events": {event: score}, "similar_users": {patient_id: similarity}}``
              on success (``recommended_events`` may be empty);
            * the string ``"Patient ID not found in dataset."`` when ``patient_id`` is not a row
              of ``interaction_matrix``;
            * ``None`` (after printing a notice) when no other patient has a positive similarity.
    """
    if patient_id not in interaction_matrix.index:
        return "Patient ID not found in dataset."

    # Rank all other patients. Neighbours with zero (or NaN) similarity carry no
    # signal, so they are dropped instead of being reported as "similar users".
    neighbour_scores = user_similarity[patient_id].drop(patient_id)
    similar_users = neighbour_scores[neighbour_scores > 0].nlargest(top_n)

    if similar_users.empty:
        print("No similar users found!")
        return None

    # Aggregate event occurrences from similar users (weighted by similarity scores)
    weighted_event_scores = interaction_matrix.loc[similar_users.index].T.dot(similar_users)

    # Remove events the patient has already encountered ...
    known_events = interaction_matrix.loc[patient_id]
    unseen_scores = weighted_event_scores[known_events == 0]
    # ... and events that none of the neighbours has (score 0), which are not recommendations.
    recommended_events = unseen_scores[unseen_scores > 0].sort_values(ascending=False).head(top_n)

    return {
        "recommended_events": recommended_events.to_dict(),  # event -> weighted score
        "similar_users": dict(zip(similar_users.index, similar_users.values))  # patient ID -> similarity
    }

In [None]:
# Preview the first two rows of the merged prescription/admission frame
admission_drug_data.head(2)

In [None]:
if __name__ == "__main__":
    # Load dataset (Replace with actual dataset)
    #admission_drug_data = pd.read_csv("admission_drug_data.csv")

    # Step 1: Create the patient-drug interaction matrix
    interaction_matrix = create_user_interaction_matrix(admission_drug_data, "drug")
    print("Interaction Matrix Shape:", interaction_matrix.shape)

    # Step 2: Compute patient-patient similarity
    user_similarity_df = calculate_user_similarity(interaction_matrix)
    print("User-User Similarity Matrix Shape:", user_similarity_df.shape)

    # Step 3: Recommend drugs for a specific patient
    patient_id = 10577647  # Replace with actual patient ID
    recommendations = recommend_events_user_based(patient_id, interaction_matrix, user_similarity_df, top_n=5)

    print(f"Recommendations for patient {patient_id}:")
    # The recommender returns a dict on success, but a plain message string for an
    # unknown patient and None when there are no similar patients; subscripting
    # those would raise TypeError, so only a dict is unpacked.
    if isinstance(recommendations, dict):
        print("Recommended Events with Scores:", recommendations["recommended_events"])
        print("Similar Users with Scores:", recommendations["similar_users"])
    elif recommendations is None:
        print("No recommendations available.")
    else:
        print(recommendations)

In [None]:
# (rows, columns) of the microbiology frame as it currently stands
microbiology_events.shape

In [None]:
# Re-parse the worksheets so the cells below start from the sheet contents again
# (an earlier cell reassigns `admission_data` and `prescription_data` to
# filtered column subsets).
admission_data, lab_events, microbiology_events, prescription_data = map(
    sheets.parse,
    [
        'sepsis_pt_all_admission details',
        'sepsis_lab_events',
        'microbiology events',
        'prescriptoin',
    ],
)
#poe_data = sheets.parse('poe')
#poe_details = sheets.parse('poe_detail')

In [None]:
# Columns needed from each sheet
admission_event_cols = ['subject_id','hadm_id', 'admission_type', 'drg_code', 'dx_1_code', 'edhours']
microbiology_cols = ['subject_id','hadm_id', 'spec_type_desc']

# Keep only the needed columns, then drop duplicate rows and rows with a missing
# value in any of them (chained, non-inplace, so the cell can be re-run safely).
admission_data_forevents = (
    admission_data[admission_event_cols]
    .drop_duplicates()
    .dropna(subset=admission_event_cols)
)
microbiology_events = (
    microbiology_events[microbiology_cols]
    .drop_duplicates()
    .dropna(subset=microbiology_cols)
    # drop=True: renumber the rows without keeping the old labels as a stray
    # 'index' column (which would otherwise leak into the merged frame).
    .reset_index(drop=True)
)

# Merge microbiology events with admissions, per patient.
# 'subject_id' is the join key, so it stays a single unsuffixed column and needs
# no renaming; 'hadm_id' exists in both frames and becomes 'hadm_id_x' /
# 'hadm_id_y'.
# NOTE(review): joining on 'subject_id' alone pairs every microbiology row of a
# patient with every admission row of that patient, which multiplies the counts
# used by the interaction matrix. The drug cell joins on 'hadm_id' - confirm
# which key is intended here.
admission_events_data = pd.merge(microbiology_events, admission_data_forevents, on='subject_id', how='inner')

# Encode categorical variables (computed from the admission rows, not from the merged frame)
categorical_features_events = pd.get_dummies(
    admission_data_forevents[['admission_type', 'drg_code', 'dx_1_code']],drop_first=True
)

# Scale numerical features (EDHOURS)
scaler_events = StandardScaler()
numerical_features_events = pd.DataFrame(
    scaler_events.fit_transform(admission_data_forevents[['edhours']]),
    columns=['scaled_edhours']
)

# No TF-IDF step is applied to 'spec_type_desc' in this cell.

# Combine the encoded categorical and scaled numerical features column-wise.
# Both parts are renumbered first so the rows line up by position.
combined_features_events = pd.concat(
    [categorical_features_events.reset_index(drop=True), numerical_features_events.reset_index(drop=True)],
    axis=1
)

print("Combined Features Shape:", combined_features_events.shape)

In [None]:
# Preview the first two rows of the admission feature frame built for the microbiology analysis
combined_features_events.head(2)

In [None]:
#prescription_data['drug'].value_counts()

In [None]:
#admission_events_data.rename(columns={'subject_id_x':'subject_id'}, inplace=True)

In [None]:
# Show every merged microbiology/admission row belonging to patient 10577647
admission_events_data.loc[lambda frame: frame['subject_id'] == 10577647]

In [None]:
if __name__ == "__main__":
    # Step 1: Create the patient-specimen-type interaction matrix
    interaction_matrix = create_user_interaction_matrix(admission_events_data, "spec_type_desc")
    print("Interaction Matrix Shape:", interaction_matrix.shape)

    # Step 2: Compute patient-patient similarity
    user_similarity_df = calculate_user_similarity(interaction_matrix)
    print("User-User Similarity Matrix Shape:", user_similarity_df.shape)

    # Step 3: Recommend microbiology specimen types for a specific patient
    patient_id = 10577647  # Replace with actual patient ID
    recommendations = recommend_events_user_based(patient_id, interaction_matrix, user_similarity_df, top_n=5)

    print(f"Recommendations for patient {patient_id}:")
    # The recommender returns a dict on success, but a plain message string for an
    # unknown patient and None when there are no similar patients; subscripting
    # those would raise TypeError, so only a dict is unpacked.
    if isinstance(recommendations, dict):
        print("Recommended Events with Scores:", recommendations["recommended_events"])
        print("Similar Users with Scores:", recommendations["similar_users"])
    else:
        print(recommendations)

In [None]:
# Recommendation of microbiology events based on item-based filtering