In [None]:

# CSV export URL of the Google Sheet holding the patient observations.
# Every data-loading step in this notebook reads from this address.
sheet_url = "https://docs.google.com/spreadsheets/d/1qfsk9oR_Ml9Upkbgje8qEKyX8w78SulLiJZ9UIbL0PI/export?format=csv"


import streamlit as st
import pandas as pd
import plotly.express as px


# -----------------------------
# Load Data from CSV
# -----------------------------
@st.cache_data
def load_data():
    """Download the observation sheet and return it as a DataFrame.

    The CSV is read from the module-level ``sheet_url``.  The
    ``observationStart`` and ``observationEnd`` columns are converted to
    datetimes when they exist; all other columns are returned exactly as
    pandas parsed them.  The function is wrapped in ``st.cache_data``.

    Returns:
        pandas.DataFrame: the sheet contents.
    """
    frame = pd.read_csv(sheet_url)
    # Only touch the timestamp columns that are actually in the sheet.
    for stamp_col in ("observationStart", "observationEnd"):
        if stamp_col in frame.columns:
            frame[stamp_col] = pd.to_datetime(frame[stamp_col])
    return frame


df = load_data()

# Install dependencies if not already installed
# !pip install streamlit plotly pandas ipywidgets

import pandas as pd
import plotly.express as px
import ipywidgets as widgets
from IPython.display import display
import plotly.graph_objects as go

    
# -----------------------------
# Complete Daily-Aggregated Dashboard with Alerts
# -----------------------------
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import ipywidgets as widgets
from IPython.display import display, clear_output

def _first_number(series):
    """Return the first unsigned decimal number found in each value, as float."""
    return series.astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)


def _join_alerts(values):
    """Comma-join the non-missing, non-empty alert strings of one group."""
    return ",".join([a for a in values if pd.notna(a) and a != ""])


# Read the sheet with both observation timestamps parsed up front.
df = pd.read_csv(sheet_url, parse_dates=["observationStart", "observationEnd"]).copy()

# Calendar day of each observation; this is the daily grouping key.
df["date"] = df["observationStart"].dt.date

# Column groups
vitals_cols = ["heartRate", "spo2", "temperature"]
adls_nutrition_cols = ["stepsTaken", "sleepHours", "exerciseMinutes", "calorieIntake", "waterIntakeMl"]

# Vitals: keep only the numeric part of each value (drops any unit text).
for col in vitals_cols:
    df[col] = _first_number(df[col])

# ADLs & nutrition: anything that is not a number becomes NaN.
for col in adls_nutrition_cols:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# One row per patient per day: vitals averaged, activity/nutrition summed,
# alert texts concatenated.
_day_keys = ["patientId", "date"]
_per_day = df.groupby(_day_keys)
vitals_daily = _per_day[vitals_cols].mean().reset_index()
adls_nutrition_daily = _per_day[adls_nutrition_cols].sum().reset_index()
alerts_daily = _per_day["alerts"].apply(_join_alerts).reset_index()
daily_df = vitals_daily.merge(adls_nutrition_daily, on=_day_keys)
daily_df = daily_df.merge(alerts_daily, on=_day_keys)

# Widgets: a dropdown listing every patient and an output area for the charts.
patient_ids = df["patientId"].unique()
patient_selector = widgets.Dropdown(description="Patient ID:", options=patient_ids)
output = widgets.Output()

def update_dashboard(change):
    """Redraw every dashboard panel for the patient selected in the dropdown.

    Renders inside the ``output`` widget, in order: a header line, one
    daily-average chart per vital (days whose alert text mentions the vital
    are marked in red), daily-total charts for activity and nutrition, the
    emotion distribution, the latest notes, and a correlation heatmap of the
    daily features.

    NOTE: ``df``, ``daily_df``, ``vitals_cols`` and ``adls_nutrition_cols``
    are module-level names looked up on every call.  Later cells of this
    notebook rebind them, so a dropdown change made after those cells have
    run operates on the rebound objects.

    Args:
        change: Notification passed by ``patient_selector.observe`` (``None``
            for the initial draw).  It is not inspected; the selection is
            read from ``patient_selector.value``.
    """
    with output:
        # wait=True keeps the old charts on screen until the new ones arrive,
        # which avoids a visible flash on every selection change.
        clear_output(wait=True)

        patient_id = patient_selector.value
        patient_data = df[df["patientId"] == patient_id].copy()
        daily_data = daily_df[daily_df["patientId"] == patient_id]

        # Nothing to draw (and iloc[0] below would raise) without any rows.
        if patient_data.empty:
            print(f"No observations found for patient {patient_id}.")
            return

        # The "most recent" panels read the last row, so order the rows by
        # time instead of trusting the order of the sheet.  The stable sort
        # keeps sheet order for ties; rows without a timestamp go first so
        # they can never pass for the latest observation.
        patient_data = patient_data.sort_values(
            "observationStart", kind="stable", na_position="first"
        )

        # Header
        patient = patient_data.iloc[0]
        print(f"Patient ID: {patient['patientId']} | Age: {patient['age']} | Gender: {patient['gender']}\n")

        # -----------------------------
        # Vitals Charts with Alerts
        # -----------------------------
        has_alerts = "alerts" in daily_data.columns
        for col in vitals_cols:
            if col not in daily_data.columns:
                continue
            fig = go.Figure()
            # Normal daily average line
            fig.add_trace(go.Scatter(x=daily_data['date'], y=daily_data[col],
                                     mode='lines+markers', name=col,
                                     line=dict(color='blue')))
            # Highlight the days whose alert text mentions this vital.  The
            # column name is matched as a literal substring (regex=False) so
            # it is never interpreted as a pattern.
            if has_alerts:
                keyword = col.split(' ')[0]
                mentions = daily_data['alerts'].str.contains(keyword, na=False, regex=False)
                alert_points = daily_data[mentions]
                if not alert_points.empty:
                    fig.add_trace(go.Scatter(x=alert_points['date'], y=alert_points[col],
                                             mode='markers', name='Alert',
                                             marker=dict(color='red', size=10, symbol='x')))
            fig.update_layout(title=f"{col} Daily Average (with Alerts)",
                              xaxis_title="Date", yaxis_title=col,
                              template="plotly_white")
            fig.show()

        # -----------------------------
        # ADLs & Lifestyle Charts
        # -----------------------------
        for col in ["stepsTaken", "sleepHours", "exerciseMinutes"]:
            if col in daily_data.columns:
                fig = px.line(daily_data, x='date', y=col, markers=True,
                              title=f"{col} Daily Total", template="plotly_white")
                fig.update_layout(xaxis_title="Date", yaxis_title=col)
                fig.show()

        # -----------------------------
        # Nutrition Charts
        # -----------------------------
        for col in ["calorieIntake", "waterIntakeMl"]:
            if col in daily_data.columns:
                fig = px.bar(daily_data, x='date', y=col,
                             title=f"{col} Daily Total", template="plotly_white")
                fig.update_layout(xaxis_title="Date", yaxis_title=col)
                fig.show()

        # -----------------------------
        # Behaviour & Emotions
        # -----------------------------
        if "behaviourTags" in patient_data.columns:
            print("\nBehaviour Tags (most recent):", patient_data["behaviourTags"].iloc[-1])
        if "emotionTags" in patient_data.columns and patient_data["emotionTags"].notna().any():
            emotions = patient_data[["date", "emotionTags"]].dropna()
            # One row per individual tag of each comma-separated tag string.
            emotions = emotions.assign(emotion=emotions["emotionTags"].str.split(",")).explode("emotion")
            emotions["emotion"] = emotions["emotion"].str.strip()
            # A stray or trailing comma yields an empty tag; do not chart it.
            emotions = emotions[emotions["emotion"] != ""]
            emotion_counts = emotions.groupby("emotion").size().reset_index(name="count")
            if not emotion_counts.empty:
                fig = px.pie(emotion_counts, values="count", names="emotion",
                             title="Emotion Distribution", hole=0.4,
                             color_discrete_sequence=px.colors.qualitative.Pastel)
                fig.update_traces(textposition='inside', textinfo='percent+label')
                fig.show()

        # -----------------------------
        # Clinical Notes
        # -----------------------------
        if "nursingNote" in patient_data.columns:
            print("\nMost Recent Nursing Note:\n", patient_data["nursingNote"].iloc[-1])
        if "clinicalSummary" in patient_data.columns:
            print("\nMost Recent AI Clinical Summary:\n", patient_data["clinicalSummary"].iloc[-1])

        # -----------------------------
        # Correlation Matrix
        # -----------------------------
        print("\n--- Feature Correlation Matrix ---")
        # Apply the same presence check as the chart loops above, so a
        # feature name without a matching daily column cannot raise KeyError.
        numeric_cols = [c for c in vitals_cols + adls_nutrition_cols
                        if c in daily_data.columns]
        correlation_matrix = daily_data[numeric_cols].corr()
        fig_corr = px.imshow(
            correlation_matrix,
            text_auto=True,
            aspect="auto",
            color_continuous_scale=px.colors.diverging.RdBu,
            title="Daily Feature Correlation Matrix"
        )
        fig_corr.update_layout(
            xaxis_nticks=len(numeric_cols),
            yaxis_nticks=len(numeric_cols),
            template="plotly_white"
        )
        fig_corr.show()

# Show the selector with its output area, redraw on every selection change,
# and render once up front for the initially selected patient.
display(patient_selector, output)
patient_selector.observe(update_dashboard, names="value")
update_dashboard(None)

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.ensemble import IsolationForest
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Reload the observation sheet for per-patient anomaly detection.
df = pd.read_csv(sheet_url, parse_dates=["observationStart"])

# List of numerical features for the model
features = ["heartRate", "spo2", "temperature", "stepsTaken", "sleepHours", "exerciseMinutes"]

# Activity features: coerce to numbers so stray text becomes NaN instead of
# failing inside the scaler.
for col in ["stepsTaken", "sleepHours", "exerciseMinutes"]:
    df[col] = pd.to_numeric(df[col], errors="coerce")

# Vitals: keep only the first number found in each value (drops unit text).
for col in ["heartRate", "spo2", "temperature"]:
    df[col] = df[col].astype(str).str.extract(r"(\d+\.?\d*)")[0].astype(float)

# (column, legend label, line colour, y-axis label) for each per-patient chart.
anomaly_plots = (
    ("heartRate", "Heart Rate", "blue", "Heart Rate (bpm)"),
    ("stepsTaken", "Steps Taken", "green", "Steps Taken"),
)

# Per patient: the rows used, the fitted scaler and the fitted model.
patient_models = {}

# ----------------------------------------------------
# Loop through each patient to train their unique model
# ----------------------------------------------------
for patient_id in df['patientId'].unique():
    print(f"\n--- Building Anomaly Detection Profile for {patient_id} ---")
    patient_data = df[df['patientId'] == patient_id].copy()

    # The model cannot be fitted on missing values, so keep complete rows only.
    # NOTE(review): most scikit-learn releases reject NaN in
    # IsolationForest.fit - confirm against the installed version.
    rows_before = len(patient_data)
    patient_data = patient_data.dropna(subset=features)
    rows_dropped = rows_before - len(patient_data)
    if rows_dropped:
        print(f"Skipped {rows_dropped} row(s) with missing feature values.")
    if patient_data.empty:
        print("No complete observations for this patient; skipping.")
        continue

    # Order by time so the line charts below run left to right.
    patient_data = patient_data.sort_values("observationStart", kind="stable")

    # Select and scale the features
    X = patient_data[features]
    scaler = MinMaxScaler()
    X_scaled = scaler.fit_transform(X)

    # 'contamination' is the expected proportion of anomalies in the dataset;
    # 0.05 assumes 5% of the data points are abnormal.
    iso_forest = IsolationForest(contamination=0.05, random_state=42)
    # Despite its name, this column holds the fit_predict labels; the
    # filter below treats -1 as an anomaly.
    patient_data['anomaly_score'] = iso_forest.fit_predict(X_scaled)

    # Store the model and scaler for later use
    patient_models[patient_id] = {
        'data': patient_data,
        'scaler': scaler,
        'isolation_forest': iso_forest
    }

    # ----------------------------------------------------
    # Visualization and Analysis for the current patient
    # ----------------------------------------------------
    print("Visualizing Anomaly Detection Results:")

    # Identify the anomalies
    anomalies = patient_data[patient_data['anomaly_score'] == -1]

    # One chart per plotted feature, with the anomalous points marked in red.
    for plot_col, plot_label, plot_color, plot_ylabel in anomaly_plots:
        plt.figure(figsize=(12, 6))
        plt.plot(patient_data['observationStart'], patient_data[plot_col],
                 label=plot_label, color=plot_color)
        plt.scatter(anomalies['observationStart'], anomalies[plot_col],
                    color='red', s=100, label='Anomaly', marker='x')
        plt.title(f'{patient_id} {plot_label} with Detected Anomalies')
        plt.xlabel('Date')
        plt.ylabel(plot_ylabel)
        plt.legend()
        plt.grid(True)
        plt.show()

    # Print a summary of a few detected anomalies to demonstrate the model's output
    print("\n--- Summary of First 5 Detected Anomalies ---")
    if not anomalies.empty:
        print(anomalies[['observationStart', 'heartRate', 'stepsTaken', 'sleepHours', 'alerts']].head())
    else:
        print("No anomalies were detected based on the current contamination setting.")
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt
import seaborn as sns


# Reload the observation sheet for the state-classification model.
df = pd.read_csv(sheet_url, parse_dates=["observationStart"])

# Patient IDs are categorical labels, not quantities: store them as strings so
# grouping and one-hot encoding always see one consistent type.
df['patientId'] = df['patientId'].astype(str)

# Columns that should be numeric but may arrive as text
vitals_cols = ["heartRate", "spo2", "temperature", "bloodPressure"]
adls_nutrition_cols = ["stepsTaken", "sleepHours", "exerciseMinutes", "calorieIntake", "waterIntakeMl"]

# Keep the first number found in each value; anything without one becomes NaN.
# NOTE(review): only the FIRST number is kept, so a bloodPressure value that
# holds two numbers loses the second one - confirm this is intended.
for col in vitals_cols + adls_nutrition_cols:
    extracted = df[col].astype(str).str.extract(r'(\d+\.?\d*)', expand=False)
    df[col] = pd.to_numeric(extracted, errors='coerce')

# Aggregate data to a daily level: vitals averaged, activity/nutrition summed,
# and the day's label taken from its first recorded state.
daily_df = df.groupby(['patientId', df['observationStart'].dt.date]).agg(
    heartRate=('heartRate', 'mean'),
    spo2=('spo2', 'mean'),
    temperature=('temperature', 'mean'),
    stepsTaken=('stepsTaken', 'sum'),
    sleepHours=('sleepHours', 'sum'),
    exerciseMinutes=('exerciseMinutes', 'sum'),
    calorieIntake=('calorieIntake', 'sum'),
    waterIntakeMl=('waterIntakeMl', 'sum'),
    state=('state', 'first')
).reset_index()

# ----------------------------------------
# Feature Engineering and Encoding
# ----------------------------------------
# Define features and target
features = ['heartRate', 'spo2', 'temperature', 'stepsTaken', 'sleepHours', 'exerciseMinutes', 'calorieIntake', 'waterIntakeMl', 'patientId']
target = 'state'

# A day without a label cannot be used for supervised training.  Filling the
# label with 0 would invent a bogus class, so drop those days instead.  The
# index is rebuilt so the positional concat further down stays aligned.
daily_df = daily_df.dropna(subset=[target]).reset_index(drop=True)

# Fill gaps in the numeric features only (for example, a day with no usable
# vital reading has a NaN mean).
numeric_features = [name for name in features if name != 'patientId']
daily_df[numeric_features] = daily_df[numeric_features].fillna(0)

X = daily_df[features]
y = daily_df[target]

# One-hot encode the 'patientId' to make it a usable feature for the model
encoder = OneHotEncoder(handle_unknown='ignore')
patient_id_encoded = encoder.fit_transform(X[['patientId']]).toarray()
patient_id_df = pd.DataFrame(patient_id_encoded, columns=encoder.get_feature_names_out(['patientId']))

# Combine numerical features with the encoded patient ID
X_numerical = X.drop('patientId', axis=1)
X_combined = pd.concat([X_numerical, patient_id_df], axis=1)

# Split the data into training and testing sets.  A stratified split needs at
# least two samples of every class; fall back to a plain split otherwise
# rather than failing.
class_counts = y.value_counts()
can_stratify = len(class_counts) > 0 and class_counts.min() >= 2
if not can_stratify:
    print("Warning: at least one state has fewer than 2 days of data; splitting without stratification.")
X_train, X_test, y_train, y_test = train_test_split(
    X_combined, y, test_size=0.2, random_state=42,
    stratify=y if can_stratify else None
)
print("Data successfully cleaned, aggregated, and split for training.")
