In [3]:
import pandas as pd
import os
from pathlib import Path

def load_and_merge():
   
    hr_folder = "data/raw_data/Heartrate_Data"
    hr_dfs = []
    for file in Path(hr_folder).glob("*.csv"):
        df = pd.read_csv(file)
        df = df.rename(columns={"Value": "heart_rate", "Time": "timestamp"})
        hr_dfs.append(df[["timestamp", "heart_rate"]])
    
  
    spo2_folder = "data/raw_data/SpO2_Data"
    spo2_dfs = []
    for file in Path(spo2_folder).glob("*.csv"):
        df = pd.read_csv(file)
        df = df.rename(columns={"Value": "blood_oxygen", "Time": "timestamp"})
        spo2_dfs.append(df[["timestamp", "blood_oxygen"]])
    
  
    merged_df = pd.merge(
        pd.concat(hr_dfs),
        pd.concat(spo2_dfs),
        on="timestamp",
        how="inner"  
    )
    
  
    merged_df["activity_level"] = pd.cut(
        merged_df["heart_rate"],
        bins=[0, 60, 100, 200],
        labels=["low", "moderate", "high"]
    )
    
    return merged_df.dropna()

if __name__ == "_main_":
    df = load_and_merge()
    df.to_csv("data/processed_data.csv", index=False)
    print(" Data merged and saved to 'data/processed_data.csv'")

In [5]:
import pandas as pd
from pathlib import Path

def display_raw_data():
    
    hr_folder = "data/raw_data/Heartrate_Data"
    print("\n=== RAW HEART RATE DATA ===")
    for i, file in enumerate(Path(hr_folder).glob("*.csv")):
        df = pd.read_csv(file)
        print(f"\nFile {i+1}: {file.name}")
        print(df.head(3))  
        print(f"Records: {len(df)} | Time Range: {df['Time'].min()} to {df['Time'].max()}")

   
    spo2_folder = "data/raw_data/SpO2_Data"
    print("\n=== RAW SpO2 DATA ===")
    for i, file in enumerate(Path(spo2_folder).glob("*.csv")):
        df = pd.read_csv(file)
        print(f"\nFile {i+1}: {file.name}")
        print(df.head(3)) 
        print(f"Records: {len(df)} | Time Range: {df['Time'].min()} to {df['Time'].max()}")

if __name__ == "__main__":
    display_raw_data()


=== RAW HEART RATE DATA ===

=== RAW SpO2 DATA ===


In [11]:

import numpy as np
from pathlib import Path


Path("data").mkdir(exist_ok=True)


np.random.seed(42) 
data = {
    'id': range(1, 101),
    'value_1': np.random.normal(0, 1, 100),
    'value_2': np.random.normal(5, 2, 100),
    'category': np.random.choice(['A', 'B', 'C'], 100)
}


df = pd.DataFrame(data)


df['processed_value'] = df['value_1'] + df['value_2']
df['normalized'] = (df['processed_value'] - df['processed_value'].mean()) / df['processed_value'].std()


output_path = Path("data/processed_data.csv")
df.to_csv(output_path, index=False)

print(f"Data processed and saved to: {output_path.absolute()}")

Data processed and saved to: C:\Users\Admin\data\processed_data.csv


In [12]:

from pathlib import Path


Path("data").mkdir(exist_ok=True)


input_path = Path("data/processed_data.csv")


if not input_path.exists():
    raise FileNotFoundError(f"Please run data processing script first. Expected file at: {input_path.absolute()}")

In [22]:
from sklearn.ensemble import IsolationForest
import joblib
import pandas as pd 

df = pd.read_csv(r'\Users\Admin\data\processed_data.csv')


print("Available columns:", df.columns.tolist())


model = IsolationForest(
    contamination=0.05, 
    random_state=42,
    n_estimators=100
)


features = df.columns.tolist()[:2]  
print(f"Using these columns for training: {features}")
model.fit(df[features]) 

import os  
os.makedirs('models', exist_ok=True) 
joblib.dump(model, 'models/anomaly_model.pkl')
print(" Model trained on Kaggle data and saved to models/anomaly_model.pkl")

Available columns: ['id', 'value_1', 'value_2', 'category', 'processed_value', 'normalized']
Using these columns for training: ['id', 'value_1']
 Model trained on Kaggle data and saved to models/anomaly_model.pkl


In [26]:
def test_model():

    try:
      
        test_data_path = Path("your_actual_data_directory/processed_data.csv")
       
        
        if not test_data_path.exists():
            raise FileNotFoundError(f"Test data not found at {test_data_path}")
        
        test_df = pd.read_csv(test_data_path)
        print(f" Test data loaded: {len(test_df)} records")
    except Exception as e:
        print(f" Error loading test data: {e}")
        return  
    
    
    required_cols = ['heart_rate', 'blood_oxygen']
    if not all(col in test_df.columns for col in required_cols):
        available_required_cols = [col for col in required_cols if col in test_df.columns]
        if not available_required_cols:
            raise ValueError("None of the required columns are available in the dataset")
        X_test = test_df[available_required_cols]
       
    else:
        X_test = test_df[required_cols]
    
  
    test_df['anomaly_score'] = model.decision_function(X_test)
    test_df['prediction'] = model.predict(X_test)
    test_df['prediction'] = test_df['prediction'].map({1: 0, -1: 1})  
    
  
    if 'true_label' in test_df.columns:  
        y_true = test_df['true_label']
        y_pred = test_df['prediction']
        
        print("\n Classification Report:")
        print(classification_report(y_true, y_pred, target_names=['Normal', 'Anomaly']))
        
        print("\n Confusion Matrix:")
        cm = confusion_matrix(y_true, y_pred)
        sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', 
                    xticklabels=['Normal', 'Anomaly'], 
                    yticklabels=['Normal', 'Anomaly'])
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.title('Anomaly Detection Performance')
        plt.show()
    else:
        print("\n Anomaly Distribution (no ground truth available):")
        print(test_df['prediction'].value_counts(normalize=True))
    

    output_path = Path("results/predictions.csv")
    output_path.parent.mkdir(exist_ok=True)
    test_df.to_csv(output_path, index=False)
    print(f"\n Predictions saved to {output_path.absolute()}")
    

    print("\n Top 5 Anomalies Detected:")
    print(test_df.nlargest(5, 'anomaly_score')[['timestamp', 'heart_rate', 'blood_oxygen', 'anomaly_score']])

if __name__ == "__main__":
    test_model()

 Error loading test data: Test data not found at your_actual_data_directory\processed_data.csv
