In [None]:
# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.
import kagglehub
# Fetch every Kaggle dataset used by this notebook through kagglehub. Each
# result is stored in a *_path variable (presumably the local directory of the
# downloaded or cached copy - confirm against the kagglehub documentation).
# NOTE(review): the later cells visible in this notebook do not read any of
# these *_path variables; the loader opens hard-coded /kaggle/input/... paths.
uciml_pima_indians_diabetes_database_path = kagglehub.dataset_download('uciml/pima-indians-diabetes-database')
mkachuee_bloodpressuredataset_path = kagglehub.dataset_download('mkachuee/BloodPressureDataset')
mkachuee_noninvasivebp_path = kagglehub.dataset_download('mkachuee/noninvasivebp')
shayanfazeli_heartbeat_path = kagglehub.dataset_download('shayanfazeli/heartbeat')
harunshimanto_epileptic_seizure_recognition_path = kagglehub.dataset_download('harunshimanto/epileptic-seizure-recognition')
qiriro_stress_path = kagglehub.dataset_download('qiriro/stress')
adibadea_chbmitseizuredataset_path = kagglehub.dataset_download('adibadea/chbmitseizuredataset')
abdallahwagih_mit_bih_arrhythmia_database_path = kagglehub.dataset_download('abdallahwagih/mit-bih-arrhythmia-database')

# Marker printed once all downloads above have returned.
print('Data source import complete.')


Using Colab cache for faster access to the 'pima-indians-diabetes-database' dataset.
Downloading from https://www.kaggle.com/api/v1/datasets/download/mkachuee/BloodPressureDataset?dataset_version_number=5...


100%|██████████| 4.60G/4.60G [00:56<00:00, 86.8MB/s]

Extracting files...





Downloading from https://www.kaggle.com/api/v1/datasets/download/mkachuee/noninvasivebp?dataset_version_number=1...


100%|██████████| 76.9M/76.9M [00:00<00:00, 177MB/s]

Extracting files...





Using Colab cache for faster access to the 'heartbeat' dataset.
Downloading from https://www.kaggle.com/api/v1/datasets/download/harunshimanto/epileptic-seizure-recognition?dataset_version_number=2...


100%|██████████| 2.77M/2.77M [00:00<00:00, 112MB/s]

Extracting files...





Downloading from https://www.kaggle.com/api/v1/datasets/download/qiriro/stress?dataset_version_number=1...


100%|██████████| 5.24G/5.24G [00:53<00:00, 106MB/s] 


Extracting files...
Downloading from https://www.kaggle.com/api/v1/datasets/download/adibadea/chbmitseizuredataset?dataset_version_number=8...


100%|██████████| 635M/635M [00:05<00:00, 130MB/s]

Extracting files...





Downloading from https://www.kaggle.com/api/v1/datasets/download/abdallahwagih/mit-bih-arrhythmia-database?dataset_version_number=1...


100%|██████████| 73.4M/73.4M [00:01<00:00, 56.8MB/s]

Extracting files...





Data source import complete.


In [None]:
# Step 1: Setup with Dataset Verification
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVR, SVC
from sklearn.metrics import classification_report, mean_absolute_error
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
import os

def load_or_simulate_dataset(dataset_name, default_rows=1000):
    """Load a known dataset from disk, falling back to synthetic data.

    Args:
        dataset_name: "diabetes", "arrhythmia" or "stress". Any other name
            has no CSV associated with it and goes straight to the fallback.
        default_rows: Number of rows to generate when simulating.

    Returns:
        The DataFrame read from the dataset's CSV file, or the result of
        ``simulate_dataset(dataset_name, default_rows)`` when the file is
        missing, unreadable, empty or cannot be parsed.
    """
    # Per dataset: CSV location and extra keyword arguments for read_csv.
    # NOTE(review): these locations are hard-coded and independent of the
    # paths returned by the kagglehub download cell; in the recorded run the
    # arrhythmia and stress files were not found - confirm the real paths.
    csv_sources = {
        "diabetes": ('/kaggle/input/pima-indians-diabetes-database/diabetes.csv', {}),
        "arrhythmia": ('/kaggle/input/mitbih-arrhythmia-database/mitbih_train.csv', {'header': None}),
        "stress": ('/kaggle/input/stress-detection-in-employees/Stress.csv', {}),
    }

    try:
        if dataset_name not in csv_sources:
            raise FileNotFoundError(dataset_name)
        csv_path, read_options = csv_sources[dataset_name]
        return pd.read_csv(csv_path, **read_options)
    except (OSError, pd.errors.EmptyDataError, pd.errors.ParserError):
        # Only I/O and parsing failures trigger the fallback; anything else
        # (KeyboardInterrupt, programming errors) now propagates instead of
        # being silently replaced by synthetic data.
        print(f"⚠️ {dataset_name} dataset not found - simulating data")
        return simulate_dataset(dataset_name, default_rows)

def simulate_dataset(dataset_name, rows):
    """Build a synthetic stand-in for one of the supported datasets.

    Args:
        dataset_name: "diabetes", "arrhythmia" or "stress".
        rows: Number of rows to generate.

    Returns:
        A DataFrame with ``rows`` rows. "diabetes" and "stress" have 10
        columns each; "arrhythmia" has 187 signal columns plus 'label'.

    Raises:
        ValueError: If ``dataset_name`` is not one of the supported names
            (previously this surfaced as an UnboundLocalError).
    """
    if dataset_name == "diabetes":
        data = pd.DataFrame({
            'Pregnancies': np.random.randint(0, 10, rows),
            'Glucose': np.random.normal(120, 30, rows).clip(70, 200),
            'BloodPressure': np.random.normal(70, 15, rows).clip(40, 100),
            'SkinThickness': np.random.normal(20, 5, rows),
            'Insulin': np.random.normal(100, 50, rows),
            'BMI': np.random.normal(30, 5, rows),
            'DiabetesPedigreeFunction': np.random.uniform(0.1, 1.5, rows),
            'Age': np.random.randint(20, 70, rows),
            'Outcome': np.random.binomial(1, 0.3, rows)
        })
        # 'hrv' is drawn from a different distribution depending on 'Outcome',
        # so this column is strongly tied to the label by construction.
        data['hrv'] = np.where(data['Outcome'] == 1,
                               np.random.normal(50, 5, rows),
                               np.random.normal(70, 5, rows))

    elif dataset_name == "arrhythmia":
        # 187 random signal columns followed by an integer label in 0..4.
        data = pd.DataFrame(np.random.randn(rows, 187))
        data['label'] = np.random.randint(0, 5, rows)

    elif dataset_name == "stress":
        data = pd.DataFrame({
            'snoring_range': np.random.normal(50, 10, rows),
            'respiration_rate': np.random.normal(20, 3, rows),
            'body_temp': np.random.normal(98, 1, rows),
            'limb_movement': np.random.normal(5, 2, rows),
            'blood_oxygen': np.random.normal(97, 2, rows),
            'eye_movement': np.random.normal(5, 2, rows),
            'sleeping_hours': np.random.normal(6, 1, rows),
            'heart_rate': np.random.normal(70, 10, rows),
            'stress': np.random.binomial(1, 0.25, rows)
        })
        # 'gsr' is generated from the 'stress' label in the same way 'hrv'
        # is generated from 'Outcome' above.
        data['gsr'] = np.where(data['stress'] == 1,
                               np.random.normal(8, 1, rows),
                               np.random.normal(4, 1, rows))

    else:
        raise ValueError(
            f"Unknown dataset name {dataset_name!r}; "
            "expected 'diabetes', 'arrhythmia' or 'stress'"
        )

    return data

# Obtain the three working DataFrames, in this order, from disk when the CSV
# is available and from the simulator otherwise.
diabetes_data = load_or_simulate_dataset(dataset_name="diabetes")
arrhythmia_data = load_or_simulate_dataset(dataset_name="arrhythmia")
stress_data = load_or_simulate_dataset(dataset_name="stress")

# Summarise what was obtained: one header line, then one shape per dataset.
dataset_summary = [
    "✅ Loaded datasets:",
    f"- Diabetes: {diabetes_data.shape}",
    f"- Arrhythmia: {arrhythmia_data.shape}",
    f"- Stress: {stress_data.shape}",
]
print("\n".join(dataset_summary))

⚠️ arrhythmia dataset not found - simulating data
⚠️ stress dataset not found - simulating data
✅ Loaded datasets:
- Diabetes: (768, 9)
- Arrhythmia: (1000, 188)
- Stress: (1000, 10)


In [None]:
# Step 2: Diabetes Model with HRV Simulation
# NOTE(review): 'hrv' is synthesised directly from 'Outcome' (mean 50 for
# positives, 70 for negatives, sd 5), so it leaks the label into the features.
# The near-perfect scores printed below measure that leak, not real
# predictive power - replace with a measured HRV signal before relying on it.
if 'hrv' not in diabetes_data.columns:
    # Seeded generator so the synthetic column, and therefore the reported
    # metrics, are reproducible from run to run.
    hrv_rng = np.random.default_rng(42)
    n_patients = len(diabetes_data)
    diabetes_data['hrv'] = np.where(diabetes_data['Outcome'] == 1,
                                    hrv_rng.normal(50, 5, n_patients),
                                    hrv_rng.normal(70, 5, n_patients))

# Feature matrix and target used by this cell and by the tuning cell below.
X_dia = diabetes_data[['Glucose', 'BMI', 'Age', 'hrv']]
y_dia = diabetes_data['Outcome']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X_dia, y_dia, test_size=0.2, random_state=42
)

# Fit the scaler on the training rows only, then apply it to both splits.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# random_state matches the value used by the hyperparameter-search cell so
# the baseline forest is reproducible as well.
rf_dia = RandomForestClassifier(n_estimators=100, random_state=42)
rf_dia.fit(X_train_scaled, y_train)

print("Diabetes Model Performance:")
print(classification_report(y_test, rf_dia.predict(X_test_scaled)))

Diabetes Model Performance:
              precision    recall  f1-score   support

           0       1.00      0.99      0.99        99
           1       0.98      1.00      0.99        55

    accuracy                           0.99       154
   macro avg       0.99      0.99      0.99       154
weighted avg       0.99      0.99      0.99       154



In [None]:
# Step 3: Arrhythmia Detection (Works with MIT-BIH or simulated)
# Name every column but the last 'ecg_<i>' and the last one 'label'.
arrhythmia_data.columns = [f'ecg_{i}' for i in range(arrhythmia_data.shape[1] - 1)] + ['label']

# Simple version for Kaggle (full LSTM would need more preprocessing)
X_arr = arrhythmia_data.iloc[:, :-1]
y_arr = arrhythmia_data['label']

# Binary classification (normal vs abnormal): label 0 -> 0, anything above -> 1
y_arr_binary = (y_arr > 0).astype(int)

# Evaluate on rows the forest has never seen. Scoring on the training rows
# (as this cell originally did) lets a random forest memorise them and report
# 1.00 even when the features are pure noise.
X_arr_train, X_arr_test, y_arr_train, y_arr_test = train_test_split(
    X_arr, y_arr_binary, test_size=0.2, random_state=42
)

rf_arr = RandomForestClassifier(random_state=42)
rf_arr.fit(X_arr_train, y_arr_train)

print("Arrhythmia Detection (Binary):")
print(classification_report(y_arr_test, rf_arr.predict(X_arr_test)))

Arrhythmia Detection (Binary):
              precision    recall  f1-score   support

           0       1.00      1.00      1.00       201
           1       1.00      1.00      1.00       799

    accuracy                           1.00      1000
   macro avg       1.00      1.00      1.00      1000
weighted avg       1.00      1.00      1.00      1000



In [None]:
# Step 4: Stress Detection with GSR Simulation
# NOTE(review): when 'gsr' is missing it is synthesised from the 'stress'
# label itself (mean 8 vs 4), so it leaks the target into the features.
if 'gsr' not in stress_data.columns:
    stress_data['gsr'] = np.where(stress_data['stress'] == 1,
                                  np.random.normal(8, 1, len(stress_data)),
                                  np.random.normal(4, 1, len(stress_data)))

# Full feature matrix / target; the SVC tuning cell cross-validates on these.
X_stress = stress_data[['gsr', 'heart_rate', 'body_temp']]
y_stress = stress_data['stress']

# Hold out 20% of the rows so the report below is not computed on the same
# rows the classifier was fitted to.
X_stress_train, X_stress_test, y_stress_train, y_stress_test = train_test_split(
    X_stress, y_stress, test_size=0.2, random_state=42
)

# random_state fixes the internal shuffling used for probability calibration.
svm_stress = SVC(probability=True, random_state=42)
svm_stress.fit(X_stress_train, y_stress_train)

print("Stress Detection Performance:")
print(classification_report(y_stress_test, svm_stress.predict(X_stress_test)))

Stress Detection Performance:
              precision    recall  f1-score   support

           0       0.89      1.00      0.94       750
           1       0.99      0.62      0.76       250

    accuracy                           0.90      1000
   macro avg       0.94      0.81      0.85      1000
weighted avg       0.91      0.90      0.89      1000



In [None]:
# Step 5: Blood Pressure Simulation
# NOTE(review): the original header says the dataset was missing, yet the
# download cell fetches two blood-pressure datasets; none of them is read
# here - confirm whether real data should replace this simulation.
# Create synthetic BP data using simple linear relationships.
bp_data = pd.DataFrame({
    'age': np.random.randint(20, 80, 500),
    'bmi': np.random.normal(25, 5, 500).clip(18, 40),
    'heart_rate': np.random.normal(70, 10, 500).clip(50, 100),
    'ptt': np.random.normal(0.15, 0.03, 500)  # Pulse Transit Time
})
bp_data['sbp'] = 110 + 0.5*bp_data['age'] + 0.3*bp_data['bmi'] - 0.2*bp_data['ptt']*1000
bp_data['dbp'] = 70 + 0.3*bp_data['age'] + 0.2*bp_data['bmi'] - 0.1*bp_data['ptt']*1000

# Add simulated ECG quality
bp_data['ecg_quality'] = np.random.uniform(0.8, 1.0, len(bp_data))

# The regressor only sees these two columns. 'sbp' also depends on 'age' and
# 'bmi', which are not among them, so part of the error is irreducible here.
bp_features = ['ptt', 'ecg_quality']

# Hold out 20% of the rows so the MAE is measured on unseen data rather than
# on the rows the SVR was fitted to.
bp_train, bp_test = train_test_split(bp_data, test_size=0.2, random_state=42)

# Train BP model
svr_sbp = SVR()
svr_sbp.fit(bp_train[bp_features], bp_train['sbp'])

print("BP Model MAE:", mean_absolute_error(
    bp_test['sbp'],
    svr_sbp.predict(bp_test[bp_features])
))

BP Model MAE: 8.616784912623283


In [None]:
# Step 6: Export for Smartwatch Integration
import joblib

# Collect the fitted scaler and the four fitted estimators under the keys the
# consumer of 'health_models.pkl' will look up, then write them as one file.
model_bundle = {
    'diabetes_scaler': scaler,
    'diabetes_model': rf_dia,
    'arrhythmia_model': rf_arr,
    'stress_model': svm_stress,
    'bp_model': svr_sbp
}
joblib.dump(model_bundle, 'health_models.pkl')

print("✅ All models exported successfully!")

✅ All models exported successfully!


In [None]:
# Célula para Otimização com Algoritmo Genético

# Instale a biblioteca se precisar
#!pip install deap

import random
from deap import base, creator, tools, algorithms
from sklearn.model_selection import cross_val_score

# --- Step 1: Configure the genetic algorithm ---
# Single-objective fitness that is maximised (weight +1.0).
# creator.create registers the new classes on the deap.creator module itself,
# so re-running this cell would define them a second time and make DEAP warn
# about overwriting; only create them when they do not exist yet.
if not hasattr(creator, "FitnessMax"):
    creator.create("FitnessMax", base.Fitness, weights=(1.0,))
if not hasattr(creator, "Individual"):
    creator.create("Individual", list, fitness=creator.FitnessMax)

toolbox = base.Toolbox()

# Hyperparameters the algorithm optimises for the diabetes random forest:
# gene 0 = n_estimators in [50, 200], gene 1 = max_depth in [5, 20].
toolbox.register("attr_n_estimators", random.randint, 50, 200)
toolbox.register("attr_max_depth", random.randint, 5, 20)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 (toolbox.attr_n_estimators, toolbox.attr_max_depth), n=1)

# A population is a plain list of individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Fitness function for the diabetes hyperparameter search.
def evaluate_diabetes(individual):
    """Score one candidate (n_estimators, max_depth) pair.

    Args:
        individual: Sequence whose first item is ``n_estimators`` and whose
            second item is ``max_depth``.

    Returns:
        A one-element tuple holding the mean 5-fold cross-validated accuracy,
        or ``(0,)`` when the evaluation raised.
    """
    n_estimators = individual[0]
    max_depth = individual[1]

    model = RandomForestClassifier(
        n_estimators=n_estimators,
        max_depth=max_depth,
        random_state=42
    )

    # Cross-validate on the training split only. X_dia/y_dia also contain the
    # rows held out as X_test/y_test, so tuning on them would let the test
    # set influence hyperparameter selection.
    try:
        score = cross_val_score(model, X_train, y_train, cv=5, scoring='accuracy').mean()
    except Exception as e:
        # Best effort: a failing candidate gets the worst fitness instead of
        # aborting the whole search.
        print(f"Erro na avaliação: {e}")
        score = 0
    return (score,)

# Genetic operators: fitness function, two-point crossover, bounded integer
# mutation (same ranges as the gene generators) and size-3 tournaments.
toolbox.register("evaluate", evaluate_diabetes)
toolbox.register("mate", tools.cxTwoPoint)
toolbox.register("mutate", tools.mutUniformInt, low=[50, 5], up=[200, 20], indpb=0.1)
toolbox.register("select", tools.selTournament, tournsize=3)

# --- Step 2: Run the genetic algorithm ---
print("Iniciando a otimização dos hiperparâmetros do modelo de diabetes...")

pop = toolbox.population(n=20)
hof = tools.HallOfFame(maxsize=1)  # keeps the single best individual seen

# Per-generation summary statistics over the fitness values.
stats = tools.Statistics(key=lambda candidate: candidate.fitness.values)
for stat_name, reducer in (("avg", np.mean), ("min", np.min), ("max", np.max)):
    stats.register(stat_name, reducer)

algorithms.eaSimple(
    pop,
    toolbox,
    cxpb=0.5,
    mutpb=0.2,
    ngen=20,
    stats=stats,
    halloffame=hof,
    verbose=True,
)

# --- Step 3: Show the best result ---
print("\n--- Resultados da Otimização do Modelo de Diabetes ---")
best_individual = hof[0]
best_score = best_individual.fitness.values[0]

print(f"Melhores hiperparâmetros: n_estimators={best_individual[0]}, max_depth={best_individual[1]}")
print(f"Precisão de Validação Cruzada do melhor modelo: {best_score:.4f}")

# Refit the winning configuration on the training split only. Fitting on the
# full X_dia (which contains X_test) and then scoring on X_test would report
# training-set performance as if it were held-out performance.
best_rf_dia = RandomForestClassifier(
    n_estimators=best_individual[0],
    max_depth=best_individual[1],
    random_state=42
)
best_rf_dia.fit(X_train, y_train)
print("\nDesempenho final do modelo otimizado:")
print(classification_report(y_test, best_rf_dia.predict(X_test)))

Collecting deap
  Downloading deap-1.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (13 kB)
Downloading deap-1.4.3-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (135 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/136.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m136.0/136.0 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: deap
Successfully installed deap-1.4.3
Iniciando a otimização dos hiperparâmetros do modelo de diabetes...
gen	nevals	avg     	min     	max     
0  	20    	0.988286	0.986988	0.989585
1  	11    	0.988351	0.988286	0.989585
2  	10    	0.988351	0.988286	0.989585
3  	12    	0.988221	0.986988	0.988286
4  	14    	0.988221	0.986988	0.988286
5  	9     	0.988286	0.988286	0.988286
6  	6     	0.988286	0.988286	0.988286
7  	17    	0.988156	0.986988	0.98

In [None]:
# Célula para Otimização de Hiperparâmetros do Modelo de Estresse

import random
from deap import base, creator, tools, algorithms
from sklearn.model_selection import cross_val_score
from sklearn.svm import SVC
import numpy as np

# The FitnessMax / Individual classes are expected to exist already on
# deap.creator (they are created by the diabetes optimisation cell), so they
# are not created again here.

toolbox = base.Toolbox()

# Genes for the SVC search: C sampled uniformly from [0.1, 10.0] and gamma
# sampled uniformly from [0.001, 1.0].
toolbox.register("attr_C", random.uniform, 0.1, 10.0)
toolbox.register("attr_gamma", random.uniform, 0.001, 1.0)

# One individual = one pass over the gene generators, i.e. [C, gamma].
svc_gene_generators = (toolbox.attr_C, toolbox.attr_gamma)
toolbox.register("individual", tools.initCycle, creator.Individual,
                 svc_gene_generators, n=1)

# A population is a plain list of such individuals.
toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# Fitness function for the stress-model (SVC) hyperparameter search.
def evaluate_stress(individual):
    """Score one candidate (C, gamma) pair.

    Args:
        individual: Sequence whose first item is ``C`` and whose second item
            is ``gamma``.

    Returns:
        A one-element tuple holding the mean 5-fold cross-validated accuracy.
        Candidates with a non-positive ``C`` or ``gamma``, and candidates
        whose evaluation raises, get the worst fitness (zero).
    """
    C = individual[0]
    gamma = individual[1]

    # Gaussian mutation can push either gene to zero or below. Such values
    # are not usable SVC hyperparameters: previously every fold failed and
    # the full scikit-learn error report was printed before scoring 0. Give
    # them the worst fitness directly instead.
    if C <= 0 or gamma <= 0:
        return (0.0,)

    model = SVC(C=C, gamma=gamma, probability=True, random_state=42)

    try:
        score = cross_val_score(model, X_stress, y_stress, cv=5, scoring='accuracy').mean()
    except Exception as e:
        # Best effort: any other failure also maps to the worst fitness so
        # the search can continue.
        print(f"Erro na avaliação: {e}")
        score = 0
    return (score,)

toolbox.register("evaluate", evaluate_stress)
toolbox.register("mate", tools.cxTwoPoint)  # crossover

# (lower, upper) bound per gene, identical to the ranges the initial
# population is sampled from: C in [0.1, 10.0], gamma in [0.001, 1.0].
SVC_GENE_BOUNDS = ((0.1, 10.0), (0.001, 1.0))


def mutate_svc_params(individual, mu, sigma, indpb):
    """Gaussian-mutate ``individual`` in place, then clip it to its bounds.

    Plain ``tools.mutGaussian`` is unbounded, so it can drive C or gamma to
    zero or below; every cross-validation fit then fails for that candidate.
    Clipping keeps each gene inside the range it was initialised from.

    Args:
        individual: The [C, gamma] individual to mutate.
        mu: Per-gene mean of the Gaussian noise.
        sigma: Per-gene standard deviation of the Gaussian noise.
        indpb: Independent probability of mutating each gene.

    Returns:
        A one-element tuple containing the mutated individual, which is the
        return shape DEAP expects from a mutation operator.
    """
    mutant, = tools.mutGaussian(individual, mu=mu, sigma=sigma, indpb=indpb)
    for gene_index, (lower, upper) in enumerate(SVC_GENE_BOUNDS):
        mutant[gene_index] = min(max(mutant[gene_index], lower), upper)
    return (mutant,)


# Gaussian mutation suits floating-point genes.
# mu = mean, sigma = standard deviation, indpb = per-gene mutation probability.
toolbox.register("mutate", mutate_svc_params, mu=[0, 0], sigma=[1, 0.1], indpb=0.1)

toolbox.register("select", tools.selTournament, tournsize=3)  # selection

# --- 2. Run the genetic algorithm ---
print("Iniciando a otimização dos hiperparâmetros do modelo de estresse...")

pop = toolbox.population(n=20)
hof = tools.HallOfFame(maxsize=1)  # keeps the single best individual seen

# Per-generation summary statistics over the fitness values.
stats = tools.Statistics(key=lambda candidate: candidate.fitness.values)
for stat_name, reducer in (("avg", np.mean), ("min", np.min), ("max", np.max)):
    stats.register(stat_name, reducer)

algorithms.eaSimple(
    pop,
    toolbox,
    cxpb=0.5,
    mutpb=0.2,
    ngen=20,
    stats=stats,
    halloffame=hof,
    verbose=True,
)

# --- 3. Show the best result ---
print("\n--- Resultados da Otimização do Modelo de Estresse ---")
best_individual_stress = hof[0]
best_score_stress = best_individual_stress.fitness.values[0]

print(f"Melhores hiperparâmetros: C={best_individual_stress[0]:.4f}, gamma={best_individual_stress[1]:.4f}")
print(f"Precisão de Validação Cruzada do melhor modelo: {best_score_stress:.4f}")

# Report on rows the final model was not fitted to; scoring on the training
# rows themselves overstates performance.
# NOTE(review): the search above cross-validated on all of X_stress, so these
# held-out rows still influenced which hyperparameters won - treat the report
# as mildly optimistic.
X_stress_fit, X_stress_holdout, y_stress_fit, y_stress_holdout = train_test_split(
    X_stress, y_stress, test_size=0.2, random_state=42
)

# random_state matches the models scored during the search.
best_svm_stress = SVC(
    C=best_individual_stress[0],
    gamma=best_individual_stress[1],
    probability=True,
    random_state=42,
)
best_svm_stress.fit(X_stress_fit, y_stress_fit)
print("\nDesempenho final do modelo otimizado:")
print(classification_report(y_stress_holdout, best_svm_stress.predict(X_stress_holdout)))

Iniciando a otimização dos hiperparâmetros do modelo de estresse...
gen	nevals	avg    	min  	max  
0  	20    	0.96575	0.948	0.983
1  	14    	0.9708 	0.881	0.985
2  	9     	0.97925	0.926	0.985
3  	17    	0.9843 	0.981	0.986
4  	12    	0.9847 	0.982	0.986
5  	9     	0.98535	0.985	0.986
6  	9     	0.98575	0.985	0.986
7  	12    	0.986  	0.986	0.986
8  	14    	0.98555	0.982	0.986
9  	13    	0.98565	0.981	0.986
10 	12    	0.9858 	0.982	0.986
11 	10    	0.9857 	0.982	0.986
12 	8     	0.986  	0.986	0.986
13 	14    	0.98595	0.985	0.986
14 	7     	0.986  	0.986	0.986
15 	13    	0.986  	0.986	0.986
16 	12    	0.9858 	0.982	0.986
17 	16    	0.986  	0.986	0.986
Erro na avaliação: 
All the 5 fits failed.
It is very likely that your model is misconfigured.
You can try to debug the error by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most rece