In [13]:
import pandas as pd
import numpy as np
import xgboost as xgb
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import make_pipeline
import json
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
import joblib

# 1. Wczytywanie i przetwarzanie danych
# -------------------------------------------------
print("Ładowanie danych...")

# Wczytaj dane glikemiczne
glucose_df = pd.read_csv('Michał._glucose_8-3-2025.csv', skiprows=1)
glucose_df['date'] = pd.to_datetime(glucose_df['Device Timestamp'], format='%d-%m-%Y %H:%M', errors='coerce')
glucose_df = glucose_df[['date', 'Historic Glucose mg/dL']].rename(columns={'Historic Glucose mg/dL': 'glucose'})
glucose_df = glucose_df.dropna()

# Wczytaj dane posiłków
with open('data (6).json') as f:
    meals_data = json.load(f)

# 2. Integracja danych
# -------------------------------------------------
print("\nIntegracja danych...")

records = []
for user_id, entries in meals_data.items():
    for entry in entries:
        try:
            calc_data = entry.get('calculatorData')
            if not calc_data:
                continue
                
            meal_time = pd.to_datetime(calc_data['date']).tz_localize(None)
            meals = entry.get('meals', [])
            
            record = {
                'timestamp': meal_time,
                'pre_glucose': calc_data['glucose'],
                'insulin': calc_data['units']['short'],
                'carbs': sum(m.get('carbs', 0) for m in meals),
                'hour': meal_time.hour,
                'weekday': meal_time.weekday()
            }
            
            # Analiza glikemii 1-3h po posiłku
            post_meal = glucose_df[
                (glucose_df['date'] > meal_time + pd.Timedelta(hours=1)) &
                (glucose_df['date'] <= meal_time + pd.Timedelta(hours=3))
            ]
            
            if not post_meal.empty:
                record['max_glucose'] = post_meal['glucose'].max()
                record['min_glucose'] = post_meal['glucose'].min()
                record['glucose_variability'] = record['max_glucose'] - record['min_glucose']
            else:
                record['max_glucose'] = np.nan
                record['min_glucose'] = np.nan
                record['glucose_variability'] = np.nan
            
            records.append(record)
        except Exception as e:
            print(f"Błąd przetwarzania rekordu: {str(e)[:50]}...")
            continue

full_df = pd.DataFrame(records).dropna()

# 3. Definicja klas z automatycznym mapowaniem
# -------------------------------------------------
print("\nTworzenie klas docelowych...")

class_mapping = {
    (True, False): 0,   # Hipoglikemia bez hiperglikemii
    (False, True): 1,   # Hiperglikemia bez hipoglikemii
    (True, True): 2,    # Mieszany efekt
    (False, False): 3   # Prawidłowy zakres
}

full_df['hypoglycemia'] = full_df['min_glucose'] < 70
full_df['hyperglycemia'] = full_df['max_glucose'] > 180
full_df['dose_class'] = full_df[['hypoglycemia', 'hyperglycemia']].apply(
    lambda x: class_mapping[(x[0], x[1])], axis=1
)

# 4. Transformacje cech
# -------------------------------------------------
print("\nPrzygotowanie cech...")

# Bezpieczne transformacje
full_df['carb_ratio'] = np.where(
    full_df['insulin'] > 0,
    full_df['carbs'] / full_df['insulin'],
    0
)

full_df['glucose_change'] = full_df['max_glucose'] - full_df['pre_glucose']
full_df['hour_sin'] = np.sin(2 * np.pi * full_df['hour']/24)
full_df['hour_cos'] = np.cos(2 * np.pi * full_df['hour']/24)
full_df['insulin_efficiency'] = np.where(
    full_df['glucose_change'] != 0,
    full_df['insulin'] / abs(full_df['glucose_change']),
    0
)

# 5. Analiza i przygotowanie danych
# -------------------------------------------------
features = [
    'pre_glucose', 'insulin', 'carbs', 
    'hour_sin', 'hour_cos', 'weekday',
    'carb_ratio', 'glucose_change', 
    'insulin_efficiency'
]

X = full_df[features]
y = full_df['dose_class']

# Automatyczne wykrywanie klas
unique_classes = sorted(y.unique())
class_names = {
    0: 'Hipoglikemia',
    1: 'Hiperglikemia',
    2: 'Mieszany efekt',
    3: 'Prawidłowy'
}

print("\nRozkład klas:")
print(y.value_counts().sort_index())

# 6. Pipeline przetwarzania
# -------------------------------------------------
preprocessor = ColumnTransformer([
    ('scale', StandardScaler(), ['pre_glucose', 'insulin', 'carbs', 'carb_ratio', 'glucose_change']),
    ('time', 'passthrough', ['hour_sin', 'hour_cos']),
    ('encode', OneHotEncoder(handle_unknown='ignore'), ['weekday'])
])

model = make_pipeline(
    preprocessor,
    SMOTE(sampling_strategy='not majority', k_neighbors=2),
    xgb.XGBClassifier(
        objective='multi:softmax',
        num_class=len(unique_classes),
        eval_metric='mlogloss',
        n_estimators=300,
        max_depth=5,
        learning_rate=0.1,
        subsample=0.9,
        colsample_bytree=0.9,
        gamma=0.1,
        random_state=42
    )
)

# 7. Trenowanie modelu
# -------------------------------------------------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, 
    test_size=0.2, 
    stratify=y,
    random_state=42
)

print("\nTrenowanie modelu...")
model.fit(X_train, y_train)

# 8. Ewaluacja
# -------------------------------------------------
print("\nRaport klasyfikacji:")
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred, target_names=class_names.values()))

# 9. Funkcje diagnostyczne
# -------------------------------------------------
def plot_feature_importance(model):
    xgb_model = model.named_steps['xgbclassifier']
    feature_names = model.named_steps['columntransformer'].get_feature_names_out()
    
    importance = pd.Series(xgb_model.feature_importances_, index=feature_names)
    importance = importance.sort_values(ascending=True)
    
    plt.figure(figsize=(10,6))
    importance.plot(kind='barh', color='teal')
    plt.title('Ważność cech w modelu')
    plt.tight_layout()
    plt.show()

def predict_dose(sample):
    sample_df = pd.DataFrame([sample])
    
    # Transformacje
    sample_df['hour_sin'] = np.sin(2 * np.pi * sample_df['hour']/24)
    sample_df['hour_cos'] = np.cos(2 * np.pi * sample_df['hour']/24)
    sample_df['carb_ratio'] = sample_df['carbs'] / (sample_df['insulin'] + 1e-6)
    sample_df['glucose_change'] = 180 - sample_df['pre_glucose']  # Zakładany cel
    sample_df['insulin_efficiency'] = sample_df['insulin'] / (abs(sample_df['glucose_change']) + 1e-6)
    
    # Predykcja
    proba = model.predict_proba(sample_df[features])[0]
    prediction = model.predict(sample_df[features])[0]
    
    print("\nWejściowe parametry:")
    print(f"- Glukoza przed: {sample['pre_glucose']}")
    print(f"- Insulina: {sample['insulin']}")
    print(f"- Węglowodany: {sample['carbs']}")
    print(f"- Godzina: {sample['hour']}:00")
    
    print("\nPrawdopodobieństwa:")
    for cls, prob in zip(sorted(class_names), proba):
        print(f"- {class_names[cls]}: {prob:.1%}")
    
    print(f"\nZalecenie: {class_names[prediction]}")

# 10. Testowanie
# -------------------------------------------------
test_case = {
    'pre_glucose': 180,
    'insulin': 5,
    'carbs': 5,
    'hour': 13,
    'weekday': 3
}

print("\nTestowanie przypadku:")
predict_dose(test_case)
plot_feature_importance(model)

# Zapisz model
joblib.dump(model, 'insulin_dose_model_prod.pkl')
print("\nModel zapisano jako: insulin_dose_model_prod.pkl")

Ładowanie danych...

Integracja danych...

Tworzenie klas docelowych...

Przygotowanie cech...

Rozkład klas:
dose_class
1    37
3    54
Name: count, dtype: int64

Trenowanie modelu...


ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0 1], got [1 3]