In [None]:
from google.colab import drive

# Mount Google Drive; the later cells read their CSV files from paths below this mount point.
drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)


In [None]:
import pandas as pd
import os

# Path to the folder containing CSV files
folder_path = '/content/drive/My Drive/Preprocessed'

# Read every semicolon-delimited CSV in the folder. The frames are collected in a
# list and concatenated once at the end: calling pd.concat inside the loop copies
# all previously loaded rows again on every iteration.
csv_frames = []

# sorted() makes the row order reproducible; os.listdir returns names in arbitrary order.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith('.csv'):  # Skip anything that is not a CSV file
        continue
    file_path = os.path.join(folder_path, file_name)
    # Specify the semicolon as the delimiter
    temp_data = pd.read_csv(file_path, sep=';')
    csv_frames.append(temp_data)

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
all_data = pd.concat(csv_frames, ignore_index=True) if csv_frames else pd.DataFrame()

# Display the combined dataset
print("Combined DataFrame:")
all_data.info()  # info() prints its report itself and returns None, so it is not wrapped in print()
print(all_data.head())


In [None]:
import pandas as pd

# Inspect a single file on its own before combining everything.
file_path = '/content/drive/My Drive/Preprocessed/HUPA0015P.csv'  # Replace with the actual path

# sep=';' handles the semicolon delimiter; header=None keeps the first line of the
# file as row 0 instead of using it for the column names.
data = pd.read_csv(
    file_path,
    sep=';',
    header=None,
)

# Show the first few rows
first_rows = data.head()
print(first_rows)

In [None]:
# Re-read the file selected in the previous cell, this time taking the column
# names from its first line (header=0) and parsing with the 'python' engine.
data = pd.read_csv(file_path, engine='python', header=0, sep=';')

# Print the column labels, then the first few rows.
for inspected in (data.columns, data.head()):
    print(inspected)


In [None]:
import os

# Path to the folder
folder_path = '/content/drive/My Drive/Preprocessed'

# Labels assigned to the split fields of every file. Assigning them raises
# ValueError when a file does not split into exactly this many columns.
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Frames are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies all previously loaded rows on every iteration.
split_frames = []

# sorted() makes the row order reproducible; os.listdir returns names in arbitrary order.
for file_name in sorted(os.listdir(folder_path)):
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(folder_path, file_name)

    # Read each line as a single text column, then split it on ';'.
    # NOTE(review): header=None means a header line, if the files have one, is kept
    # as an ordinary data row, and this approach relies on lines containing no commas.
    temp_data = pd.read_csv(file_path, header=None)
    temp_data_split = temp_data[0].str.split(';', expand=True)
    temp_data_split.columns = column_names

    split_frames.append(temp_data_split)

# pd.concat raises on an empty list, so fall back to an empty frame when no CSV was found.
all_data = pd.concat(split_frames, ignore_index=True) if split_frames else pd.DataFrame()

all_data.info()  # info() prints its report itself and returns None, so it is not wrapped in print()
print(all_data.head())


In [None]:

import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Combine all preprocessed CSV files
preprocessed_dir = '/content/drive/My Drive/Preprocessed'
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Frames are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies all previously loaded rows on every iteration.
frames = []

# sorted() makes the row order (and therefore the train/test split) reproducible;
# os.listdir returns names in arbitrary order.
for file_name in sorted(os.listdir(preprocessed_dir)):
    # Same filter as the earlier loading cells: only CSV files are parsed.
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(preprocessed_dir, file_name)
    # Read each line as a single text column, then split it on ';'.
    # NOTE(review): header=None means a header line, if the files have one, becomes a
    # data row; such a row is removed below because it does not parse as numbers.
    temp_data = pd.read_csv(file_path, header=None)
    temp_data_split = temp_data[0].str.split(';', expand=True)
    temp_data_split.columns = column_names
    frames.append(temp_data_split)

if not frames:
    raise FileNotFoundError(f"No .csv files found in {preprocessed_dir}")
all_data = pd.concat(frames, ignore_index=True)

# Handle missing values (rows with fewer fields than expected)
all_data = all_data.dropna()

# Separate features and target
target_column = 'glucose'  # Replace with your actual target column name

# Convert the target and every feature to numeric. 'time' is not used as a feature.
# errors='coerce' turns text that is not a number into NaN; infinities are treated
# the same way because MinMaxScaler rejects them.
numeric_data = all_data.drop(columns=['time']).apply(pd.to_numeric, errors='coerce')
numeric_data = numeric_data.replace([float('inf'), float('-inf')], float('nan'))

# Drop the rows again now that unparseable values have become NaN; the dropna()
# above runs on the raw text and cannot see them.
numeric_data = numeric_data.dropna()

X = numeric_data.drop(columns=[target_column])
y = numeric_data[target_column]

# Split into training and testing sets before scaling so that the scaler never
# sees the test rows.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features: fit the scaling range on the training rows only, then apply
# the same transformation to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Scaled copy of the full feature matrix, kept under its original name for any
# later code that still refers to it.
X_scaled = scaler.transform(X)

In [None]:
import os

import numpy as np  # needed for np.inf / np.nan below; it was not imported in this or any earlier cell
import pandas as pd
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor

# Combine all preprocessed CSV files
preprocessed_dir = '/content/drive/My Drive/Preprocessed'
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Frames are collected in a list and concatenated once at the end: calling
# pd.concat inside the loop copies all previously loaded rows on every iteration.
frames = []

# sorted() makes the row order (and therefore the train/test split) reproducible;
# os.listdir returns names in arbitrary order.
for file_name in sorted(os.listdir(preprocessed_dir)):
    # Same filter as the earlier loading cells: only CSV files are parsed.
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(preprocessed_dir, file_name)
    # Read each line as a single text column, then split it on ';'.
    # NOTE(review): header=None means a header line, if the files have one, becomes a
    # data row; such a row is removed below because its target does not parse as a number.
    temp_data = pd.read_csv(file_path, header=None)
    temp_data_split = temp_data[0].str.split(';', expand=True)
    temp_data_split.columns = column_names
    frames.append(temp_data_split)

if not frames:
    raise FileNotFoundError(f"No .csv files found in {preprocessed_dir}")
all_data = pd.concat(frames, ignore_index=True)

# Handle missing values (rows with fewer fields than expected)
all_data = all_data.dropna()

# Separate features and target
target_column = 'glucose'  # Replace with your actual target column name
X = all_data.drop(columns=[target_column, 'time'])  # Drop 'time' column from features
y = all_data[target_column]

# Convert columns to numeric, errors='coerce' to handle potential non-numeric values
X = X.apply(pd.to_numeric, errors='coerce')
y = pd.to_numeric(y, errors='coerce')  # Convert y to numeric as well

# Replace infinite values with NaN so they are handled like other missing values
X = X.replace([np.inf, -np.inf], np.nan)
y = y.replace([np.inf, -np.inf], np.nan)

# Rows without a usable target are dropped rather than imputed: filling the target
# with its mean would invent labels for the model to learn from and be scored on.
has_target = y.notna()
X = X.loc[has_target]
y = y.loc[has_target]

# Split into training and testing sets before imputing and scaling, so that no
# statistic computed from the test rows influences training.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Impute missing feature values using the per-column mean of the training rows
feature_means = X_train.mean()
X_train = X_train.fillna(feature_means)
X_test = X_test.fillna(feature_means)

# Normalize features: fit the scaling range on the training rows only, then apply
# the same transformation to the test rows.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Scaled copy of the full feature matrix, kept under its original name for any
# later code that still refers to it.
X_scaled = scaler.transform(X.fillna(feature_means))

# Train XGBoost model
xgb_model = XGBRegressor(n_estimators=100, learning_rate=0.1, max_depth=6)
xgb_model.fit(X_train, y_train)

# Evaluate
predictions = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, predictions)
print("XGBoost Mean Squared Error:", mse)

In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from xgboost import XGBRegressor
from sklearn.metrics import mean_squared_error, confusion_matrix, precision_score, recall_score, accuracy_score, f1_score
import numpy as np

# This cell reuses xgb_model, X_test and y_test created by the XGBoost training
# cell, so that cell has to be run first.

# Evaluate
predictions_xgb = xgb_model.predict(X_test)
mse = mean_squared_error(y_test, predictions_xgb)
print("XGBoost Mean Squared Error:", mse)

# --- Categorization for Classification Metrics ---
thresholds = [70, 140]  # Example thresholds (adjust as needed)
# np.digitize assigns 0 to values below 70, 1 to values from 70 up to (not including) 140,
# and 2 to values of 140 or more, so there is one category more than there are thresholds.
category_labels = list(range(len(thresholds) + 1))
y_test_cat = np.digitize(y_test, thresholds)
predictions_xgb_cat = np.digitize(predictions_xgb, thresholds)

# --- Classification Metrics ---
# labels= fixes the matrix to one row/column per category in a known order; without it
# the shape depends on which categories happen to occur in the test set and predictions.
cm_xgb = confusion_matrix(y_test_cat, predictions_xgb_cat, labels=category_labels)
# zero_division=0 scores a category that is never predicted (or never present) as 0,
# the same value as the default, without emitting an undefined-metric warning.
precision_xgb = precision_score(y_test_cat, predictions_xgb_cat, average='weighted', zero_division=0)
recall_xgb = recall_score(y_test_cat, predictions_xgb_cat, average='weighted', zero_division=0)
accuracy_xgb = accuracy_score(y_test_cat, predictions_xgb_cat)
f1_xgb = f1_score(y_test_cat, predictions_xgb_cat, average='weighted', zero_division=0)

print("\nXGBoost Classification Metrics:")
print("Confusion Matrix:", cm_xgb)
print("Precision:", precision_xgb)
print("Recall:", recall_xgb)
print("Accuracy:", accuracy_xgb)
print("F1-score:", f1_xgb)