In [None]:
# Attach the user's Google Drive to the Colab runtime. The data-loading cells
# below read their CSV files from paths underneath this mount point.
from google.colab import drive

drive.mount(
    '/content/drive'
)


In [None]:
import pandas as pd
import os

# Path to the folder containing CSV files
folder_path = '/content/drive/My Drive/Preprocessed'

# Collect the CSV file names up front. os.listdir() returns entries in
# arbitrary order, so the listing is sorted to make the row order of the
# combined DataFrame reproducible from run to run.
csv_file_names = sorted(
    entry for entry in os.listdir(folder_path)
    if entry.endswith('.csv')  # Ensure it's a CSV file
)

# Read each file into its own frame and concatenate once at the end;
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration.
frames = []
for file_name in csv_file_names:
    file_path = os.path.join(folder_path, file_name)
    # Specify the semicolon as the delimiter
    temp_data = pd.read_csv(file_path, sep=';')
    frames.append(temp_data)

# A folder without CSV files still yields an empty DataFrame, as before
# (pd.concat raises on an empty list, hence the guard).
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# Display the combined dataset
print("Combined DataFrame:")
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
all_data.info()
print(all_data.head())


In [None]:
import pandas as pd

# Inspect a single file to see how it is laid out on disk.
# Point `file_path` at whichever file you want to look at.
file_path = '/content/drive/My Drive/Preprocessed/HUPA0015P.csv'

# Fields are separated by semicolons. header=None keeps the file's first line
# as an ordinary row, so it shows up in the preview below instead of being
# consumed as column names.
data = pd.read_csv(
    file_path,
    header=None,
    sep=';',
)

# Preview the first five rows.
print(data.head(n=5))

In [None]:
# Re-read the file referenced by `file_path` (set in an earlier cell), this
# time taking the column names from its first line. The pure-Python parser is
# requested explicitly instead of pandas' default engine.
data = pd.read_csv(
    file_path,
    engine='python',
    header=0,
    sep=';',
)

# Show the parsed column labels, then the first five rows.
print(data.columns)
print(data.head(n=5))


In [None]:
import os

# Path to the folder
folder_path = '/content/drive/My Drive/Preprocessed'

# Names assigned to the eight semicolon-separated fields of every line.
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Read each file into its own frame and concatenate once at the end;
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration.
frames = []

# os.listdir() returns entries in arbitrary order; sorting makes the row
# order of the combined DataFrame reproducible from run to run.
for file_name in sorted(os.listdir(folder_path)):
    if file_name.endswith('.csv'):
        file_path = os.path.join(folder_path, file_name)

        # Read and split the data: each line is loaded as one string in
        # column 0 and then split on ';', so every value stays a string.
        # NOTE(review): header=None means that if the files start with a
        # header line, that line is kept as a regular data row - confirm
        # this is intended.
        temp_data = pd.read_csv(file_path, header=None)
        temp_data_split = temp_data[0].str.split(';', expand=True)
        temp_data_split.columns = column_names

        frames.append(temp_data_split)

# Combine into the main DataFrame. A folder without CSV files still yields an
# empty DataFrame, as before (pd.concat raises on an empty list).
all_data = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would only add a stray "None" line to the output.
all_data.info()
print(all_data.head())


In [None]:

import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler

# Combine all preprocessed CSV files
preprocessed_dir = '/content/drive/My Drive/Preprocessed'

# Names assigned to the eight semicolon-separated fields of every line.
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Read each file into its own frame and concatenate once at the end;
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration.
frames = []

# os.listdir() returns entries in arbitrary order; sorting makes the combined
# DataFrame (and therefore the seeded split below) reproducible.
for file_name in sorted(os.listdir(preprocessed_dir)):
    # Skip anything that is not a CSV file (sub-folders, checkpoints, ...),
    # which would otherwise be handed to read_csv.
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(preprocessed_dir, file_name)
    # Read and split the data, assuming semicolon delimiter and no header
    temp_data = pd.read_csv(file_path, header=None)
    temp_data_split = temp_data[0].str.split(';', expand=True)
    temp_data_split.columns = column_names
    frames.append(temp_data_split)

if not frames:
    raise FileNotFoundError(f"No CSV files found in {preprocessed_dir!r}")

all_data = pd.concat(frames, ignore_index=True)

# Convert every column except 'time' to numeric BEFORE dropping NaNs.
# The split above produces strings, so unparseable values only become NaN at
# this point; dropping first would let those NaNs through to the scaler.
# The target column is converted as well so that y is numeric, not text.
numeric_columns = [name for name in column_names if name != 'time']
all_data[numeric_columns] = all_data[numeric_columns].apply(pd.to_numeric, errors='coerce')

# Handle missing values (features and target are dropped together, so X and
# y stay row-aligned)
all_data = all_data.dropna()

# Separate features and target
target_column = 'glucose'  # Replace with your actual target column name
X = all_data.drop(columns=[target_column, 'time'])  # Drop 'time' column from features
y = all_data[target_column]

# Split into training and testing sets
X_train_raw, X_test_raw, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Normalize features. The scaler is fitted on the training rows only so that
# the test set's min/max do not leak into the transformation; as a result,
# scaled test values may fall slightly outside [0, 1].
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Full feature matrix under the train-fitted scaling, kept for any later
# cell that still refers to X_scaled.
X_scaled = scaler.transform(X)

In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error

# Combine all preprocessed CSV files
preprocessed_dir = '/content/drive/My Drive/Preprocessed'

# Names assigned to the eight semicolon-separated fields of every line.
column_names = ['time', 'glucose', 'calories', 'heart_rate', 'steps', 'basal_rate', 'bolus_volume_delivered', 'carb_input']

# Read each file into its own frame and concatenate once at the end;
# calling pd.concat inside the loop re-copies all accumulated rows on every
# iteration.
frames = []

# os.listdir() returns entries in arbitrary order. The glucose values are
# modelled as one ordered series below, so the file order is fixed by sorting
# to make the fit and the reported error reproducible.
for file_name in sorted(os.listdir(preprocessed_dir)):
    # Skip anything that is not a CSV file (sub-folders, checkpoints, ...),
    # which would otherwise be handed to read_csv.
    if not file_name.endswith('.csv'):
        continue
    file_path = os.path.join(preprocessed_dir, file_name)
    # Read and split the data, assuming semicolon delimiter and no header
    temp_data = pd.read_csv(file_path, header=None)
    temp_data_split = temp_data[0].str.split(';', expand=True)
    temp_data_split.columns = column_names
    frames.append(temp_data_split)

if not frames:
    raise FileNotFoundError(f"No CSV files found in {preprocessed_dir!r}")

all_data = pd.concat(frames, ignore_index=True)

# Convert 'glucose' to numeric before dropping NaNs
all_data['glucose'] = pd.to_numeric(all_data['glucose'], errors='coerce')

# Drop rows with NaN values in 'glucose' column
all_data = all_data.dropna(subset=['glucose'])

# Extract glucose data for ARIMA
# NOTE(review): rows from all files are concatenated into a single series, so
# the model treats the last row of one file and the first row of the next as
# consecutive observations - confirm this is intended.
glucose_data = all_data['glucose'].values

# Split data into training and testing sets (shuffle=False keeps the original
# order: the first 80% of the series is used for fitting, the rest for testing)
train_data, test_data = train_test_split(glucose_data, test_size=0.2, shuffle=False)

# Fit ARIMA model
# Adjust order parameters (p, d, q) based on your data's characteristics
# You might need to analyze ACF and PACF plots to determine optimal values
model = ARIMA(train_data, order=(5, 1, 0))
model_fit = model.fit()

# Make predictions for the positions occupied by the test set, i.e. the
# indices immediately following the training data.
predictions = model_fit.predict(start=len(train_data), end=len(glucose_data) - 1)

# Evaluate
mse = mean_squared_error(test_data, predictions)
print("ARIMA Mean Squared Error:", mse)



In [None]:
import pandas as pd
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import numpy as np

# ... (Data loading, preprocessing, and ARIMA model fitting as before) ...

from sklearn.metrics import confusion_matrix, precision_score, recall_score, accuracy_score, f1_score

# This cell relies on model_fit, train_data, test_data and glucose_data
# produced by the ARIMA cell above.

# Make predictions for the positions occupied by the test set
predictions = model_fit.predict(start=len(train_data), end=len(glucose_data) - 1)

# --- Regression Metrics ---
mse = mean_squared_error(test_data, predictions)
mae = mean_absolute_error(test_data, predictions)
rmse = np.sqrt(mse)  # Calculate RMSE
r2 = r2_score(test_data, predictions)

print("\nARIMA Regression Metrics:")
print(f"Mean Squared Error (MSE): {mse}")
print(f"Mean Absolute Error (MAE): {mae}")
print(f"Root Mean Squared Error (RMSE): {rmse}")
print(f"R-squared (R2): {r2}")

# --- Categorization for Classification Metrics ---
# np.digitize maps each value to the index of the band it falls in:
#   0 -> value < 70, 1 -> 70 <= value < 140, 2 -> value >= 140
thresholds = [70, 140]  # Example thresholds (adjust as needed)
class_labels = list(range(len(thresholds) + 1))
y_test_cat = np.digitize(test_data, thresholds)  # Using test_data for true values
predictions_cat = np.digitize(predictions, thresholds)

# --- Classification Metrics ---
# Passing the full label list keeps the confusion matrix at a fixed
# (bands x bands) shape with a stable row/column meaning, even when a band
# never occurs in the test data or in the predictions.
cm = confusion_matrix(y_test_cat, predictions_cat, labels=class_labels)

# zero_division=0 makes explicit the value used for a band that is never
# predicted (or never present); the default yields the same 0 but emits an
# undefined-metric warning for each affected score.
precision = precision_score(y_test_cat, predictions_cat, average='weighted', zero_division=0)
recall = recall_score(y_test_cat, predictions_cat, average='weighted', zero_division=0)
accuracy = accuracy_score(y_test_cat, predictions_cat)
f1 = f1_score(y_test_cat, predictions_cat, average='weighted', zero_division=0)

print("\nARIMA Classification Metrics:")
print("Confusion Matrix:", cm)
print("Precision:", precision)
print("Recall:", recall)
print("Accuracy:", accuracy)
print("F1-score:", f1)

# ... (Further analysis and visualization) ...