In [1]:
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np


from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix, roc_curve, auc, matthews_corrcoef, cohen_kappa_score
from sklearn.linear_model import RidgeClassifierCV


from sktime.datatypes._panel._convert import from_2d_array_to_nested
from sktime.transformations.panel.rocket import Rocket




In [2]:
# Location of the combined activity CSV. This is an absolute, machine-specific
# path; edit it here when running the notebook on another machine.
DATA_PATH = 'C:/Users/DELL G7/Pictures/Camera Roll/projects/newdepression/combined_data.csv'
data = pd.read_csv(DATA_PATH)

In [3]:
# Parse the 'timestamp' column into datetime values
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Truncate (floor) each timestamp to the start of its hour. Note this is a
# floor, not a round-to-nearest. The lowercase 'h' alias is used because the
# uppercase 'H' alias is deprecated in recent pandas releases.
data['hourly_timestamp'] = data['timestamp'].dt.floor('h')

# Sum the activity values for every (person, hour, status) combination
hourly_data = (
    data.groupby(['person', 'hourly_timestamp', 'status'])['activity']
    .agg(hourly_activity='sum')
    .reset_index()
)

hourly_data.head()

Unnamed: 0,person,hourly_timestamp,status,hourly_activity
0,condition_1,2003-05-07 12:00:00,depressed,20793
1,condition_1,2003-05-07 13:00:00,depressed,17074
2,condition_1,2003-05-07 14:00:00,depressed,16751
3,condition_1,2003-05-07 15:00:00,depressed,13127
4,condition_1,2003-05-07 16:00:00,depressed,14313


In [4]:
# Total number of hourly records across all persons. Summing the per-person
# group sizes gives the row count of hourly_data, not the number of distinct
# persons, so the value is stored under a name that says what it is.
number_of_hourly_records = len(hourly_data)
print(number_of_hourly_records)

26230


In [5]:
# Count the distinct values in the 'person' column
number_of_unique_persons = hourly_data['person'].nunique()
print(number_of_unique_persons)

55


In [6]:
# Fit a LabelEncoder on the status strings and store the integer class ids
label_encoder = LabelEncoder()
status_codes = label_encoder.fit_transform(hourly_data['status'])
hourly_data['status_encoded'] = status_codes

# Column names used by the model: the encoded status is the target and the
# summed hourly activity is the only feature
target = 'status_encoded'
features = ['hourly_activity']

In [7]:
# Function to balance classes
def balance_classes(data, target_column, random_state=42):
    """Down-sample every class to the size of the smallest class.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame containing ``target_column``.
    target_column : str
        Name of the column holding the class labels.
    random_state : int, optional
        Seed passed to ``DataFrame.sample`` for each class (default 42, the
        value that was previously hard-coded).

    Returns
    -------
    pandas.DataFrame
        Rows of ``data`` with the same number of rows per class, grouped by
        class in order of first appearance. Rows whose target is missing are
        dropped. An empty frame with the same columns is returned when there
        are no labelled rows.
    """
    # Ignore missing labels: value_counts() never counts them, and comparing
    # against NaN selects no rows, which would make .sample() raise.
    labels = data[target_column].dropna()
    if labels.empty:
        return data.iloc[0:0].copy()

    min_class_count = labels.value_counts().min()
    return pd.concat([
        data[data[target_column] == cls].sample(min_class_count, random_state=random_state)
        for cls in labels.unique()
    ])


In [8]:
# Person-based leave-one-person-out cross-validation (LOOCV) with nested dataframes.
#
# NOTE(review): each hourly row is converted into its own length-1 series below.
# ROCKET is meant for multi-timepoint series, and with per-series normalisation a
# single-point series may carry no signal — confirm this framing is intended.

# Loop-invariant settings, hoisted out of the per-person loop
ridge_alphas = np.logspace(-3, 3, 10)
# The y arrays below store the encoded class ids as 1-character strings, so the
# label list handed to classification_report uses the same representation
# (assumes fewer than 10 classes, as the '<U1' casts already do).
report_labels = np.arange(len(label_encoder.classes_)).astype('<U1')

results = []
for person in hourly_data['person'].unique():
    # Hold out the current person for testing; train on everyone else
    test_data = hourly_data[hourly_data['person'] == person]
    train_data = hourly_data[hourly_data['person'] != person]

    # Balance the training data classes (only possible with at least two classes)
    if len(train_data[target].unique()) > 1:
        train_data = balance_classes(train_data, target)

    # Convert features to nested format (one single-value series per row)
    X_train = train_data[features].values.reshape(-1, 1)
    X_train_nested = from_2d_array_to_nested(X_train)

    # Encoded class ids as 1-character strings
    y_train = train_data[target].to_numpy('<U1')

    # Fit the ROCKET transform on the training series, then a ridge classifier
    # on the transformed features
    rocket = Rocket(num_kernels=10000, random_state=111)
    X_train_transform = rocket.fit_transform(X_train_nested)

    classifier = RidgeClassifierCV(alphas=ridge_alphas)
    classifier.fit(X_train_transform, y_train)

    # Convert the held-out person's features to nested format
    X_test = test_data[features].values.reshape(-1, 1)
    X_test_nested = from_2d_array_to_nested(X_test)

    y_test = test_data[target].to_numpy('<U1')

    # Predict on the test set (current person's data)
    y_pred = classifier.predict(rocket.transform(X_test_nested))

    # Evaluate the model (for this person)
    accuracy = accuracy_score(y_test, y_pred)

    # A held-out person can contribute a single class only. Passing the full
    # label list keeps target_names aligned in that case (avoiding the
    # "Number of classes ... does not match size of target_names" ValueError),
    # and zero_division=0 reports 0 for undefined metrics instead of warning.
    report = classification_report(
        y_test,
        y_pred,
        labels=report_labels,
        target_names=label_encoder.classes_,
        zero_division=0,
    )

    results.append({
        'person': person,
        'accuracy': accuracy,
        'classification_report': report
    })

KeyboardInterrupt: 

In [5]:
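# NOTE: earlier draft of the person-based LOOCV loop, kept commented out for reference.
# It calls classification_report with target_names but without labels, which matches
# the ValueError recorded in this cell's output for single-class test sets.
# # Person-based LOOCV with nested dataframes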
# results = []
# for person in hourly_data['person'].unique():
#   # Separate data for current person (testing) and others (training)
#   test_data = hourly_data[hourly_data['person'] == person]
#   train_data = hourly_data[hourly_data['person'] != person]

#   # Convert features to nested format (individual timestamps)
#   X_train = train_data[features].values.reshape(-1, 1)  # Reshape to 2D with single feature
#   X_train_nested = from_2d_array_to_nested(X_train)

#   # Keep target variable as pandas Series
#   y_train = train_data[target].to_numpy('<U1')

#   # Initialize and train the ROCKET classifier
#   rocket = Rocket(num_kernels=10000, random_state=111)
#   X_train_transform = rocket.fit_transform(X_train_nested)

#   classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
#   classifier.fit(X_train_transform, y_train)

#   # Convert test data features to nested format
#   X_test = test_data[features].values.reshape(-1, 1)  # Reshape for nesting
#   X_test_nested = from_2d_array_to_nested(X_test)

#   y_test = test_data[target].to_numpy('<U1')

#   # Predict on the test set (current person's data)
#   y_pred = classifier.predict(rocket.transform(X_test_nested))

#   # Evaluate the model (for this person)
#   accuracy = accuracy_score(y_test, y_pred)
#   report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
#   results.append({
#       'person': person,
#       'accuracy': accuracy,
#       'classification_report': report
#   })

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_pr

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


ValueError: Number of classes, 1, does not match size of target_names, 2. Try specifying the labels parameter

In [None]:
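# NOTE: commented-out variant of the LOOCV loop that reads from `daily_data`;
# no `daily_data` frame is created in the cells shown in this notebook export.
# # Person-based LOOCV with nested dataframes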
# results = []
# for person in daily_data['person'].unique():
#   # Separate data for current person (testing) and others (training)
#   test_data = daily_data[daily_data['person'] == person]
#   train_data = daily_data[daily_data['person'] != person]

#   # Convert features to nested format for training data
#   X_train = train_data[features].values  # Get the 2D array of features
#   X_train_nested = from_2d_array_to_nested(X_train)

#   # Keep target variable as pandas Series
#   y_train = train_data[target].to_numpy('<U1')

#   # Initialize and train the ROCKET classifier
#   rocket = Rocket(num_kernels=10000, random_state=111)
#   X_train_transform = rocket.fit_transform(X_train_nested)

#   classifier = RidgeClassifierCV(alphas=np.logspace(-3, 3, 10))
#   classifier.fit(X_train_transform, y_train)

#   # Convert test data features to nested format
#   X_test = test_data[features].values
#   X_test_nested = from_2d_array_to_nested(X_test) 
#   y_test = test_data[target].to_numpy('<U1')

#   # Predict on the test set (current person's data)
#   y_pred = classifier.predict(rocket.transform(X_test_nested))

#   # Evaluate the model (for this person)
#   accuracy = accuracy_score(y_test, y_pred)
#   report = classification_report(y_test, y_pred, target_names=label_encoder.classes_)
#   results.append({
#       'person': person,
#       'accuracy': accuracy,
#       'classification_report': report
#   })

In [None]:
# Print one summary block per held-out person, each followed by a divider line
for entry in results:
    print(
        f"Results for {entry['person']}:",
        f"Accuracy: {entry['accuracy']}",
        "Classification Report:",
        entry['classification_report'],
        "--------------------------------------------------",
        sep="\n",
    )