In [82]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense
from sklearn.model_selection import train_test_split
from imblearn.over_sampling import RandomOverSampler


## Data

In [7]:
FeaturesPHQ9 = pd.DataFrame()
FeaturesPHQ9 = pd.read_csv('../../dataset/BasicFeatures/Merged/AllBasicMerged.csv')
print(FeaturesPHQ9.keys())

Index(['userId', 'DarknessEarlyMorningMean', 'DarknessMorningMean',
       'DarknessAfternoonMean', 'DarknessEveningMean',
       'DarknessEarlyMorningSum', 'DarknessMorningSum', 'DarknessAfternoonSum',
       'DarknessEveningSum', 'DarknessShortestEarlyMorning',
       ...
       'ConversationEveningSum', 'ConversationShortestEarlyMorning',
       'ConversationShortestMorning', 'ConversationShortestAfternoon',
       'ConversationShortestEvening', 'ConversationLongestEarlyMorning',
       'ConversationLongestMorning', 'ConversationLongestAfternoon',
       'ConversationLongestEvening', 'PHQ9'],
      dtype='object', length=106)


In [8]:
FeaturesPHQ9 = FeaturesPHQ9.drop(columns=['userId','WeekId'])

In [9]:
dataClassification = FeaturesPHQ9.copy()
columns = ['PHQ9']

for column in columns:
    dataClassification.loc[((dataClassification[column] > 0)& (dataClassification[column]<=5)), column] = 0
    dataClassification.loc[((dataClassification[column] > 5)), column] = 1

print(dataClassification)

     DarknessEarlyMorningMean  DarknessMorningMean  DarknessAfternoonMean  \
0                18372.333333         15542.000000            6005.769231   
1                14846.090909         26047.000000            8724.642857   
2                15283.095238         24830.333333            4187.125000   
3                20652.578947             0.000000            8388.294118   
4                11652.900000         19603.200000            4600.222222   
..                        ...                  ...                    ...   
247                  0.000000         27599.500000           13925.000000   
248              34205.000000             0.000000           14905.333333   
249              14656.416667         17738.625000            9287.833333   
250              33182.000000             0.000000           13456.285714   
251              10468.000000         21137.142857            5393.250000   

     DarknessEveningMean  DarknessEarlyMorningSum  DarknessMorningSum  \
0 

In [114]:
X = dataClassification.drop(['PHQ9'], axis=1)
# X = dataClassification.drop(['PHQ9'], axis=1)
# y_PHQ9 = dataClassification['PHQ9']
y_PHQ9Base = dataClassification['PHQ9']
print(X.keys())


Index(['DarknessEarlyMorningMean', 'DarknessMorningMean',
       'DarknessAfternoonMean', 'DarknessEveningMean',
       'DarknessEarlyMorningSum', 'DarknessMorningSum', 'DarknessAfternoonSum',
       'DarknessEveningSum', 'DarknessShortestEarlyMorning',
       'DarknessShortestMorning',
       ...
       'ConversationAfternoonSum', 'ConversationEveningSum',
       'ConversationShortestEarlyMorning', 'ConversationShortestMorning',
       'ConversationShortestAfternoon', 'ConversationShortestEvening',
       'ConversationLongestEarlyMorning', 'ConversationLongestMorning',
       'ConversationLongestAfternoon', 'ConversationLongestEvening'],
      dtype='object', length=103)


In [117]:
print(y_PHQ9Base)

0      0
1      0
2      1
3      0
4      0
      ..
247    1
248    0
249    0
250    0
251    0
Name: PHQ9, Length: 252, dtype: int64


In [88]:
# Initialize the RandomOverSampler
ros = RandomOverSampler(sampling_strategy='minority', random_state=42)

# Fit and apply the random oversampling
X_resampled, y_resampled = ros.fit_resample(X, y_PHQ9Base)

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y_PHQ9Base, test_size=0.2, random_state=42)

In [89]:
X_trainOversampled, X_testOversampled, y_trainOversampled, y_testOversampled = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)

In [17]:
print(X_train.shape)

(201, 103)


## RNN Model - original

In [108]:

num_features = X_train.shape[1]
print(num_features)

# Adjust num_time_steps based on the size of your DataFrame
num_time_steps = X_train.shape[0] // num_features  # Assuming each sample occupies num_features rows
print(X_train.shape[0])

# Reshape the DataFrame accordingly
X_train_reshaped = X_train.values.reshape(-1, num_time_steps, num_features)

# Assuming X_test is a DataFrame
X_test_reshaped = X_test.values.reshape(X_test.shape[0], num_time_steps, num_features)
print((-1, num_time_steps, num_features))
print((X_test.shape[0], num_time_steps, num_features))



103
201
[[[ 4067.         16219.          5789.5        ...   948.
    6868.          2753.        ]]

 [[ 9234.44444444 31530.25       13133.25       ...   319.
    3428.          3206.        ]]

 [[27281.6        19708.         12088.14285714 ...  6011.
    7949.          5971.        ]]

 ...

 [[14140.75       19714.11111111  6819.88888889 ...  2012.
    7846.          4904.        ]]

 [[10295.             0.             0.         ...  2366.
     384.          1577.        ]]

 [[16708.09090909 14757.          9636.22222222 ...  2796.
    7715.          6903.        ]]]
(-1, 1, 103)
(51, 1, 103)


In [109]:
# Model parameters
rnn_units = 128
num_classes = len(np.unique(y_train))
num_epochs = 50
batch_size = 128

In [110]:
model = Sequential()
model.add(SimpleRNN(units=rnn_units, input_shape=(num_time_steps, num_features)))
model.add(Dense(num_classes, activation='softmax'))

In [111]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])


In [112]:
model.fit(X_train, y_train, epochs=num_epochs, batch_size=batch_size)


Epoch 1/50


ValueError: in user code:

    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1284, in train_function  *
        return step_function(self, iterator)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1268, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in run_step  **
        outputs = model.train_step(data)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1050, in train_step
        y_pred = self(x, training=True)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_13" is incompatible with the layer: expected shape=(None, 1, 103), found shape=(None, 103)


In [80]:
loss, accuracy = model.evaluate(X_test_reshaped, y_test)
print("Test Accuracy:", accuracy)


Test Accuracy: 0.7058823704719543


## RNN Model - Oversampled

In [102]:

num_featuresOversampled = X_trainOversampled.shape[1]

# Calculate the number of rows per sample after oversampling
rows_per_sample_oversampled = X_trainOversampled.shape[0] // len(X_trainOversampled)

# Adjust num_time_stepsOversampled based on the number of rows per sample
num_time_stepsOversampled = X_trainOversampled.shape[0] // rows_per_sample_oversampled

# Reshape the DataFrame accordingly
X_train_reshapedOversampled = X_trainOversampled.values.reshape(-1, num_time_stepsOversampled, num_featuresOversampled)

# Reshape the test DataFrame accordingly
X_test_reshapedOversampled = X_testOversampled.values.reshape(X_testOversampled.shape[0], num_time_stepsOversampled, num_featuresOversampled)




# # Adjust num_time_steps based on the size of your DataFrame
# num_time_stepsOversampled = X_trainOversampled.shape[0] // num_featuresOversampled  # Assuming each sample occupies num_features rows

# # Reshape the DataFrame accordingly
# X_train_reshapedOversampled = X_trainOversampled.values.reshape(-1, num_time_stepsOversampled, num_featuresOversampled)
# # Assuming X_test is a DataFrame
# X_test_reshapedOversampled = X_testOversampled.values.reshape(X_testOversampled.shape[0], num_time_stepsOversampled, num_featuresOversampled)

ValueError: cannot reshape array of size 7107 into shape (69,275,103)

In [97]:
# Model parameters
rnn_units = 128
num_classes = len(np.unique(y_trainOversampled))
num_epochs = 50
batch_size = 128

In [98]:
model = Sequential()
model.add(SimpleRNN(units=rnn_units, input_shape=(num_time_stepsOversampled, num_featuresOversampled)))
model.add(Dense(num_classes, activation='softmax'))

In [99]:
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

In [100]:
model.fit(X_train_reshapedOversampled, y_trainOversampled, epochs=num_epochs, batch_size=batch_size)

ValueError: Data cardinality is ambiguous:
  x sizes: 1
  y sizes: 275
Make sure all arrays contain the same number of samples.

In [101]:
loss, accuracy = model.evaluate(X_test_reshapedOversampled, y_testOversampled)
print("Test Accuracy:", accuracy)


ValueError: in user code:

    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1852, in test_function  *
        return step_function(self, iterator)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1836, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1824, in run_step  **
        outputs = model.test_step(data)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/training.py", line 1788, in test_step
        y_pred = self(x, training=False)
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 70, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "/opt/homebrew/anaconda3/envs/tensorflowenv/lib/python3.9/site-packages/keras/engine/input_spec.py", line 298, in assert_input_compatibility
        raise ValueError(

    ValueError: Input 0 of layer "sequential_12" is incompatible with the layer: expected shape=(None, 275, 103), found shape=(None, 1, 103)
