In [1]:
# Importing necessary libraries
import pandas as pd
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Flatten, Dropout, Conv1D, MaxPooling1D
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

# Read the CSV file into a DataFrame
raw_df = pd.read_csv(filepath_or_buffer='daily_motion.csv')

# Summary statistics of the loaded frame, shown as this cell's output
raw_df.describe()

In [2]:
# Expose the 'Unnamed: 0' column (the name pandas gives a header-less column)
# as 'user_id'. rename() returns a new frame that is re-bound to raw_df
# instead of mutating the existing object with inplace=True, which pandas
# discourages and which would silently alter a frame already displayed above.
raw_df = raw_df.rename(columns={'Unnamed: 0': 'user_id'})

# Build the feature set X: every column except the identifier, the label
# ('mental_disorder') and the other columns listed below, none of which are
# used as model inputs
X = raw_df.drop(columns=['user_id', 'gender', 'mental_disorder', 'melanch',
                         'inpatient', 'marriage', 'work', 'madrs1', 'madrs2',
                         'age', 'edu'])

# Number of missing values per feature column (shown as this cell's output)
X.isna().sum()

In [3]:
# Build the label array 'y' from the 'mental_disorder' column.
# Missing entries are replaced with 0 (assumed to mean the control group)
# and the filled column is one-hot encoded in the same expression.
y = to_categorical(raw_df['mental_disorder'].fillna(value=0))

In [4]:
# Hold out 30% of the rows for testing; random_state pins the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.3
)

# The CNN's first layer takes (steps, channels) samples, so append a trailing
# channel axis of size 1: (rows, features) -> (rows, features, 1)
X_train = np.expand_dims(X_train.values, axis=-1)
X_test = np.expand_dims(X_test.values, axis=-1)

In [5]:
# Width of the output layer: one unit per column of the one-hot label array,
# so the model follows however many classes 'y' contains
n_outputs = y.shape[1]

# 1D CNN: two convolution layers, dropout, max pooling, then a dense head
# ending in a softmax over the classes
model = Sequential([
    Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)),
    Conv1D(filters=64, kernel_size=3, activation='relu'),
    Dropout(0.5),
    MaxPooling1D(pool_size=2),
    Flatten(),
    Dense(100, activation='relu'),
    Dense(n_outputs, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot labels; accuracy is tracked
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [6]:
# Training settings (batch_size is also used by the evaluation cell)
verbose = 0
epochs = 10
batch_size = 32

# Fit the model on the training split; verbose=0 keeps the run silent
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, verbose=verbose)

In [7]:
# Score the trained model on the test split.
# evaluate() returns the loss followed by the compiled metric (accuracy).
# The loss is bound to a real name rather than `_`: in IPython/Jupyter,
# assigning to `_` overrides the built-in "last output" variable for the rest
# of the session. test_loss also stays available for inspection.
test_loss, accuracy = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)
print(f'Model Accuracy: {accuracy}')