In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for input_file_path in input_file_paths:
    print(input_file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# My code
# Import the libraries and functions needed for preprocessing, modelling and evaluation

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.decomposition import PCA
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from sklearn.metrics import classification_report, confusion_matrix
import tensorflow as tf

In [None]:
# Enable TPU when one is attached; otherwise fall back to the default strategy.
# `Exception` (not a bare `except:`) is caught so that KeyboardInterrupt and
# SystemExit still propagate, and the failure reason is printed instead of
# being silently discarded.
try:
    resolver = tf.distribute.cluster_resolver.TPUClusterResolver()
    tf.config.experimental_connect_to_cluster(resolver)
    tf.tpu.experimental.initialize_tpu_system(resolver)
    # tf.distribute.TPUStrategy replaces the deprecated experimental alias.
    strategy = tf.distribute.TPUStrategy(resolver)
    print("All devices: ", tf.config.list_logical_devices('TPU'))
except Exception as tpu_error:
    print(f"TPU not available ({tpu_error!r}); using the default strategy.")
    strategy = tf.distribute.get_strategy()
    print("All devices: ", tf.config.list_logical_devices('GPU'))


In [None]:
# Load the dataset, parsing the 'datetime' column into timestamps at read time.
DATA_PATH = '/kaggle/input/nurse-stress-prediction-wearable-sensors/merged_data.csv'
df = pd.read_csv(
    DATA_PATH,
    parse_dates=['datetime'],
    low_memory=False,
)


In [None]:
# Display the loaded DataFrame as this cell's output.
df

In [None]:
# Print the DataFrame summary produced by DataFrame.info().
df.info()

In [None]:
# Report how many missing values each column contains.
missing_counts = df.isna().sum()
print(missing_counts)


In [None]:
# Count rows that are exact duplicates of an earlier row.
duplicate_mask = df.duplicated()
num_duplicated_rows = duplicate_mask.sum()
print(f'Number of duplicated rows: {num_duplicated_rows}')


In [None]:
# Show the first five rows (head() defaults to n=5).
df.head()

In [None]:
# Inspect the parsed timestamp column.
df['datetime']

In [None]:
# Extract datetime features
# Each calendar/clock component of 'datetime' becomes its own column.
for component in ('month', 'day', 'hour', 'minute', 'second'):
    df[component] = getattr(df['datetime'].dt, component)

In [None]:
# Remove the raw 'datetime' and 'id' columns from the frame (in place).
df.drop(columns=['datetime', 'id'], inplace=True)

In [None]:
# Pairwise correlation between all remaining columns, rounded to two decimals.
pairwise_corr = df.corr()
corr_mat = pairwise_corr.round(2)

In [None]:
# Display the rounded correlation matrix as this cell's output.
corr_mat

In [None]:
# Draw the correlation matrix as an annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(corr_mat, annot=True, cmap='coolwarm', ax=ax)
ax.set_title('Correlation Analysis Plot')
plt.show()

In [None]:
# Split the frame into the target column ('label') and the feature matrix.
y = df['label']
X = df.drop(columns=['label'])

In [None]:
# Split the dataset into training, validation, and testing sets (80/10/10).
# The split happens BEFORE oversampling: resampling the full dataset first
# would place copies of the same minority rows in both the training data and
# the held-out sets, leaking information and inflating the reported metrics.
# Stratifying keeps the class proportions the same in every split.
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp
)

# Handle imbalanced data: oversample the training split only, so validation
# and test keep the original class distribution. The sampler is seeded so the
# resampled training set is reproducible.
over_sample = RandomOverSampler(random_state=42)
X_train, y_train = over_sample.fit_resample(X_train, y_train)


In [None]:
# Scale the features
# The scaler is fitted on the training split only and then applied to all
# three splits, so the model trains and evaluates on the same feature scale.
# (The result was previously assigned to a misspelled name, leaving X_train
# unscaled while X_val / X_test were scaled.)
scaler = RobustScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)

In [None]:
# Training callbacks, both driven by the validation loss:
# - stop once it has not improved for 10 epochs and restore the best weights
# - save the best model seen so far to 'best_model.keras'
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=10,
    restore_best_weights=True,
)
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)


In [None]:
from tensorflow.keras.layers import  Input

# Build the model inside the distribution strategy scope.
with strategy.scope():
    n_features = X_train.shape[1]
    layer_stack = [
        Input(shape=(n_features,)),
        Dense(128, activation='relu'),
        Dropout(0.5),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(32, activation='relu'),
        # Softmax output with one unit per class (assumes 3 classes).
        Dense(3, activation='softmax'),
    ]
    model = Sequential(layer_stack)

In [None]:
# Configure training: Adam optimizer, sparse categorical cross-entropy loss
# and accuracy as the tracked metric.
model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)


In [None]:
# Train for up to 50 epochs, validating on the validation split each epoch;
# the callbacks handle early stopping and checkpointing.
training_callbacks = [early_stopping, checkpoint]
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=50,
    callbacks=training_callbacks,
)

In [None]:
# Score the trained model on the test split and report accuracy as a percentage.
test_metrics = model.evaluate(X_test, y_test)
loss, accuracy = test_metrics
print('Test Accuracy: {:.2f}%'.format(accuracy * 100))

In [None]:
# Run the model on the test split and take the highest-scoring class per row.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

In [None]:
# Per-class precision / recall / F1 for the test-set predictions.
per_class_report = classification_report(y_test, y_pred_classes)
print(per_class_report)

In [None]:
# Model Evaluation and Visualization
# Both panels are drawn in ONE cell: the inline backend renders and closes the
# figure at the end of every cell, so putting subplot(1, 2, 1) and
# subplot(1, 2, 2) in separate cells produces two half-empty figures.
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(12, 4))

# Left panel: accuracy per epoch.
ax_acc.plot(history.history['accuracy'], label='Train Accuracy')
ax_acc.plot(history.history['val_accuracy'], label='Validation Accuracy')
ax_acc.set_title('Model Accuracy')
ax_acc.set_xlabel('Epochs')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Right panel: loss per epoch.
ax_loss.plot(history.history['loss'], label='Train Loss')
ax_loss.plot(history.history['val_loss'], label='Validation Loss')
ax_loss.set_title('Model Loss')
ax_loss.set_xlabel('Epochs')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()

In [None]:
# Confusion matrix of true vs. predicted classes on the test split.
test_scores = model.predict(X_test)
y_pred = test_scores.argmax(axis=1)
cm = confusion_matrix(y_test, y_pred)
print('Confusion Matrix:', cm, sep='\n')



In [None]:
# Build the classification report for the predicted classes and print it
# under a heading.
report = classification_report(y_test, y_pred)
print('Classification Report:', report, sep='\n')


In [None]:
# Present confusion matrix as a table
# NOTE(review): these labels say "Depression" although the dataset path refers
# to nurse stress prediction - confirm the intended class names.
class_names = ['No Depression', 'Mild Depression', 'Serious Depression']

# Size every column to the longest class name so headers and values line up
# (a fixed width of 10 is shorter than all three names and broke the alignment).
col_width = max(len(name) for name in class_names)
col_gap = "   "

print("\nFormatted Confusion Matrix:")
# The header starts with a blank cell above the row-label column.
print(" " * col_width + col_gap + col_gap.join(f"{name:>{col_width}}" for name in class_names))
for i, row in enumerate(cm):
    row_values = col_gap.join(f"{val:>{col_width}}" for val in row)
    print(f"{class_names[i]:<{col_width}}" + col_gap + row_values)

In [None]:
# Confusion matrix heatmap
# Uses `y_pred`, the predicted class indices computed for the test split in the
# earlier confusion-matrix cell; the previously referenced `y_true_preds` was
# never defined and raised NameError.
conf_matr = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matr, annot=True, fmt='d', cmap='viridis')
plt.title('Confusion Matrix Plot')
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.show()