In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from sklearn.ensemble import RandomForestClassifier

# Load every raw export in one pass; the targets on the left are matched
# positionally with the paths below, so the two orders must stay in sync.
hrv_data, stress_data, sleep_data, steps_data, spo2_data, breathing_data = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/hrv.csv',
        '../input/stress.csv',
        '../input/sleep_summary.csv',
        '../input/steps.csv',
        '../input/spo2.csv',
        '../input/breathing.csv',
    )
)

# 'coverage' is not among the model features, so drop it before aggregating.
hrv_data = hrv_data.drop(columns='coverage')

# --- Sources that already have one row per day: parse their date column ----
stress_data['date'] = pd.to_datetime(stress_data['date'])
steps_data['date'] = pd.to_datetime(steps_data['date'])

# Sleep and breathing rows are keyed to the day *before* their own timestamp
# (presumably so an overnight reading lines up with the preceding day's
# stress score -- TODO confirm).
sleep_data['dateOfSleep'] = pd.to_datetime(sleep_data['dateOfSleep'])
sleep_data['date'] = sleep_data['dateOfSleep'] - pd.DateOffset(days=1)
breathing_data['date'] = (
    pd.to_datetime(breathing_data['dateTime']) - pd.DateOffset(days=1)
)

# --- Per-minute sources: collapse to one averaged row per calendar day -----
# numeric_only=True restricts the mean to the measurement columns. With the
# pandas >= 2.0 default (numeric_only=False) the aggregation would also try to
# average non-numeric columns such as the raw 'minute' timestamps and the
# helper 'date' column, which fails; older pandas dropped them implicitly.
spo2_data['date'] = pd.to_datetime(spo2_data['minute'])
spo2_data = spo2_data.groupby(spo2_data['date'].dt.date).mean(numeric_only=True)
spo2_data = spo2_data.reset_index()
# The group keys are plain datetime.date objects; convert back to datetime64
# so the column can be joined against the other frames.
spo2_data['date'] = pd.to_datetime(spo2_data['date'])
# NOTE(review): unlike HRV, sleep and breathing, SpO2 is not shifted back by
# one day -- confirm that this is intended.

hrv_data['date'] = pd.to_datetime(hrv_data['minute'])
hrv_data = hrv_data.groupby(hrv_data['date'].dt.date).mean(numeric_only=True)
hrv_data = hrv_data.reset_index()
# Convert the group keys back to datetime64 and apply the one-day shift.
hrv_data['date'] = pd.to_datetime(hrv_data['date']) - pd.DateOffset(days=1)

# Daily calendar for the study window; every source is aligned to it below.
date_range = pd.date_range(start='2023-11-01', end='2024-02-16', freq='D')
date_df = pd.DataFrame({'date': date_range})

# Left-join each source onto the calendar so every calendar day is present
# (and nothing outside the window survives), then forward-fill so a day with
# no record inherits the most recent earlier values. Days before a source's
# first record have nothing to inherit and remain NaN.
hrv_data = date_df.merge(hrv_data, how='left', on='date').ffill()
stress_data = date_df.merge(stress_data, how='left', on='date').ffill()
sleep_data = date_df.merge(sleep_data, how='left', on='date').ffill()
steps_data = date_df.merge(steps_data, how='left', on='date').ffill()
spo2_data = date_df.merge(spo2_data, how='left', on='date').ffill()
breathing_data = date_df.merge(breathing_data, how='left', on='date').ffill()

# Combine the per-source tables into one wide frame keyed by 'date'.
# Every step is an outer join on the shared 'date' column.

# HRV + stress
merged_data = hrv_data.merge(stress_data, how='outer', on='date')

# ... + sleep, steps and SpO2
merged_df = (
    merged_data
    .merge(sleep_data, how='outer', on='date')
    .merge(steps_data, how='outer', on='date')
    .merge(spo2_data, how='outer', on='date')
)

# ... + breathing rate gives the final modelling table
final_df = merged_df.merge(breathing_data, how='outer', on='date')

# Columns used as predictors; the target is the 'level' column.
feature_columns = [
    'rmssd', 'hf', 'lf', 'duration', 'efficiency', 'minutesAsleep',
    'minutesAwake', 'steps', 'spo2', 'deepSleep', 'remSleep', 'fullSleep',
    'lightSleep',
]

# Forward-filling cannot populate days that precede a source's first record,
# so such rows still contain NaN. A NaN label makes fit() raise, and NaN
# features are rejected by older scikit-learn releases, so keep only rows in
# which every feature and the label are present.
complete_rows = final_df[feature_columns + ['level']].notna().all(axis=1)

hrv_features = final_df.loc[complete_rows, feature_columns]
# Shift the labels down by one (presumably mapping 1-based stress levels to
# 0-based class ids -- TODO confirm against the stress export).
stress_labels = final_df.loc[complete_rows, 'level'] - 1

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# repeatable between runs.
# NOTE(review): rows are consecutive days whose gaps were forward-filled, so
# a random split can put near-identical days on both sides -- consider a
# chronological split if the reported scores look optimistic.
X_train, X_test, y_train, y_test = train_test_split(
    hrv_features, stress_labels, test_size=0.2, random_state=42
)

# Seed the forest as well; otherwise the report changes on every run even
# though the split itself is fixed.
rf_model = RandomForestClassifier(random_state=42)
rf_model.fit(X_train, y_train)

# Model evaluation on the held-out rows.
y_pred = rf_model.predict(X_test)

report = classification_report(y_test, y_pred)
print(report)