In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset from a CSV file located relative to the working directory.
df = pd.read_csv('anxiety_depression_data.csv')

# Quick look at the data: the first rows (DataFrame.head default).
print("First few rows of the dataset:", df.head(), sep="\n")

# Overall dimensions followed by the per-column count of missing entries.
print(
    f"Dataset shape: {df.shape}",
    f"Missing values:\n{df.isnull().sum()}",
    sep="\n",
)

In [7]:
# Feature selection: split the frame into prediction targets and numeric predictors.
# The two score columns are the targets; they must not leak into the feature matrix.
target_columns = ['Depression_Score', 'Anxiety_Score']
y = df[target_columns]

# Candidate predictors are the columns stored as int64 or float64.
numerical_columns = df.select_dtypes(include=['int64', 'float64']).columns

# Keep every numeric column that is not a target, preserving the original column order.
is_target = numerical_columns.isin(target_columns)
feature_columns = numerical_columns[~is_target].tolist()
x = df.loc[:, feature_columns]

In [None]:
# One-Hot Encoding for categorical variables
categorical_columns = ['Gender', 'Education_Level', 'Employment_Status', 'Meditation']

# Fail early with a readable message if the dataset lacks an expected column.
missing_categoricals = [col for col in categorical_columns if col not in df.columns]
if missing_categoricals:
    raise KeyError(f"Categorical columns not found in the dataset: {missing_categoricals}")

# Build the encoding input from `df`, not from `x`: `x` was restricted to
# int64/float64 columns, so any non-numeric categorical column is absent from it
# and pd.get_dummies(x, columns=...) would raise a KeyError. Reading from `df`
# also keeps this cell idempotent -- it can be re-run without first re-running
# the feature-selection cell, because it never consumes its own output.
extra_categoricals = [col for col in categorical_columns if col not in feature_columns]
encoding_input = df[list(feature_columns) + extra_categoricals]

# Each listed column is replaced by one indicator column per category,
# named "<column>_<category>"; all other columns pass through unchanged.
x_encoded = pd.get_dummies(
    encoding_input,
    columns=categorical_columns,
    prefix=categorical_columns,
)

# Update x with encoded features
x = x_encoded

# Display the shape after encoding
print(f"Shape after One-Hot Encoding: {x.shape}")
print("\nNew feature names:")
print(x.columns.tolist())

In [None]:
# Hold out part of the data for testing (train_test_split's default split size);
# the fixed random_state makes the partition reproducible.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=40)
print(f"Training set: {x_train.shape}, Test set: {x_test.shape}")

# Learn the scaling statistics from the training rows only, then apply that
# same fitted scaler to the test rows so no test information reaches the fit.
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Sanity check on the training data: report per-column mean and std after scaling.
train_mean = x_train_scaled.mean(axis=0).round(10)
train_std = x_train_scaled.std(axis=0)
print(f"Training data after scaling - Mean: {train_mean}")
print(f"Training data after scaling - Std: {train_std}")

# Same report for the test data, which was scaled with the training statistics.
test_mean = x_test_scaled.mean(axis=0).round(10)
test_std = x_test_scaled.std(axis=0)
print(f"Test data after scaling - Mean: {test_mean}")
print(f"Test data after scaling - Std: {test_std}")