<a href="https://colab.research.google.com/github/Kelzo8/AI/blob/main/AI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

Preprocessing - Andrew

In [None]:
# Read the raw sleep/lifestyle CSV into the working DataFrame.
df = pd.read_csv(filepath_or_buffer='/content/Sleep_health_and_lifestyle_dataset.csv')

Load the CSV file.

In [None]:
# Remove the 'Person ID' column.
# Rebinding (instead of inplace=True) together with errors='ignore' keeps this
# cell re-runnable: a second execution no longer raises KeyError once the
# column is already gone.
df = df.drop(columns=['Person ID'], errors='ignore')

Drop the `Person ID` column, as it is not needed for the analysis.

In [None]:
# Turn the text column 'Blood Pressure' ("<systolic>/<diastolic>") into two
# numeric columns and remove the original.
# The guard makes the cell safe to re-run: after the first execution the
# 'Blood Pressure' column no longer exists, and the unguarded code would raise
# KeyError.
if 'Blood Pressure' in df.columns:
    bp_split = df['Blood Pressure'].str.split('/', expand=True)
    df['Systolic BP'] = pd.to_numeric(bp_split[0])   # part before the '/'
    df['Diastolic BP'] = pd.to_numeric(bp_split[1])  # part after the '/'
    df = df.drop(columns=['Blood Pressure'])

Split Blood Pressure into Systolic and Diastolic

In [None]:

# Columns whose text labels are replaced by integer codes.
categorical_cols = ['Gender', 'Occupation', 'BMI Category', 'Sleep Disorder']

# Keep one fitted encoder per column so the integer codes can be mapped back
# to their labels later (encoders[col].classes_ / .inverse_transform).
# Refitting a single shared encoder would only retain the mapping of the last
# column processed.
# NOTE(review): if 'Sleep Disorder' contains missing values, how they are
# encoded depends on the installed scikit-learn version - confirm.
encoders = {}
for col in categorical_cols:
    encoder = LabelEncoder()
    df[col] = encoder.fit_transform(df[col])
    encoders[col] = encoder
# `encoder` is left bound to the encoder of the last column ('Sleep Disorder'),
# which is what the shared encoder held after the original loop.

Encode all categorical variables

In [None]:
# Continuous measurements to bring to zero mean / unit variance.
numerical_cols = [
    'Sleep Duration',
    'Physical Activity Level',
    'Stress Level',
    'Heart Rate',
    'Daily Steps',
    'Systolic BP',
    'Diastolic BP',
]

# Learn the per-column mean and standard deviation, then apply them.
scaler = StandardScaler().fit(df[numerical_cols])
df[numerical_cols] = scaler.transform(df[numerical_cols])

Standardize all numerical features

In [None]:
# Pairwise Pearson correlation (the pandas default) between all columns of df.
correlation_matrix = df.corr(method='pearson')

Compute correlation matrix

In [None]:
# Annotated heatmap of the correlation matrix, drawn on an explicit axes.
fig, ax = plt.subplots(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Heatmap of Health & Sleep Factors')
plt.show()

In [None]:
# Write the processed frame to disk, without the row index.
df.to_csv(path_or_buf='Processed_Sleep_Health_Dataset.csv', index=False)


In [None]:
# Correlation of every column with 'Quality of Sleep', strongest positive first.
correlation_matrix.loc[:, 'Quality of Sleep'].sort_values(ascending=False)


In [None]:
# Re-read the raw CSV; this replaces whatever `df` held before this cell.
df = pd.read_csv(filepath_or_buffer="/content/Sleep_health_and_lifestyle_dataset.csv")

# Print the column/dtype summary, then show the first rows as the cell output.
df.info()
df.head()

In [None]:
# Scatter of sleep duration against sleep quality, with the top/right frame
# lines hidden.
scatter_ax = df.plot.scatter(x='Sleep Duration', y='Quality of Sleep', s=32, alpha=.8)
scatter_ax.spines[['top', 'right']].set_visible(False)

# Histogram (with KDE overlay) of sleep duration on a separate figure.
hist_fig, hist_ax = plt.subplots(figsize=(8, 5))
sns.histplot(df["Sleep Duration"], bins=20, kde=True, color="blue", ax=hist_ax)
hist_ax.set_title("Distribution of Sleep Duration")
hist_ax.set_xlabel("Sleep Duration (hours)")
hist_ax.set_ylabel("Count")
plt.show()

In [None]:
# One box per stress level showing the spread of sleep duration.
stress_fig, stress_ax = plt.subplots(figsize=(8, 5))
sns.boxplot(data=df, x="Stress Level", y="Sleep Duration", palette="coolwarm", ax=stress_ax)
stress_ax.set_title("Stress Level vs. Sleep Duration")
stress_ax.set_xlabel("Stress Level")
stress_ax.set_ylabel("Sleep Duration (hours)")
plt.show()

In [None]:
# Row counts per gender, with bars split by the 'Sleep Disorder' value.
disorder_fig, disorder_ax = plt.subplots(figsize=(8, 5))
sns.countplot(data=df, x="Gender", hue="Sleep Disorder", palette="viridis", ax=disorder_ax)
disorder_ax.set_title("Sleep Disorder Count by Gender")
disorder_ax.set_xlabel("Gender")
disorder_ax.set_ylabel("Count")
disorder_ax.legend(title="Sleep Disorder")
plt.show()

In [None]:
# Drop Person ID since it's not a relevant feature.
# errors="ignore" keeps the cell re-runnable once the column is already gone.
df = df.drop(columns=["Person ID"], errors="ignore")

# `df` was re-read from the raw CSV, so "Blood Pressure" is still text of the
# form "<systolic>/<diastolic>". Left in place it ends up in X and makes
# StandardScaler fail with "could not convert string to float". Replace it
# with two numeric columns.
if "Blood Pressure" in df.columns:
    bp_parts = df["Blood Pressure"].str.split("/", expand=True)
    df["Systolic BP"] = pd.to_numeric(bp_parts[0])
    df["Diastolic BP"] = pd.to_numeric(bp_parts[1])
    df = df.drop(columns=["Blood Pressure"])

# "Sleep Disorder" is also a text column in the raw file and must be encoded
# before it can be scaled. Missing entries (if any) get an explicit "None"
# label so the encoder only ever sees one value type.
df["Sleep Disorder"] = df["Sleep Disorder"].fillna("None")

# Encode categorical variables.
# "Occupation" is processed last so that `label_enc` ends up fitted on the same
# column as before this change.
label_enc = LabelEncoder()
for column in ["Sleep Disorder", "Gender", "BMI Category", "Occupation"]:
    df[column] = label_enc.fit_transform(df[column])

# Define features and target variable: every remaining column except the
# target is a feature.
X = df.drop(columns=["Quality of Sleep"]).values
y = df["Quality of Sleep"].values

In [None]:
# Split data into training and testing sets BEFORE scaling, so the held-out
# rows never contribute to the scaler's mean/variance (fitting the scaler on
# the full dataset leaks test-set statistics into training).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features: learn the statistics from the training rows only, then apply
# the same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Keep `X` in scaled form as well, as it was before this change, for any later
# cell that reads it; it now uses the training-set statistics.
X = scaler.transform(X)

In [None]:
# Define neural network model: a 128-unit ReLU input layer, three narrowing
# ReLU hidden layers, and a single linear output unit.
model = Sequential()
model.add(Dense(128, activation='relu', input_shape=(X_train.shape[1],)))
for hidden_units in (64, 32, 16):
    model.add(Dense(hidden_units, activation='relu'))
model.add(Dense(1, activation='linear'))  # Regression output

# Compile model: mean squared error loss, mean absolute error reported.
model.compile(loss='mse', optimizer='adam', metrics=['mae'])

# Train model. The validation data here is the same (X_test, y_test) pair that
# is passed to evaluate() below.
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
    verbose=1,
)

# Evaluate model on the held-out split and report its MAE.
loss, mae = model.evaluate(X_test, y_test)
print(f"Test MAE: {mae}")