In [3]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import MinMaxScaler

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Apply seaborn's default theme globally.
# NOTE(review): color_codes=True presumably remaps matplotlib's shorthand
# colour codes ("b", "g", ...) to the seaborn palette — confirm in seaborn docs.
sns.set(color_codes=True)

In [4]:
# Colab-only: mount Google Drive at /content/drive so the project paths
# defined below (which live under this mount point) are reachable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Project root on the mounted Drive; every other path in the notebook is
# derived from it.
BASE_PATH = '/content/drive/MyDrive/StudentStressLevelMonitoring'

# CSV that this notebook reads as its input.
INPUT_CSV = os.path.join(
    BASE_PATH,
    'results/outputs',
    'dataset_no_outliers.csv',
)

In [6]:
# Directory for tabular outputs (the scaled CSV is written here later).
OUTPUTS_DRIVE = os.path.join(BASE_PATH, 'results/outputs')
# exist_ok=True makes the cell safe to re-run when the folder already exists.
os.makedirs(OUTPUTS_DRIVE, exist_ok=True)

In [7]:
# Directory for the saved EDA figures (box plots before/after scaling).
OUTPUTS_DRIVE_EDA = os.path.join(BASE_PATH, 'results/eda_visualizations')
# exist_ok=True makes the cell safe to re-run when the folder already exists.
os.makedirs(OUTPUTS_DRIVE_EDA, exist_ok=True)

In [8]:
# Load the input dataset; `df` is treated as the unscaled reference frame and
# is only read (never reassigned) by the cells below.
df = pd.read_csv(INPUT_CSV)
# Quick sanity check: which file was read and how many rows/columns it has.
print("Loaded:", INPUT_CSV, "| Shape:", df.shape)
# Rich (HTML) preview of the first rows.
display(df.head())

Loaded: /content/drive/MyDrive/StudentStressLevelMonitoring/results/outputs/dataset_no_outliers.csv | Shape: (793, 21)


Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,14,20.0,0.0,11.0,2.0,1.0,2.0,4.0,2.0,3.0,...,2.0,3.0,2.0,3.0,3.0,2.0,3.0,3.0,2.0,1.0
1,15,8.0,1.0,15.0,5.0,3.0,1.0,4.0,3.0,1.0,...,2.0,1.0,4.0,1.0,5.0,1.0,4.0,5.0,5.0,2.0
2,12,18.0,1.0,14.0,2.0,1.0,2.0,2.0,2.0,2.0,...,2.0,2.0,3.0,3.0,2.0,2.0,3.0,2.0,2.0,1.0
3,16,12.0,1.0,15.0,4.0,3.0,1.0,3.0,4.0,2.0,...,2.0,2.0,4.0,1.0,4.0,1.0,4.0,4.0,5.0,2.0
4,16,28.0,0.0,7.0,2.0,3.0,5.0,1.0,3.0,2.0,...,3.0,4.0,3.0,1.0,2.0,1.0,5.0,0.0,5.0,1.0


In [9]:
# Label column: it is kept out of the feature list so it is never rescaled.
TARGET = "stress_level"

# Every numeric column except the target is treated as a feature to scale.
numeric_cols = [
    col
    for col in df.select_dtypes(include=[np.number]).columns
    if col != TARGET
]

# The MinMax fit itself is done once, in the scaling cell that follows the
# "before scaling" box plot. Fitting here as well only duplicated that work and
# produced a `df_scaled` that was overwritten before anything read it.
if not numeric_cols:
    print("No numeric feature columns found to scale.")


In [10]:
def save_boxplot(dataframe, cols, title, save_paths):
    """Draw one box per column and write the figure to every given path.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame that holds the columns to plot.
    cols : iterable of str, or None
        Column names to include. Any iterable is accepted (list, tuple,
        ``pandas.Index``, NumPy array). If it is empty or ``None`` the
        function prints a "[skip]" message and does nothing else.
    title : str
        Figure title; also echoed in the skip message.
    save_paths : iterable of path-like
        Destinations; the same figure is saved once per entry.

    Returns
    -------
    None
        The figure is closed after saving, so nothing is rendered inline.
    """
    # Normalise to a list first: `not cols` raises ValueError for a pandas
    # Index / NumPy array, and a generator would be consumed by the check.
    cols = [] if cols is None else list(cols)
    if not cols:
        print(f"[skip] No columns for {title}")
        return

    # Grow the width with the number of boxes, but never below 12 inches.
    fig, ax = plt.subplots(figsize=(max(12, len(cols) * 0.7), 6))
    try:
        dataframe[cols].boxplot(ax=ax)
        # Column names are long; vertical labels keep them from overlapping.
        ax.tick_params(axis="x", labelrotation=90)
        ax.set_title(title)
        fig.tight_layout()
        for p in save_paths:
            fig.savefig(p, dpi=150, bbox_inches="tight")
            print("Saved:", p)
    finally:
        # Release the figure even when plotting or saving fails, so repeated
        # calls do not accumulate open figures in the kernel.
        plt.close(fig)

In [11]:
# Baseline picture: feature distributions on their original scales.
save_boxplot(
    dataframe=df,
    cols=numeric_cols,
    title="Numeric Columns — Before Scaling",
    save_paths=[os.path.join(OUTPUTS_DRIVE_EDA, "box_before_scaling.png")],
)

Saved: /content/drive/MyDrive/StudentStressLevelMonitoring/results/eda_visualizations/box_before_scaling.png


In [12]:
# Scale on a copy so the unscaled frame `df` stays available for comparison.
df_scaled = df.copy()
if not numeric_cols:
    print("No numeric columns found. Skipping scaling.")
else:
    # Learn each feature's min/max, then map the features onto [0, 1].
    # The target column is not in `numeric_cols`, so it keeps its raw values.
    scaler = MinMaxScaler().fit(df[numeric_cols])
    df_scaled[numeric_cols] = scaler.transform(df[numeric_cols])

print("\nHead of scaled data:")
display(df_scaled.head())


Head of scaled data:


Unnamed: 0,anxiety_level,self_esteem,mental_health_history,depression,headache,blood_pressure,sleep_quality,breathing_problem,noise_level,living_conditions,...,basic_needs,academic_performance,study_load,teacher_student_relationship,future_career_concerns,social_support,peer_pressure,extracurricular_activities,bullying,stress_level
0,0.666667,0.666667,0.0,0.407407,0.4,0.0,0.4,0.8,0.333333,0.666667,...,0.4,0.6,0.333333,0.6,0.6,0.666667,0.6,0.6,0.4,1.0
1,0.714286,0.266667,1.0,0.555556,1.0,1.0,0.2,0.8,0.666667,0.0,...,0.4,0.2,1.0,0.2,1.0,0.333333,0.8,1.0,1.0,2.0
2,0.571429,0.6,1.0,0.518519,0.4,0.0,0.4,0.4,0.333333,0.333333,...,0.4,0.4,0.666667,0.6,0.4,0.666667,0.6,0.4,0.4,1.0
3,0.761905,0.4,1.0,0.555556,0.8,1.0,0.2,0.6,1.0,0.333333,...,0.4,0.4,1.0,0.2,0.8,0.333333,0.8,0.8,1.0,2.0
4,0.761905,0.933333,0.0,0.259259,0.4,1.0,1.0,0.2,0.666667,0.333333,...,0.6,0.8,0.666667,0.2,0.4,0.333333,1.0,0.0,1.0,1.0


In [13]:
# Same plot on the scaled frame, for side-by-side comparison with the baseline.
save_boxplot(
    dataframe=df_scaled,
    cols=numeric_cols,
    title="Numeric Columns — After MinMax Scaling",
    save_paths=[os.path.join(OUTPUTS_DRIVE_EDA, "box_after_scaling.png")],
)

Saved: /content/drive/MyDrive/StudentStressLevelMonitoring/results/eda_visualizations/box_after_scaling.png


In [14]:
# Persist the scaled frame next to the other tabular outputs.
scaled_csv_drive = os.path.join(OUTPUTS_DRIVE, "scaled_dataset.csv")
# index=False: do not write the row index as an extra column in the CSV.
df_scaled.to_csv(scaled_csv_drive, index=False)
print("\nSaved scaled dataset to:")
print(" -", scaled_csv_drive)


Saved scaled dataset to:
 - /content/drive/MyDrive/StudentStressLevelMonitoring/results/outputs/scaled_dataset.csv
