In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [2]:
# Colab-specific setup: mount Google Drive at /content/drive. Every input and
# output path used later in this notebook lives under that mount point, so this
# cell has to run before any file is read or written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
# Root of the project inside the mounted Google Drive.
BASE_PATH = '/content/drive/MyDrive/StudentStressLevelMonitoring'

# Directories this notebook writes to: tabular outputs and EDA figures.
OUTPUTS_DRIVE = os.path.join(BASE_PATH, 'results/outputs')
OUTPUTS_DRIVE_EDA = os.path.join(BASE_PATH, 'results/eda_visualizations')

# Input CSVs (both live in the tabular outputs directory):
#   ORIGINAL_INPUT - outlier-free dataset in original units
#   SCALED_INPUT   - scaled dataset produced in Part 3
ORIGINAL_INPUT = os.path.join(OUTPUTS_DRIVE, 'dataset_no_outliers.csv')
SCALED_INPUT = os.path.join(OUTPUTS_DRIVE, 'scaled_dataset.csv')

# Create the output directories up front; exist_ok makes the cell safe to re-run.
for _out_dir in (OUTPUTS_DRIVE, OUTPUTS_DRIVE_EDA):
    os.makedirs(_out_dir, exist_ok=True)

In [4]:
# Load both versions of the dataset:
#   df_orig   - original units; engineered features are computed from this frame
#   df_scaled - scaled values; the final frame the engineered features get attached to
df_orig = pd.read_csv(ORIGINAL_INPUT)
df_scaled = pd.read_csv(SCALED_INPUT)

# Report where each frame came from and its (rows, columns) shape.
for _label, _path, _frame in (
    ("Loaded ORIGINAL:", ORIGINAL_INPUT, df_orig),
    ("Loaded SCALED  :", SCALED_INPUT, df_scaled),
):
    print(_label, _path, "| Shape:", _frame.shape)

Loaded ORIGINAL: /content/drive/MyDrive/StudentStressLevelMonitoring/results/outputs/dataset_no_outliers.csv | Shape: (793, 21)
Loaded SCALED  : /content/drive/MyDrive/StudentStressLevelMonitoring/results/outputs/scaled_dataset.csv | Shape: (793, 21)


In [5]:
# Engineered (composite) features: each key becomes a new column whose value is
# the row-wise mean of the listed sub-feature columns.
# NOTE(review): the sub-features are averaged as-is, in original units. If they
# do not share a common scale or direction, the composite will be dominated by
# the widest-ranged column -- confirm this is intended.
SUB_COLUMNS = {
    "health_index": ["depression", "headache", "blood_pressure", "breathing_problem"],
    "academic_stress": ["academic_performance", "study_load", "future_career_concerns"],
    "social_environment": ["social_support", "peer_pressure", "bullying"]
}

# Assigning a Series to a DataFrame column aligns on index labels. If the two
# frames do not share the same index, the assignment below would silently
# produce NaN (or attach values to the wrong rows), so fail loudly instead.
# This only checks the index labels; it presumes both CSVs keep their rows in
# the same order -- verify against the upstream notebook that wrote them.
if not df_orig.index.equals(df_scaled.index):
    raise ValueError(
        "df_orig and df_scaled must share the same row index to attach "
        f"engineered features (got {len(df_orig)} vs {len(df_scaled)} rows)."
    )

engineered_features = []
# Create engineered features on ORIGINAL df, then attach to scaled by index
for feat_name, cols in SUB_COLUMNS.items():
    # Use only the sub-columns actually present in the original frame.
    available = [c for c in cols if c in df_orig.columns]
    if not available:
        print(f"[warn] No available columns for '{feat_name}'. Skipping.")
        continue
    # Row-wise mean over the available sub-columns (original units).
    df_orig[feat_name] = df_orig[available].mean(axis=1)
    # NOTE(review): the attached column is NOT scaled, while the rest of
    # df_scaled is -- confirm downstream steps expect that.
    df_scaled[feat_name] = df_orig[feat_name]
    engineered_features.append(feat_name)
    print(f"[ok] Created '{feat_name}' from: {available}")

if engineered_features:
    print("\nFirst 10 rows of engineered features (on original units):")
    display(df_orig[engineered_features].head(10))
else:
    print("\n[info] No engineered features created.")

[ok] Created 'health_index' from: ['depression', 'headache', 'blood_pressure', 'breathing_problem']
[ok] Created 'academic_stress' from: ['academic_performance', 'study_load', 'future_career_concerns']
[ok] Created 'social_environment' from: ['social_support', 'peer_pressure', 'bullying']

First 10 rows of engineered features (on original units):


Unnamed: 0,health_index,academic_stress,social_environment
0,4.5,2.666667,2.333333
1,6.75,3.333333,3.333333
2,4.75,2.333333,2.333333
3,6.25,3.333333,3.333333
4,3.25,3.0,3.666667
5,2.5,2.333333,2.0
6,8.5,2.666667,3.333333
7,5.0,3.0,2.666667
8,8.75,2.666667,3.333333
9,3.25,2.666667,1.666667


In [6]:
# For every engineered-feature group, draw one bar chart of the column means of
# its sub-features in the scaled frame and save it as a PNG in the EDA folder.
for feat_name, cols in SUB_COLUMNS.items():
    available = [col for col in cols if col in df_scaled.columns]
    if available:
        means = df_scaled[available].mean()
        # Bars are spaced 2 units apart with width 1, leaving a gap between them.
        x = np.arange(len(available)) * 2

        fig, ax = plt.subplots(figsize=(8, 5))
        ax.bar(x, means.values, width=1.0)
        ax.set_xticks(x)
        ax.set_xticklabels(available, rotation=25)
        ax.set_title(f"Scaled {feat_name.replace('_',' ').title()} Sub-Features — Average")
        ax.set_ylabel("Scaled Average Value")
        ax.set_xlabel("Sub-Features")
        fig.tight_layout()

        out_drive_eda = os.path.join(OUTPUTS_DRIVE_EDA, f"bar_scaled_{feat_name}.png")
        fig.savefig(out_drive_eda, dpi=150, bbox_inches="tight")
        # Close the figure so it is not rendered inline and does not accumulate in memory.
        plt.close(fig)
        print("Saved:", out_drive_eda)


Saved: /content/drive/MyDrive/StudentStressLevelMonitoring/results/eda_visualizations/bar_scaled_health_index.png
Saved: /content/drive/MyDrive/StudentStressLevelMonitoring/results/eda_visualizations/bar_scaled_academic_stress.png
Saved: /content/drive/MyDrive/StudentStressLevelMonitoring/results/eda_visualizations/bar_scaled_social_environment.png


In [7]:
# Persist the scaled frame, now carrying the engineered feature columns, as a
# CSV in the outputs directory. The row index is not written to the file.
_engineered_csv_name = "scaled_dataset_with_engineered.csv"
OUT_DRIVE = os.path.join(OUTPUTS_DRIVE, _engineered_csv_name)
df_scaled.to_csv(path_or_buf=OUT_DRIVE, index=False)

# Report the saved location: a header line followed by the path on its own line.
print("\nSaved:", " ".join((" -", OUT_DRIVE)), sep="\n")



Saved:
 - /content/drive/MyDrive/StudentStressLevelMonitoring/results/outputs/scaled_dataset_with_engineered.csv
