<a href="https://colab.research.google.com/github/DarshanSuresh/Academic_Stress_Levels_in_UG_Students_using_ML_Models/blob/main/Git_Fork01.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Corrected Code for Stress Analysis - Meditation and Academic Stress

import pandas as pd
import numpy as np

# Step 1: Load the CSV File
file_path = "/content/Research Paper Data Collection (UG) - Meditation and Academic Stress - Main.csv"
df = pd.read_csv(file_path, encoding='utf-8', on_bad_lines='skip')

# Step 2: Select Relevant Columns (with corrected column names)
# Note: The column names don't have prefixes like "A." and "B." in the actual dataset
cols = [
    "Enter your Gender",
    "Frequency of Meditation:",
    "PRE1. I feel stressed about academic deadlines and exams.",
    "POST1. I feel stressed about academic deadlines and exams."
]
data = df[cols].copy()

# Step 3: Clean the Data
data = data.dropna()
data.rename(columns={
    "Frequency of Meditation:": "Meditation_Frequency",
    "PRE1. I feel stressed about academic deadlines and exams.": "Stress_Pre",
    "POST1. I feel stressed about academic deadlines and exams.": "Stress_Post"
}, inplace=True)

# Step 4: The stress levels are already encoded as numbers (1-5 scale)
# No need for manual encoding - the data is already numerical

# Step 5: Compute Average Reduction in Stress
data["Stress_Change"] = data["Stress_Post"] - data["Stress_Pre"]
avg_change = data["Stress_Change"].mean()

print("=" * 80)
print("                       STRESS ANALYSIS RESULTS")
print("=" * 80)
print()
print("🔹 Average Stress Change (Post - Pre):", round(avg_change, 2))
print()

if avg_change < 0:
    print("🧘 Meditation seems to reduce stress on average.")
    print(f"   The average stress reduction is {abs(round(avg_change, 2))} points on a 5-point scale.")
elif avg_change > 0:
    print("⚠️ Stress increased slightly post meditation.")
    print(f"   The average stress increase is {round(avg_change, 2)} points on a 5-point scale.")
else:
    print("ℹ️ No significant change in stress levels observed.")

print()
print("=" * 80)
print()

# Additional statistics
print("📊 DETAILED STATISTICS:")
print("-" * 80)
print(f"   • Average PRE-meditation stress:  {round(data['Stress_Pre'].mean(), 2)}")
print(f"   • Average POST-meditation stress: {round(data['Stress_Post'].mean(), 2)}")
print(f"   • Total respondents analyzed:     {len(data)}")
print()

# Count how many people experienced reduction, increase, or no change
reduced = (data["Stress_Change"] < 0).sum()
increased = (data["Stress_Change"] > 0).sum()
no_change = (data["Stress_Change"] == 0).sum()

print("📈 STRESS CHANGE DISTRIBUTION:")
print("-" * 80)
print(f"   • Stress REDUCED:    {reduced} respondents ({round(reduced/len(data)*100, 1)}%)")
print(f"   • Stress INCREASED:  {increased} respondents ({round(increased/len(data)*100, 1)}%)")
print(f"   • NO CHANGE:         {no_change} respondents ({round(no_change/len(data)*100, 1)}%)")
print()
print("=" * 80)

# Analysis by Meditation Frequency
print()
print("=" * 80)
print("         STRESS REDUCTION BY MEDITATION FREQUENCY")
print("=" * 80)
print()
freq_analysis = data.groupby("Meditation_Frequency").agg({
    "Stress_Change": ["mean", "count"],
    "Stress_Pre": "mean",
    "Stress_Post": "mean"
}).round(2)
print(freq_analysis)
print()

# Analysis by Gender
print("=" * 80)
print("              STRESS REDUCTION BY GENDER")
print("=" * 80)
print()
gender_analysis = data.groupby("Enter your Gender").agg({
    "Stress_Change": ["mean", "count"],
    "Stress_Pre": "mean",
    "Stress_Post": "mean"
}).round(2)
print(gender_analysis)
print()
print("=" * 80)


                       STRESS ANALYSIS RESULTS

🔹 Average Stress Change (Post - Pre): -0.34

🧘 Meditation seems to reduce stress on average.
   The average stress reduction is 0.34 points on a 5-point scale.


📊 DETAILED STATISTICS:
--------------------------------------------------------------------------------
   • Average PRE-meditation stress:  3.5
   • Average POST-meditation stress: 3.16
   • Total respondents analyzed:     351

📈 STRESS CHANGE DISTRIBUTION:
--------------------------------------------------------------------------------
   • Stress REDUCED:    122 respondents (34.8%)
   • Stress INCREASED:  38 respondents (10.8%)
   • NO CHANGE:         191 respondents (54.4%)


         STRESS REDUCTION BY MEDITATION FREQUENCY

                     Stress_Change       Stress_Pre Stress_Post
                              mean count       mean        mean
Meditation_Frequency                                           
0 days/week                  -0.26    95       3.35        3.0

In [4]:
import pandas as pd
import numpy as np
import re
import os

# File path - use the correct filename
file_path = "/content/Research Paper Data Collection (UG) - Meditation and Academic Stress - Main.csv"

# 1) Load the CSV WITHOUT skiprows (the issue is skiprows=3 is skipping important header rows)
df = pd.read_csv(file_path, encoding='utf-8', on_bad_lines='skip')
print("Loaded rows,cols:", df.shape)
print("\nFirst 5 column names:")
for i, col in enumerate(df.columns[:10], 1):
    print(f"  {i}. {col}")
print("\n...")
print("\nColumns containing 'PRE':")
pre_related = [c for c in df.columns if 'PRE' in c.upper()]
for col in pre_related:
    print(f"  - {col}")
print("\nColumns containing 'POST':")
post_related = [c for c in df.columns if 'POST' in c.upper()]
for col in post_related:
    print(f"  - {col}")


Loaded rows,cols: (351, 49)

First 5 column names:
  1. Submission ID
  2. Respondent ID
  3. Submitted at
  4. Enter your age
  5. Enter your Gender
  6. Program of Study
  7. Semester
  8. Program/Course
  9. Program Other Field
  10. Prior Meditation Experience:

...

Columns containing 'PRE':
  - PRE1. I feel stressed about academic deadlines and exams.
  - PRE2. I struggle to concentrate on studies due to stress.
  - PRE3. Meditation does not help reduce my stress.
  - PRE4. I often feel anxious before tests or assignments.
  - PRE5. I can manage multiple academic tasks without feeling overwhelmed.
  - PRE6. Stress negatively affects my academic performance.
  - PRE7. I feel calm and focused during study sessions.

Columns containing 'POST':
  - POST1. I feel stressed about academic deadlines and exams.
  - POST2. I struggle to concentrate on studies due to stress.
  - POST3. Meditation does not help reduce my stress.
  - POST4. I often feel anxious before tests or assignments.
  

In [6]:

# Now let's create the corrected continuation code

import pandas as pd
import numpy as np
import re
import os

# File path - IMPORTANT: Remove skiprows=3 to load the data correctly
file_path = "/content/Research Paper Data Collection (UG) - Meditation and Academic Stress - Main.csv"

# 1) Load the CSV (without skiprows)
df = pd.read_csv(file_path, encoding='utf-8', on_bad_lines='skip')
print("✅ Loaded rows, cols:", df.shape)
print("\n" + "="*80 + "\n")

# 2) Detect PRE/POST item columns
pre_cols = [c for c in df.columns if c.strip().upper().startswith('PRE')]
post_cols = [c for c in df.columns if c.strip().upper().startswith('POST')]

print("✅ Detected PRE cols:", len(pre_cols))
for col in pre_cols:
    print(f"   - {col}")
print()
print("✅ Detected POST cols:", len(post_cols))
for col in post_cols:
    print(f"   - {col}")
print("\n" + "="*80 + "\n")

# 3) Create numeric treatment & outcome variables
# Map frequency text -> numeric days/week
def freq_to_days(val):
    if pd.isna(val):
        return np.nan
    s = str(val).lower()
    if 'daily' in s or 'everyday' in s:
        return 7.0
    if '5-6' in s:
        return 5.5
    if '3-4' in s or '3 - 4' in s or '3 to 4' in s:
        return 3.5
    if '1-2' in s or '1 - 2' in s or '1 to 2' in s:
        return 1.5
    if 'none' in s or '0' in s or 'never' in s:
        return 0.0
    if 'rare' in s:
        return 0.3
    if 'occasional' in s:
        return 1.5
    if 'frequent' in s:
        return 4.0
    if 'weekly' in s:
        m = re.search(r'(\d+)', s)
        return float(m.group(1)) if m else 1.0
    m = re.search(r'(\d+(\.\d+)?)', s)
    if m:
        return float(m.group(1))
    return np.nan

# Find the frequency column (corrected name)
freq_col = "Frequency of Meditation:"
if freq_col not in df.columns:
    freq_candidates = [c for c in df.columns if 'frequency' in c.lower()]
    freq_col = freq_candidates[0] if freq_candidates else None

if freq_col:
    df['meditation_freq_days'] = df[freq_col].apply(freq_to_days)
    print(f"✅ Created 'meditation_freq_days' from column: {freq_col}")
    print(f"   Unique values: {sorted(df['meditation_freq_days'].dropna().unique())}")
else:
    df['meditation_freq_days'] = np.nan
    print("⚠️ Frequency column not found")

print("\n" + "="*80 + "\n")

# Map average duration text -> minutes numeric
dur_col = "Average Duration of each meditation session:"
def duration_to_min(x):
    if pd.isna(x): return np.nan
    s = str(x).lower()
    if '<5' in s or 'less than 5' in s: return 2.5
    if '5-15' in s or '5 - 15' in s: return 10
    if '15-30' in s: return 22.5
    if '30-60' in s: return 45
    if '>60' in s or 'more than 60' in s: return 75
    m = re.search(r'(\d+)', s)
    return float(m.group(1)) if m else np.nan

if dur_col in df.columns:
    df['meditation_dur_min'] = df[dur_col].apply(duration_to_min)
    print(f"✅ Created 'meditation_dur_min' from column: {dur_col}")
    print(f"   Unique values: {sorted(df['meditation_dur_min'].dropna().unique())}")
else:
    df['meditation_dur_min'] = np.nan
    print("⚠️ Duration column not found")

print("\n" + "="*80 + "\n")

# Build PRE/POST scores (sum of items)
if len(pre_cols) >= 1 and len(post_cols) >= 1:
    # Convert to numeric where possible
    df[pre_cols] = df[pre_cols].apply(pd.to_numeric, errors='coerce')
    df[post_cols] = df[post_cols].apply(pd.to_numeric, errors='coerce')

    # Calculate scores
    df['PRE_score'] = df[pre_cols].sum(axis=1)
    df['POST_score'] = df[post_cols].sum(axis=1)
    df['delta_score'] = df['POST_score'] - df['PRE_score']   # negative = reduced stress

    print("✅ Created PRE/POST scores:")
    print(f"   PRE_score range: {df['PRE_score'].min():.1f} - {df['PRE_score'].max():.1f}")
    print(f"   POST_score range: {df['POST_score'].min():.1f} - {df['POST_score'].max():.1f}")
    print(f"   Average PRE_score: {df['PRE_score'].mean():.2f}")
    print(f"   Average POST_score: {df['POST_score'].mean():.2f}")
    print(f"   Average delta_score: {df['delta_score'].mean():.2f}")
else:
    raise RuntimeError("Could not find PRE/POST columns. Please check column names.")

print("\n" + "="*80 + "\n")

# Target (classification): did stress *decrease* after meditation?
# improved = 1 if POST < PRE (stress decreased)
df['improved'] = (df['POST_score'] < df['PRE_score']).astype(int)

# Inspect class balance
print("✅ Target variable 'improved' created:")
print("\nImproved counts:")
print(df['improved'].value_counts(dropna=False).sort_index())
print()
print("Percentage distribution:")
improved_pct = df['improved'].value_counts(normalize=True, dropna=False).sort_index() * 100
for idx, val in improved_pct.items():
    label = "Did NOT improve (stress same or increased)" if idx == 0 else "IMPROVED (stress decreased)"
    print(f"   {idx}: {label} = {val:.1f}%")

print("\n" + "="*80)
print("✅ Data preparation complete!")
print("="*80)


✅ Loaded rows, cols: (351, 49)


✅ Detected PRE cols: 7
   - PRE1. I feel stressed about academic deadlines and exams.
   - PRE2. I struggle to concentrate on studies due to stress.
   - PRE3. Meditation does not help reduce my stress.
   - PRE4. I often feel anxious before tests or assignments.
   - PRE5. I can manage multiple academic tasks without feeling overwhelmed.
   - PRE6. Stress negatively affects my academic performance.
   - PRE7. I feel calm and focused during study sessions.

✅ Detected POST cols: 7
   - POST1. I feel stressed about academic deadlines and exams.
   - POST2. I struggle to concentrate on studies due to stress.
   - POST3. Meditation does not help reduce my stress.
   - POST4. I often feel anxious before tests or assignments.
   - POST5. I can manage multiple academic tasks without feeling overwhelmed.
   - POST6. Stress negatively affects my academic performance.
   - POST7. I feel calm and focused during study sessions.


✅ Created 'meditation_freq_days' f

In [7]:

# Let's also show some additional statistics and save the processed data

print("\n" + "="*80)
print("          ADDITIONAL STATISTICS & DATA INSIGHTS")
print("="*80 + "\n")

# Check for missing values in key columns
print("Missing values in key variables:")
print(f"   meditation_freq_days: {df['meditation_freq_days'].isna().sum()} ({df['meditation_freq_days'].isna().sum()/len(df)*100:.1f}%)")
print(f"   meditation_dur_min: {df['meditation_dur_min'].isna().sum()} ({df['meditation_dur_min'].isna().sum()/len(df)*100:.1f}%)")
print(f"   PRE_score: {df['PRE_score'].isna().sum()} ({df['PRE_score'].isna().sum()/len(df)*100:.1f}%)")
print(f"   POST_score: {df['POST_score'].isna().sum()} ({df['POST_score'].isna().sum()/len(df)*100:.1f}%)")
print()

# Show distribution of delta_score
print("Delta Score Distribution (POST - PRE):")
print(f"   Negative (improved): {(df['delta_score'] < 0).sum()} students")
print(f"   Zero (no change): {(df['delta_score'] == 0).sum()} students")
print(f"   Positive (worsened): {(df['delta_score'] > 0).sum()} students")
print()

# Improvement by meditation frequency
print("Improvement rate by Meditation Frequency:")
improvement_by_freq = df.groupby('meditation_freq_days')['improved'].agg(['mean', 'count'])
improvement_by_freq['improvement_rate_%'] = (improvement_by_freq['mean'] * 100).round(1)
improvement_by_freq = improvement_by_freq.rename(columns={'mean': 'improvement_rate', 'count': 'n_students'})
print(improvement_by_freq[['n_students', 'improvement_rate_%']])
print()

# Improvement by meditation duration
print("Improvement rate by Meditation Duration:")
improvement_by_dur = df.groupby('meditation_dur_min')['improved'].agg(['mean', 'count'])
improvement_by_dur['improvement_rate_%'] = (improvement_by_dur['mean'] * 100).round(1)
improvement_by_dur = improvement_by_dur.rename(columns={'mean': 'improvement_rate', 'count': 'n_students'})
print(improvement_by_dur[['n_students', 'improvement_rate_%']])

print("\n" + "="*80)



          ADDITIONAL STATISTICS & DATA INSIGHTS

Missing values in key variables:
   meditation_freq_days: 0 (0.0%)
   meditation_dur_min: 0 (0.0%)
   PRE_score: 0 (0.0%)
   POST_score: 0 (0.0%)

Delta Score Distribution (POST - PRE):
   Negative (improved): 168 students
   Zero (no change): 64 students
   Positive (worsened): 119 students

Improvement rate by Meditation Frequency:
                      n_students  improvement_rate_%
meditation_freq_days                                
0.0                           95                49.5
1.5                          188                47.3
3.5                           45                53.3
5.5                           14                42.9
7.0                            9                22.2

Improvement rate by Meditation Duration:
                    n_students  improvement_rate_%
meditation_dur_min                                
2.5                        149                42.3
10.0                       149                53

In [10]:

# Create the complete corrected code file for the user
# Corrected Code for Meditation & Academic Stress Analysis
# Including data loading, preprocessing, and feature engineering

import pandas as pd
import numpy as np
import re
import os

# ============================================================================
# STEP 1: LOAD THE CSV FILE
# ============================================================================
# IMPORTANT: Remove skiprows=3 - it was causing the header row to be skipped!
file_path = "/content/Research Paper Data Collection (UG) - Meditation and Academic Stress - Main.csv"

# Load the CSV
df = pd.read_csv(file_path, encoding='utf-8', on_bad_lines='skip')
print("✅ Loaded rows, cols:", df.shape)
print("\\n" + "="*80 + "\\n")

# ============================================================================
# STEP 2: DETECT PRE/POST ITEM COLUMNS
# ============================================================================
pre_cols = [c for c in df.columns if c.strip().upper().startswith('PRE')]
post_cols = [c for c in df.columns if c.strip().upper().startswith('POST')]

print("✅ Detected PRE cols:", len(pre_cols))
for col in pre_cols:
    print(f"   - {col}")
print()
print("✅ Detected POST cols:", len(post_cols))
for col in post_cols:
    print(f"   - {col}")
print("\\n" + "="*80 + "\\n")

# ============================================================================
# STEP 3: CREATE NUMERIC TREATMENT & OUTCOME VARIABLES
# ============================================================================

# Function to map frequency text -> numeric days/week
def freq_to_days(val):
    if pd.isna(val):
        return np.nan
    s = str(val).lower()
    if 'daily' in s or 'everyday' in s:
        return 7.0
    if '5-6' in s:
        return 5.5
    if '3-4' in s or '3 - 4' in s or '3 to 4' in s:
        return 3.5
    if '1-2' in s or '1 - 2' in s or '1 to 2' in s:
        return 1.5
    if 'none' in s or '0' in s or 'never' in s:
        return 0.0
    if 'rare' in s:
        return 0.3
    if 'occasional' in s:
        return 1.5
    if 'frequent' in s:
        return 4.0
    if 'weekly' in s:
        m = re.search(r'(\\d+)', s)
        return float(m.group(1)) if m else 1.0
    m = re.search(r'(\\d+(\\.\\d+)?)', s)
    if m:
        return float(m.group(1))
    return np.nan

# Find the frequency column (corrected name without "B." prefix)
freq_col = "Frequency of Meditation:"
if freq_col not in df.columns:
    freq_candidates = [c for c in df.columns if 'frequency' in c.lower()]
    freq_col = freq_candidates[0] if freq_candidates else None

if freq_col:
    df['meditation_freq_days'] = df[freq_col].apply(freq_to_days)
    print(f"✅ Created 'meditation_freq_days' from column: {freq_col}")
    print(f"   Unique values: {sorted(df['meditation_freq_days'].dropna().unique())}")
else:
    df['meditation_freq_days'] = np.nan
    print("⚠️ Frequency column not found")

print("\\n" + "="*80 + "\\n")

# Function to map average duration text -> minutes numeric
def duration_to_min(x):
    if pd.isna(x):
        return np.nan
    s = str(x).lower()
    if '<5' in s or 'less than 5' in s:
        return 2.5
    if '5-15' in s or '5 - 15' in s:
        return 10
    if '15-30' in s:
        return 22.5
    if '30-60' in s:
        return 45
    if '>60' in s or 'more than 60' in s:
        return 75
    m = re.search(r'(\\d+)', s)
    return float(m.group(1)) if m else np.nan

# Find the duration column
dur_col = "Average Duration of each meditation session:"
if dur_col in df.columns:
    df['meditation_dur_min'] = df[dur_col].apply(duration_to_min)
    print(f"✅ Created 'meditation_dur_min' from column: {dur_col}")
    print(f"   Unique values: {sorted(df['meditation_dur_min'].dropna().unique())}")
else:
    df['meditation_dur_min'] = np.nan
    print("⚠️ Duration column not found")

print("\\n" + "="*80 + "\\n")

# ============================================================================
# STEP 4: BUILD PRE/POST SCORES (SUM OF ITEMS)
# ============================================================================
if len(pre_cols) >= 1 and len(post_cols) >= 1:
    # Convert to numeric where possible
    df[pre_cols] = df[pre_cols].apply(pd.to_numeric, errors='coerce')
    df[post_cols] = df[post_cols].apply(pd.to_numeric, errors='coerce')

    # Calculate scores
    df['PRE_score'] = df[pre_cols].sum(axis=1)
    df['POST_score'] = df[post_cols].sum(axis=1)
    df['delta_score'] = df['POST_score'] - df['PRE_score']   # negative = reduced stress

    print("✅ Created PRE/POST scores:")
    print(f"   PRE_score range: {df['PRE_score'].min():.1f} - {df['PRE_score'].max():.1f}")
    print(f"   POST_score range: {df['POST_score'].min():.1f} - {df['POST_score'].max():.1f}")
    print(f"   Average PRE_score: {df['PRE_score'].mean():.2f}")
    print(f"   Average POST_score: {df['POST_score'].mean():.2f}")
    print(f"   Average delta_score: {df['delta_score'].mean():.2f}")
else:
    raise RuntimeError("Could not find PRE/POST columns. Please check column names.")

print("\\n" + "="*80 + "\\n")

# ============================================================================
# STEP 5: CREATE TARGET VARIABLE (CLASSIFICATION)
# ============================================================================
# Target: did stress *decrease* after meditation?
# improved = 1 if POST < PRE (stress decreased)
df['improved'] = (df['POST_score'] < df['PRE_score']).astype(int)

# Inspect class balance
print("✅ Target variable 'improved' created:")
print("\\nImproved counts:")
print(df['improved'].value_counts(dropna=False).sort_index())
print()
print("Percentage distribution:")
improved_pct = df['improved'].value_counts(normalize=True, dropna=False).sort_index() * 100
for idx, val in improved_pct.items():
    label = "Did NOT improve (stress same or increased)" if idx == 0 else "IMPROVED (stress decreased)"
    print(f"   {idx}: {label} = {val:.1f}%")

print("\\n" + "="*80)
print("✅ Data preparation complete!")
print("="*80)

# ============================================================================
# ADDITIONAL STATISTICS
# ============================================================================
print("\\n" + "="*80)
print("          ADDITIONAL STATISTICS & DATA INSIGHTS")
print("="*80 + "\\n")

# Check for missing values in key columns
print("Missing values in key variables:")
print(f"   meditation_freq_days: {df['meditation_freq_days'].isna().sum()} ({df['meditation_freq_days'].isna().sum()/len(df)*100:.1f}%)")
print(f"   meditation_dur_min: {df['meditation_dur_min'].isna().sum()} ({df['meditation_dur_min'].isna().sum()/len(df)*100:.1f}%)")
print(f"   PRE_score: {df['PRE_score'].isna().sum()} ({df['PRE_score'].isna().sum()/len(df)*100:.1f}%)")
print(f"   POST_score: {df['POST_score'].isna().sum()} ({df['POST_score'].isna().sum()/len(df)*100:.1f}%)")
print()

# Show distribution of delta_score
print("Delta Score Distribution (POST - PRE):")
print(f"   Negative (improved): {(df['delta_score'] < 0).sum()} students")
print(f"   Zero (no change): {(df['delta_score'] == 0).sum()} students")
print(f"   Positive (worsened): {(df['delta_score'] > 0).sum()} students")
print()

# Improvement by meditation frequency
print("Improvement rate by Meditation Frequency:")
improvement_by_freq = df.groupby('meditation_freq_days')['improved'].agg(['mean', 'count'])
improvement_by_freq['improvement_rate_%'] = (improvement_by_freq['mean'] * 100).round(1)
improvement_by_freq = improvement_by_freq.rename(columns={'mean': 'improvement_rate', 'count': 'n_students'})
print(improvement_by_freq[['n_students', 'improvement_rate_%']])
print()

# Improvement by meditation duration
print("Improvement rate by Meditation Duration:")
improvement_by_dur = df.groupby('meditation_dur_min')['improved'].agg(['mean', 'count'])
improvement_by_dur['improvement_rate_%'] = (improvement_by_dur['mean'] * 100).round(1)
improvement_by_dur = improvement_by_dur.rename(columns={'mean': 'improvement_rate', 'count': 'n_students'})
print(improvement_by_dur[['n_students', 'improvement_rate_%']])

print("\\n" + "="*80)
print("\\n🎉 All done! The dataframe 'df' is ready for modeling.")
print("="*80)

# Save the complete corrected code
with open("complete_meditation_analysis.py", "w", encoding="utf-8") as f:
    f.write(complete_code)

print("✅ Complete corrected Python script saved to: complete_meditation_analysis.py")
print("\n" + "="*80)
print("KEY FIXES MADE:")
print("="*80)
print("1. ❌ REMOVED: skiprows=3 (this was skipping the header row)")
print("2. ✅ FIXED: Column name 'B. Frequency of Meditation:' → 'Frequency of Meditation:'")
print("3. ✅ FIXED: Column name 'B. Average Duration...' → 'Average Duration...'")
print("4. ✅ ADDED: Support for '5-6 days/week' frequency option")
print("5. ✅ ADDED: Additional statistics and insights")
print("="*80)


✅ Loaded rows, cols: (351, 49)
✅ Detected PRE cols: 7
   - PRE1. I feel stressed about academic deadlines and exams.
   - PRE2. I struggle to concentrate on studies due to stress.
   - PRE3. Meditation does not help reduce my stress.
   - PRE4. I often feel anxious before tests or assignments.
   - PRE5. I can manage multiple academic tasks without feeling overwhelmed.
   - PRE6. Stress negatively affects my academic performance.
   - PRE7. I feel calm and focused during study sessions.

✅ Detected POST cols: 7
   - POST1. I feel stressed about academic deadlines and exams.
   - POST2. I struggle to concentrate on studies due to stress.
   - POST3. Meditation does not help reduce my stress.
   - POST4. I often feel anxious before tests or assignments.
   - POST5. I can manage multiple academic tasks without feeling overwhelmed.
   - POST6. Stress negatively affects my academic performance.
   - POST7. I feel calm and focused during study sessions.
✅ Created 'meditation_freq_days' from 

In [11]:

# Let's also save the processed dataframe with key variables for further analysis

# Select key columns for export
key_columns = [
    'Enter your Gender',
    'Frequency of Meditation:',
    'Average Duration of each meditation session:',
    'meditation_freq_days',
    'meditation_dur_min',
    'PRE_score',
    'POST_score',
    'delta_score',
    'improved'
]

# Add all PRE and POST columns
export_cols = key_columns + pre_cols + post_cols

# Create export dataframe
df_export = df[export_cols].copy()

# Save to CSV
df_export.to_csv("processed_meditation_data.csv", index=False)

print("✅ Processed data saved to: processed_meditation_data.csv")
print(f"\n   Total rows: {len(df_export)}")
print(f"   Total columns: {len(df_export.columns)}")
print(f"\n   Columns included:")
print(f"   - Demographics: Gender")
print(f"   - Meditation variables: frequency (days/week), duration (minutes)")
print(f"   - Outcome variables: PRE_score, POST_score, delta_score, improved")
print(f"   - Individual items: {len(pre_cols)} PRE items + {len(post_cols)} POST items")
print("\n" + "="*80)


✅ Processed data saved to: processed_meditation_data.csv

   Total rows: 351
   Total columns: 23

   Columns included:
   - Demographics: Gender
   - Meditation variables: frequency (days/week), duration (minutes)
   - Outcome variables: PRE_score, POST_score, delta_score, improved
   - Individual items: 7 PRE items + 7 POST items

