# Physics Wallah Student Experience Survey — Analysis Notebook

This notebook reproduces core exploratory analysis steps on the cleaned dataset and generates figures & summary metrics.

In [None]:
# Imports
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from textblob import TextBlob
from pathlib import Path
sns.set_theme()

In [None]:
# Paths
# ROOT is the parent of the current working directory, made absolute;
# DATA is the cleaned survey CSV expected underneath it.
ROOT = Path('..').resolve()
DATA = ROOT.joinpath('data', 'cleaned_responses.csv')
# Last expression of the cell: shows whether the CSV is actually there.
DATA.exists()

In [None]:
# Load data
# Read the cleaned responses into a DataFrame and preview the first five rows.
df = pd.read_csv(DATA)
df.head(5)

## Basic numeric summaries

In [None]:
# Keep only the columns whose dtype pandas classifies as numeric, then show
# the standard descriptive statistics for them.
is_numeric = pd.api.types.is_numeric_dtype
numeric_cols = [name for name in df.columns if is_numeric(df[name])]
df.loc[:, numeric_cols].describe()

## Satisfaction score distribution (plotted only if the column is present)

In [None]:
# Histogram with a KDE overlay of the satisfaction scores; the cell does
# nothing when the column is absent.
if 'satisfaction_score' in df.columns:
    hist_ax = sns.histplot(df['satisfaction_score'], kde=True)
    hist_ax.set_title('Distribution: Satisfaction Score')
    plt.show()

## Recommendation / NPS-style calculation

In [None]:
# NPS-style score from 'recommend_score': share of promoters (score >= 9)
# minus share of detractors (score <= 6), expressed in percentage points.
if 'recommend_score' in df.columns:
    # Drop missing answers first. A NaN fails both comparisons below, so
    # keeping it would inflate the denominator of both rates and make the
    # score disagree with the bucket shares, which ignore missing values.
    scores = df['recommend_score'].dropna()
    if scores.empty:
        # The mean of an empty selection is NaN; report that explicitly
        # instead of printing "nan".
        print('NPS-style score: n/a (no recommend_score responses)')
    else:
        promoters = (scores >= 9).mean()
        detractors = (scores <= 6).mean()
        nps = (promoters - detractors) * 100
        print(f'NPS-style score: {nps:.1f}')
        # Right-closed bins: (-1, 6] Detractor, (6, 8] Passive, (8, 10] Promoter.
        buckets = pd.cut(
            scores,
            bins=[-1, 6, 8, 10],
            labels=['Detractor', 'Passive', 'Promoter'],
        )
        # Printed explicitly: an expression nested inside `if` is not the
        # cell's last top-level expression, so the notebook would not show it.
        print(buckets.value_counts(normalize=True))

## Sentiment analysis of open feedback

In [None]:
# TextBlob polarity for every non-missing free-text answer, followed by
# summary statistics of those polarity values.
if 'open_feedback' in df.columns:
    # Missing answers are dropped; everything else is coerced to str so
    # TextBlob always receives text.
    feedback = df['open_feedback'].dropna().astype(str)
    sentiments = feedback.apply(lambda text: TextBlob(text).sentiment.polarity)
    print('Sentiment polarity stats:')
    # Printed explicitly: an expression nested inside `if` is not the cell's
    # last top-level expression, so the notebook would not show it.
    print(sentiments.describe())

## Categorical counts (device, usage frequency)

In [None]:
# Object-dtype columns are treated as categorical; the free-text
# 'open_feedback' column is left out.
categorical_cols = []
for name in df.columns:
    if name == 'open_feedback':
        continue
    if df[name].dtype == 'object':
        categorical_cols.append(name)

# One horizontal count plot per categorical column.
for name in categorical_cols:
    count_ax = sns.countplot(y=df[name])
    count_ax.set_title(f'Counts: {name}')
    plt.show()

## Save figures (optional)

In [None]:
# Save the satisfaction-score histogram as a PNG under artifacts/figures.
FIG_DIR = ROOT / 'artifacts' / 'figures'
FIG_DIR.mkdir(parents=True, exist_ok=True)
if 'satisfaction_score' in df.columns:
    # Draw on a dedicated figure so the saved image cannot pick up artists
    # left on whatever axes happened to be current.
    fig, ax = plt.subplots()
    sns.histplot(df['satisfaction_score'], kde=True, ax=ax)
    ax.set_title('Distribution: Satisfaction Score')
    fig_path = FIG_DIR / 'dist_satisfaction_score.png'
    fig.tight_layout()
    fig.savefig(fig_path, dpi=150)
    # Close (not just clear) the figure so it is released once saved.
    plt.close(fig)
    # Printed explicitly: an expression nested inside `if` is not the cell's
    # last top-level expression, so the notebook would not show it.
    print(f'Saved figure: {fig_path}')

## Export lightweight summary

In [None]:
# Build a short text summary (row/column counts plus the NPS-style score when
# available) and append it to artifacts/summary.md.
summary_lines = [
    f'Rows: {len(df)}',
    f'Columns: {len(df.columns)}',
]
if 'recommend_score' in df.columns:
    # Missing answers are excluded so they do not dilute the promoter and
    # detractor shares (a NaN fails both comparisons).
    scores = df['recommend_score'].dropna()
    if scores.empty:
        summary_lines.append('NPS-style score: n/a (no recommend_score responses)')
    else:
        promoters = (scores >= 9).mean()
        detractors = (scores <= 6).mean()
        nps = (promoters - detractors) * 100
        summary_lines.append(f'NPS-style score: {nps:.1f}')
summary_md = '\n'.join(summary_lines)
summary_path = ROOT / 'artifacts' / 'summary.md'
# Create the output directory here so this cell works even when the optional
# figure-saving cell was skipped.
summary_path.parent.mkdir(parents=True, exist_ok=True)
# Append mode: every run adds a new section instead of overwriting the file.
with open(summary_path, 'a', encoding='utf-8') as f:
    f.write('\n\nNotebook summary appended:\n')
    f.write(summary_md)
summary_md