# Faculty Survey

In [None]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
import seaborn as sns

In [None]:
# Load the anonymized survey export.
df = pd.read_csv("faculty_survey_anonymized.csv")

# alias_map: positional alias ("col1", "col2", ...) -> original header text.
headers = list(df.columns)
alias_map = {
    f"col{position}": header
    for position, header in enumerate(headers, start=1)
}
# reverse_map: original header text -> alias, the direction rename() expects.
reverse_map = {header: short for short, header in alias_map.items()}

# Print the lookup table so the aliases used below can be traced to questions.
print("Alias to Original Header Mapping:")
for alias, original in alias_map.items():
    print(f"{alias}: {original}")

# Rename columns to their aliases, then mark every missing cell with a
# literal "No Response" placeholder.
df_alias = df.rename(columns=reverse_map).fillna("No Response")

## Demographics

In [None]:
# Pie chart of the answers in col7, titled with that column's original header.
# NOTE(review): only two colors are supplied; presumably plotly reuses them
# when the column has more than two distinct answers - confirm.
fig1 = px.pie(
    df_alias,
    names="col7",
    title=alias_map["col7"],
    color_discrete_sequence=["#8A100B", "#B29d6c"],
)

fig1.show()

In [None]:
# Pie chart of the answers in col8, titled with that column's original header.
# NOTE(review): only two colors are supplied; presumably plotly reuses them
# when the column has more than two distinct answers - confirm.
fig2 = px.pie(
    df_alias,
    names="col8",
    title=alias_map["col8"],
    color_discrete_sequence=["#8A100B", "#B29d6c"],
)

fig2.show()

In [None]:
# Bar chart of the answers in col9.
fig3 = px.bar(
    df_alias,
    x="col9",
    title=alias_map["col9"],  # show original column name as chart title
    color_discrete_sequence=["#8A100B", "#B29d6c"],
)

# Sort the x-axis categories in ascending order.
fig3.update_layout(xaxis=dict(categoryorder="category ascending"))

fig3.show()

In [None]:
# Donut chart (pie with a 0.4 hole) of the answers in col10, titled with that
# column's original header.
# NOTE(review): only two colors are supplied; presumably plotly reuses them
# when the column has more than two distinct answers - confirm.
fig4 = px.pie(
    df_alias,
    names="col10",
    title=alias_map["col10"],
    hole=0.4,
    color_discrete_sequence=["#8A100B", "#B29d6c"],
)

fig4.show()

## Survey Results

In [None]:
# Lower-cased Likert labels ordered from most to least important; a label's
# position in this tuple, counted from 1, is its numeric score.
_importance_labels = (
    "extremely important",
    "very important",
    "moderately important",
    "slightly important",
    "not at all important",
)

importance_map = {
    label: score for score, label in enumerate(_importance_labels, start=1)
}
importance_map["no response"] = np.nan  # treat "No Response" as missing

In [None]:
# Work on a copy so df_alias keeps the raw answer text.
df_mapped = df_alias.copy()

# Aliases of the columns to score. range() upper bounds are exclusive, so this
# selects col13-col61, col63-col94 and col96-col104.
# NOTE(review): col62 and col95 are skipped - presumably they are not
# importance ratings; confirm against the printed alias mapping.
part1 = [f"col{i}" for i in range(13, 62)]
part2 = [f"col{i}" for i in range(63, 95)]
part3 = [f"col{i}" for i in range(96, 105)]

text_cols = part1 + part2 + part3


def _to_importance_score(value):
    """Normalize one cell and translate a known importance label to its score.

    Strings are stripped and lower-cased. A label found in importance_map is
    replaced by its mapped value (NaN for "no response"); any other string is
    returned in its normalized form. Non-string values pass through unchanged.
    """
    if not isinstance(value, str):
        return value
    cleaned = value.strip().lower()
    return importance_map.get(cleaned, cleaned)


# A single element-wise pass does both the normalizing and the label lookup.
# This avoids a separate DataFrame.replace pass over object columns, which
# relies on the silent object-downcasting that pandas 2.2 deprecates.
df_mapped[text_cols] = df_mapped[text_cols].map(_to_importance_score)

# Whatever is still non-numeric (unrecognized text) is coerced to NaN.
df_mapped[text_cols] = df_mapped[text_cols].apply(pd.to_numeric, errors="coerce")

In [None]:
# Total score per rated column. NaN cells are skipped, so blank and
# unrecognized answers add nothing to a column's total.
# NOTE(review): because importance_map scores "extremely important" as 1, a
# column with more missing answers ends up with a smaller total and so ranks
# as more important when sorted ascending - confirm a sum (not a mean) is
# intended.
col_sums = df_mapped[text_cols].sum(axis=0, skipna=True)

# Plain {alias: total} dictionary used for sorting and reporting.
col_sum_dict = col_sums.to_dict()

In [None]:
# Rank the columns by total score, smallest first; sorted() is stable, so
# columns with equal totals keep their existing relative order.
ranked_pairs = sorted(col_sum_dict.items(), key=lambda pair: pair[1])
sorted_totals = dict(ranked_pairs)

for col, total in ranked_pairs:
    print(f"{col}: {total}")


In [None]:
# topic_map: alias -> the header text after the first " - " separator, or the
# whole header when it contains no separator.
topic_map = {
    alias: original.partition(" - ")[2] if " - " in original else original
    for alias, original in alias_map.items()
}




In [None]:
# Ascending ranking by total score, printed with each column's topic text.
sorted_totals = dict(sorted(col_sum_dict.items(), key=lambda entry: entry[1]))

for col in sorted_totals:
    total = sorted_totals[col]
    topic = topic_map.get(col, "")  # empty string when no topic is known
    print(f"{col} | {topic} | {total}")

In [None]:
# One record per rated column, ordered by ascending total score.
records = []
for col, total in sorted(col_sum_dict.items(), key=lambda entry: entry[1]):
    records.append(
        dict(alias=col, topic=topic_map.get(col, ""), total_score=total)
    )
results_faculty = pd.DataFrame(records)

# Write the ranking to the working directory without the index column.
results_faculty.to_csv("results_faculty.csv", index=False)

print("✅ Exported results_faculty.csv")