# Staff Survey Analysis

In [1]:
import pandas as pd
from collections import Counter
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

# Load the raw staff survey export
df = pd.read_csv('staff_survey.csv')

# Drop the six export-metadata columns so that only question columns remain
metadata_cols = ['Start Date', 'End Date', 'Progress', 'Duration (in seconds)', 'Finished', 'Recorded Date']
survey_df = df.drop(columns=metadata_cols)

# Every row counts as one potential respondent for every question
total_possible = len(survey_df)

# One summary dict per question, grouped by detected question type
importance_questions = []
ranking_questions = []
open_ended_questions = []

# Answer labels that mark an importance question, in report column order.
# Both capitalisations of "Moderately important" are listed, so each one is
# tallied in its own column.
importance_order = [
    'Extremely important',
    'Very important',
    'Moderately Important',
    'Moderately important',
    'Slightly important',
    'Not Important',
    'Not at all important',
]

print(f"Analyzing survey with {total_possible} total respondents...")

# Classify every question column and tally its answers
for col in survey_df.columns:
    responses_not_null = survey_df[col].dropna()
    answered = len(responses_not_null)
    if not answered:
        # Nobody answered this column, so it is left out of every sheet
        continue

    counts = Counter(responses_not_null)

    # Fields shared by all three question types
    result = {
        'Question': col,
        'Total Responses': answered,
        'No Response': total_possible - answered,
        'Response Rate %': round((answered / total_possible) * 100, 1),
    }

    if any(level in counts for level in importance_order):
        # Importance question: at least one answer is a known importance label
        for level in importance_order:
            tally = counts.get(level, 0)
            share = (tally / answered) * 100
            result[level] = f"{tally} ({share:.1f}%)"
        importance_questions.append(result)
    elif all(isinstance(value, (int, float)) for value in responses_not_null if pd.notna(value)):
        # Ranking question: every answer is numeric; ranks 1 through 7 are tallied
        for rank in range(1, 8):
            tally = counts.get(rank, 0)
            share = (tally / answered) * 100
            result[f'Rank {rank}'] = f"{tally} ({share:.1f}%)"
        ranking_questions.append(result)
    else:
        # Everything else is treated as open-ended: response counts only
        open_ended_questions.append(result)

# One DataFrame per question type; None marks a type with no questions
importance_df = pd.DataFrame(importance_questions) if importance_questions else None
ranking_df = pd.DataFrame(ranking_questions) if ranking_questions else None
open_ended_df = pd.DataFrame(open_ended_questions) if open_ended_questions else None

# Order importance questions by how many respondents chose 'Extremely important'.
# Cells look like "12 (60.0%)", so the sort key is the leading integer.
if importance_df is not None:
    importance_df = importance_df.sort_values(
        'Extremely important',
        key=lambda cells: cells.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order ranking questions by how many respondents gave rank 1
if ranking_df is not None:
    ranking_df = ranking_df.sort_values(
        'Rank 1',
        key=lambda cells: cells.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order open-ended questions by response rate
if open_ended_df is not None:
    open_ended_df = open_ended_df.sort_values('Response Rate %', ascending=False)

# Function to format Excel sheet
def format_sheet(ws, df, title):
    """Lay out ``df`` on worksheet ``ws`` as a titled, styled report table.

    Row 1 becomes a merged title banner, row 2 holds the column headers and
    rows 3 onward hold the values of ``df``. Headers and values are written
    by this function itself, so it does not rely on what is already in the
    sheet.

    Args:
        ws: Worksheet to format; in this notebook it is taken from
            ``writer.sheets[...]`` of a pandas ``ExcelWriter`` that uses the
            openpyxl engine.
        df: DataFrame whose column names and values are written to the sheet.
        title: Text shown in the banner cell A1.
    """
    # Build each style object once and share it between cells instead of
    # re-creating identical objects for every cell.
    title_font = Font(size=14, bold=True, color="FFFFFF")
    title_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
    question_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
    value_alignment = Alignment(horizontal='center', vertical='center')
    stripe_fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    n_cols = len(df.columns)

    # Title banner: white bold text on a dark blue fill, merged across the table
    ws['A1'] = title
    title_cell = ws['A1']
    title_cell.font = title_font
    title_cell.fill = title_fill
    ws.merge_cells(f'A1:{get_column_letter(n_cols)}1')

    # Column headers go in row 2
    for col_num, column_title in enumerate(df.columns, 1):
        cell = ws.cell(row=2, column=col_num)
        cell.value = column_title
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # Data rows start at row 3
    for r_idx, row in enumerate(df.values, 3):
        shaded = r_idx % 2 == 0  # even worksheet rows get the grey stripe
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx)
            cell.value = value
            # The first column holds the question text: left-aligned and wrapped
            cell.alignment = question_alignment if c_idx == 1 else value_alignment
            cell.border = thin_border
            if shaded:
                cell.fill = stripe_fill

    # Wide first column for the question text, fixed width for the rest
    ws.column_dimensions['A'].width = 60
    for col_num in range(2, n_cols + 1):
        ws.column_dimensions[get_column_letter(col_num)].width = 15

    # Keep rows 1-2 and column A in view while scrolling
    ws.freeze_panes = 'B3'

# Write every non-empty question type to its own formatted worksheet
print("\nCreating formatted Excel file...")
with pd.ExcelWriter('survey_analysis_summary.xlsx', engine='openpyxl') as writer:
    # (DataFrame or None, sheet name, title prefix, label used in the log line)
    sheet_plan = (
        (importance_df, 'Importance Questions', 'IMPORTANCE QUESTIONS', 'importance'),
        (ranking_df, 'Ranking Questions', 'RANKING QUESTIONS', 'ranking'),
        (open_ended_df, 'Open-Ended Questions', 'OPEN-ENDED QUESTIONS', 'open-ended'),
    )
    for frame, sheet_name, heading, label in sheet_plan:
        if frame is None:
            # No questions of this type were found; no sheet is created
            continue
        # startrow=1 leaves worksheet row 1 free for the title banner
        frame.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
        format_sheet(
            writer.sheets[sheet_name],
            frame,
            f'{heading} (n={total_possible} total respondents)',
        )
        print(f"✓ Added {len(frame)} {label} questions")

# Console summary of the run
banner = "=" * 80
print("\n" + banner)
print("✓ ANALYSIS COMPLETE - Saved to 'survey_analysis_summary.xlsx'")
print(banner)
print("\nSummary:")
print(f"  Total respondents: {total_possible}")
print(f"  Importance questions: {len(importance_questions)}")
print(f"  Ranking questions: {len(ranking_questions)}")
print(f"  Open-ended questions: {len(open_ended_questions)}")
print("\nOpen the Excel file to see the beautifully formatted results! 📊")

Analyzing survey with 20 total respondents...

Creating formatted Excel file...
✓ Added 78 importance questions
✓ Added 11 ranking questions
✓ Added 5 open-ended questions

✓ ANALYSIS COMPLETE - Saved to 'survey_analysis_summary.xlsx'

Summary:
  Total respondents: 20
  Importance questions: 78
  Ranking questions: 11
  Open-ended questions: 5

Open the Excel file to see the beautifully formatted results! 📊


# Faculty Survey Analysis

In [11]:
import pandas as pd
from collections import Counter
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

# Load the raw faculty survey export
df = pd.read_csv('faculty_survey.csv')

# Drop the six export-metadata columns so that only question columns remain
metadata_cols = ['Start Date', 'End Date', 'Progress', 'Duration (in seconds)', 'Finished', 'Recorded Date']
survey_df = df.drop(columns=metadata_cols)

# Every row counts as one potential respondent for every question
total_possible = len(survey_df)

# One summary dict per question, grouped by detected question type
importance_questions = []
ranking_questions = []
open_ended_questions = []

# Answer labels that mark an importance question, in report column order.
# Both capitalisations of "Moderately important" are listed, so each one is
# tallied in its own column.
importance_order = [
    'Extremely important',
    'Very important',
    'Moderately Important',
    'Moderately important',
    'Slightly important',
    'Not Important',
    'Not at all important',
]

print(f"Analyzing faculty survey with {total_possible} total respondents...")

# Classify every question column and tally its answers
for col in survey_df.columns:
    responses_not_null = survey_df[col].dropna()
    answered = len(responses_not_null)
    if not answered:
        # Nobody answered this column, so it is left out of every sheet
        continue

    counts = Counter(responses_not_null)

    # Fields shared by all three question types
    result = {
        'Question': col,
        'Total Responses': answered,
        'No Response': total_possible - answered,
        'Response Rate %': round((answered / total_possible) * 100, 1),
    }

    if any(level in counts for level in importance_order):
        # Importance question: at least one answer is a known importance label
        for level in importance_order:
            tally = counts.get(level, 0)
            share = (tally / answered) * 100
            result[level] = f"{tally} ({share:.1f}%)"
        importance_questions.append(result)
    elif all(isinstance(value, (int, float)) for value in responses_not_null if pd.notna(value)):
        # Ranking question: every answer is numeric; ranks 1 through 7 are tallied
        for rank in range(1, 8):
            tally = counts.get(rank, 0)
            share = (tally / answered) * 100
            result[f'Rank {rank}'] = f"{tally} ({share:.1f}%)"
        ranking_questions.append(result)
    else:
        # Everything else is treated as open-ended: response counts only
        open_ended_questions.append(result)

# One DataFrame per question type; None marks a type with no questions
importance_df = pd.DataFrame(importance_questions) if importance_questions else None
ranking_df = pd.DataFrame(ranking_questions) if ranking_questions else None
open_ended_df = pd.DataFrame(open_ended_questions) if open_ended_questions else None

# Order importance questions by how many respondents chose 'Extremely important'.
# Cells look like "12 (60.0%)", so the sort key is the leading integer.
if importance_df is not None:
    importance_df = importance_df.sort_values(
        'Extremely important',
        key=lambda cells: cells.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order ranking questions by how many respondents gave rank 1
if ranking_df is not None:
    ranking_df = ranking_df.sort_values(
        'Rank 1',
        key=lambda cells: cells.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order open-ended questions by response rate
if open_ended_df is not None:
    open_ended_df = open_ended_df.sort_values('Response Rate %', ascending=False)

# Function to format Excel sheet
def format_sheet(ws, df, title):
    """Lay out ``df`` on worksheet ``ws`` as a titled, styled report table.

    Row 1 becomes a merged title banner, row 2 holds the column headers and
    rows 3 onward hold the values of ``df``. Headers and values are written
    by this function itself, so it does not rely on what is already in the
    sheet.

    Args:
        ws: Worksheet to format; in this notebook it is taken from
            ``writer.sheets[...]`` of a pandas ``ExcelWriter`` that uses the
            openpyxl engine.
        df: DataFrame whose column names and values are written to the sheet.
        title: Text shown in the banner cell A1.
    """
    # Build each style object once and share it between cells instead of
    # re-creating identical objects for every cell.
    title_font = Font(size=14, bold=True, color="FFFFFF")
    title_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
    question_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
    value_alignment = Alignment(horizontal='center', vertical='center')
    stripe_fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    n_cols = len(df.columns)

    # Title banner: white bold text on a dark blue fill, merged across the table
    ws['A1'] = title
    title_cell = ws['A1']
    title_cell.font = title_font
    title_cell.fill = title_fill
    ws.merge_cells(f'A1:{get_column_letter(n_cols)}1')

    # Column headers go in row 2
    for col_num, column_title in enumerate(df.columns, 1):
        cell = ws.cell(row=2, column=col_num)
        cell.value = column_title
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # Data rows start at row 3
    for r_idx, row in enumerate(df.values, 3):
        shaded = r_idx % 2 == 0  # even worksheet rows get the grey stripe
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx)
            cell.value = value
            # The first column holds the question text: left-aligned and wrapped
            cell.alignment = question_alignment if c_idx == 1 else value_alignment
            cell.border = thin_border
            if shaded:
                cell.fill = stripe_fill

    # Wide first column for the question text, fixed width for the rest
    ws.column_dimensions['A'].width = 60
    for col_num in range(2, n_cols + 1):
        ws.column_dimensions[get_column_letter(col_num)].width = 15

    # Keep rows 1-2 and column A in view while scrolling
    ws.freeze_panes = 'B3'

# Write every non-empty question type to its own formatted worksheet
print("\nCreating formatted Excel file...")
with pd.ExcelWriter('faculty_survey_analysis.xlsx', engine='openpyxl') as writer:
    # (DataFrame or None, sheet name, title prefix, label used in the log line)
    sheet_plan = (
        (importance_df, 'Importance Questions', 'IMPORTANCE QUESTIONS', 'importance'),
        (ranking_df, 'Ranking Questions', 'RANKING QUESTIONS', 'ranking'),
        (open_ended_df, 'Open-Ended Questions', 'OPEN-ENDED QUESTIONS', 'open-ended'),
    )
    for frame, sheet_name, heading, label in sheet_plan:
        if frame is None:
            # No questions of this type were found; no sheet is created
            continue
        # startrow=1 leaves worksheet row 1 free for the title banner
        frame.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
        format_sheet(
            writer.sheets[sheet_name],
            frame,
            f'{heading} (n={total_possible} total respondents)',
        )
        print(f"✓ Added {len(frame)} {label} questions")

# Console summary of the run
banner = "=" * 80
print("\n" + banner)
print("✓ ANALYSIS COMPLETE - Saved to 'faculty_survey_analysis.xlsx'")
print(banner)
print("\nSummary:")
print(f"  Total respondents: {total_possible}")
print(f"  Importance questions: {len(importance_questions)}")
print(f"  Ranking questions: {len(ranking_questions)}")
print(f"  Open-ended questions: {len(open_ended_questions)}")
print("\nOpen the Excel file to see the beautifully formatted results! 📊")

Analyzing faculty survey with 112 total respondents...

Creating formatted Excel file...
✓ Added 79 importance questions
✓ Added 12 ranking questions
✓ Added 11 open-ended questions

✓ ANALYSIS COMPLETE - Saved to 'faculty_survey_analysis.xlsx'

Summary:
  Total respondents: 112
  Importance questions: 79
  Ranking questions: 12
  Open-ended questions: 11

Open the Excel file to see the beautifully formatted results! 📊


# Student Survey Analysis

In [13]:
import pandas as pd
from collections import Counter
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter

# Load the raw student survey export
df = pd.read_csv('student_survey.csv')

# Export-metadata columns are recognised by name rather than by position.
metadata_keywords = ['start date', 'end date', 'progress', 'duration', 'finished',
                     'recorded date', 'response id', 'ip address', 'recipient',
                     'external reference', 'location', 'distribution channel', 'user language']

# A column is metadata when its name STARTS WITH one of the keywords
# (case-insensitive). Matching the keyword anywhere in the name would also
# discard real questions whose text merely contains a word such as
# "location" or "progress".
metadata_prefixes = tuple(metadata_keywords)
metadata_cols = [col for col in df.columns
                 if col.strip().lower().startswith(metadata_prefixes)]

print(f"Identified {len(metadata_cols)} metadata columns to remove:")
for col in metadata_cols:
    print(f"  - {col}")

# Keep only the question columns
survey_df = df.drop(columns=metadata_cols)

# Every row counts as one potential respondent for every question
total_possible = len(survey_df)

# One summary dict per question, grouped by detected question type
importance_questions = []
ranking_questions = []
open_ended_questions = []

# Answer labels that mark an importance question, in report column order.
# Both capitalisations of "Moderately important" are listed, so each one is
# tallied in its own column.
importance_order = ['Extremely important', 'Very important', 'Moderately Important',
                    'Moderately important', 'Slightly important', 'Not Important', 'Not at all important']

print(f"Analyzing student survey with {total_possible} total respondents...")

# Classify every question column and tally its answers
for col in survey_df.columns:
    responses_not_null = survey_df[col].dropna()
    answered = len(responses_not_null)
    if not answered:
        # Nobody answered this column, so it is left out of every sheet
        continue

    counts = Counter(responses_not_null)

    # Fields shared by all three question types
    result = {
        'Question': col,
        'Total Responses': answered,
        'No Response': total_possible - answered,
        'Response Rate %': round((answered / total_possible) * 100, 1),
    }

    if any(level in counts for level in importance_order):
        # Importance question: at least one answer is a known importance label
        for level in importance_order:
            count = counts.get(level, 0)
            pct = (count / answered) * 100
            result[level] = f"{count} ({pct:.1f}%)"
        importance_questions.append(result)
    elif all(isinstance(r, (int, float)) for r in responses_not_null if pd.notna(r)):
        # Ranking question: every answer is numeric; ranks 1 through 7 are tallied
        for rank in range(1, 8):
            count = counts.get(rank, 0)
            pct = (count / answered) * 100
            result[f'Rank {rank}'] = f"{count} ({pct:.1f}%)"
        ranking_questions.append(result)
    else:
        # Everything else is treated as open-ended: response counts only
        open_ended_questions.append(result)

# One DataFrame per question type; None marks a type with no questions
importance_df = pd.DataFrame(importance_questions) if importance_questions else None
ranking_df = pd.DataFrame(ranking_questions) if ranking_questions else None
open_ended_df = pd.DataFrame(open_ended_questions) if open_ended_questions else None

# Order importance questions by how many respondents chose 'Extremely important'.
# Cells look like "12 (60.0%)", so the sort key is the leading integer.
if importance_df is not None:
    importance_df = importance_df.sort_values(
        'Extremely important',
        key=lambda x: x.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order ranking questions by how many respondents gave rank 1
if ranking_df is not None:
    ranking_df = ranking_df.sort_values(
        'Rank 1',
        key=lambda x: x.str.extract(r'(\d+)')[0].astype(int),
        ascending=False,
    )

# Order open-ended questions by response rate
if open_ended_df is not None:
    open_ended_df = open_ended_df.sort_values('Response Rate %', ascending=False)

# Function to format Excel sheet
def format_sheet(ws, df, title):
    """Lay out ``df`` on worksheet ``ws`` as a titled, styled report table.

    Row 1 becomes a merged title banner, row 2 holds the column headers and
    rows 3 onward hold the values of ``df``. Headers and values are written
    by this function itself, so it does not rely on what is already in the
    sheet.

    Args:
        ws: Worksheet to format; in this notebook it is taken from
            ``writer.sheets[...]`` of a pandas ``ExcelWriter`` that uses the
            openpyxl engine.
        df: DataFrame whose column names and values are written to the sheet.
        title: Text shown in the banner cell A1.
    """
    # Build each style object once and share it between cells instead of
    # re-creating identical objects for every cell.
    title_font = Font(size=14, bold=True, color="FFFFFF")
    title_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
    question_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
    value_alignment = Alignment(horizontal='center', vertical='center')
    stripe_fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    n_cols = len(df.columns)

    # Title banner: white bold text on a dark blue fill, merged across the table
    ws['A1'] = title
    title_cell = ws['A1']
    title_cell.font = title_font
    title_cell.fill = title_fill
    ws.merge_cells(f'A1:{get_column_letter(n_cols)}1')

    # Column headers go in row 2
    for col_num, column_title in enumerate(df.columns, 1):
        cell = ws.cell(row=2, column=col_num)
        cell.value = column_title
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # Data rows start at row 3
    for r_idx, row in enumerate(df.values, 3):
        shaded = r_idx % 2 == 0  # even worksheet rows get the grey stripe
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx)
            cell.value = value
            # The first column holds the question text: left-aligned and wrapped
            cell.alignment = question_alignment if c_idx == 1 else value_alignment
            cell.border = thin_border
            if shaded:
                cell.fill = stripe_fill

    # Wide first column for the question text, fixed width for the rest
    ws.column_dimensions['A'].width = 60
    for col_num in range(2, n_cols + 1):
        ws.column_dimensions[get_column_letter(col_num)].width = 15

    # Keep rows 1-2 and column A in view while scrolling
    ws.freeze_panes = 'B3'

# Write every non-empty question type to its own formatted worksheet
print("\nCreating formatted Excel file...")
with pd.ExcelWriter('student_survey_analysis.xlsx', engine='openpyxl') as writer:
    # (DataFrame or None, sheet name, title prefix, label used in the log line)
    sheet_plan = (
        (importance_df, 'Importance Questions', 'IMPORTANCE QUESTIONS', 'importance'),
        (ranking_df, 'Ranking Questions', 'RANKING QUESTIONS', 'ranking'),
        (open_ended_df, 'Open-Ended Questions', 'OPEN-ENDED QUESTIONS', 'open-ended'),
    )
    for frame, sheet_name, heading, label in sheet_plan:
        if frame is None:
            # No questions of this type were found; no sheet is created
            continue
        # startrow=1 leaves worksheet row 1 free for the title banner
        frame.to_excel(writer, sheet_name=sheet_name, index=False, startrow=1)
        format_sheet(
            writer.sheets[sheet_name],
            frame,
            f'{heading} (n={total_possible} total respondents)',
        )
        print(f"✓ Added {len(frame)} {label} questions")

# Console summary of the run
banner = "=" * 80
print("\n" + banner)
print("✓ ANALYSIS COMPLETE - Saved to 'student_survey_analysis.xlsx'")
print(banner)
print("\nSummary:")
print(f"  Total respondents: {total_possible}")
print(f"  Importance questions: {len(importance_questions)}")
print(f"  Ranking questions: {len(ranking_questions)}")
print(f"  Open-ended questions: {len(open_ended_questions)}")
print("\nOpen the Excel file to see the beautifully formatted results! 📊")

Identified 3 metadata columns to remove:
  - Progress
  - Duration (in seconds)
  - Finished
Analyzing student survey with 220 total respondents...

Creating formatted Excel file...
✓ Added 79 importance questions
✓ Added 4 ranking questions
✓ Added 20 open-ended questions

✓ ANALYSIS COMPLETE - Saved to 'student_survey_analysis.xlsx'

Summary:
  Total respondents: 220
  Importance questions: 79
  Ranking questions: 4
  Open-ended questions: 20

Open the Excel file to see the beautifully formatted results! 📊


# All-in-one approach: analyze the staff, faculty, and student surveys in a single pass

In [1]:
import pandas as pd
from collections import Counter
import openpyxl
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from openpyxl.utils import get_column_letter
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

# Survey label -> CSV export to analyze, in processing order
surveys = dict(
    Staff='staff_survey.csv',
    Faculty='faculty_survey.csv',
    Student='student_survey.csv',
)

# Cross-survey accumulators; analyze_survey() appends to these on every call
all_importance_data, all_ranking_data, survey_summaries = [], [], []

# Answer labels that mark an importance question, in report column order.
# Both capitalisations of "Moderately important" are listed, so each one is
# tallied in its own column.
importance_order = [
    'Extremely important',
    'Very important',
    'Moderately Important',
    'Moderately important',
    'Slightly important',
    'Not Important',
    'Not at all important',
]

def analyze_survey(file_path, survey_type):
    """Analyze one survey export and return its per-question summaries.

    Reads the CSV at ``file_path``, drops export-metadata columns, and
    classifies every remaining column that has at least one answer as an
    importance, ranking, or open-ended question.

    Side effects: importance and ranking summaries are also appended to the
    module-level ``all_importance_data`` and ``all_ranking_data`` lists, one
    row is appended to ``survey_summaries``, and progress is printed.

    Args:
        file_path: Path of the survey CSV; passed straight to ``pd.read_csv``.
        survey_type: Label stored in each summary's 'Survey' field and shown
            in the printed progress output.

    Returns:
        A tuple ``(importance_questions, ranking_questions,
        open_ended_questions, total_possible)``: three lists of per-question
        summary dicts and the number of rows in the survey.
    """
    print(f"\n{'='*80}")
    print(f"Analyzing {survey_type} Survey...")
    print('='*80)

    # Read the CSV file
    df = pd.read_csv(file_path)

    # A column is metadata when its name STARTS WITH one of these keywords
    # (case-insensitive). Matching the keyword anywhere in the name would
    # also discard real questions whose text merely contains a word such as
    # "location" or "progress".
    metadata_keywords = ('start date', 'end date', 'progress', 'duration', 'finished',
                         'recorded date', 'response id', 'ip address', 'recipient',
                         'external reference', 'location', 'distribution channel', 'user language')

    metadata_cols = [col for col in df.columns
                     if col.strip().lower().startswith(metadata_keywords)]
    survey_df = df.drop(columns=metadata_cols)

    # Every row counts as one potential respondent for every question
    total_possible = len(survey_df)

    importance_questions = []
    ranking_questions = []
    open_ended_questions = []

    # Classify every question column and tally its answers
    for col in survey_df.columns:
        responses_not_null = survey_df[col].dropna()
        answered = len(responses_not_null)
        if not answered:
            # Nobody answered this column; it is left out of every result list
            continue

        counts = Counter(responses_not_null)

        # Fields shared by all three question types
        result = {
            'Survey': survey_type,
            'Question': col,
            'Total Responses': answered,
            'No Response': total_possible - answered,
            'Response Rate %': round((answered / total_possible) * 100, 1),
        }

        if any(level in counts for level in importance_order):
            # Importance question: at least one answer is a known importance label.
            # Each level gets a display string plus raw count and percentage fields.
            for level in importance_order:
                count = counts.get(level, 0)
                pct = (count / answered) * 100
                result[level] = f"{count} ({pct:.1f}%)"
                result[f'{level}_count'] = count
                result[f'{level}_pct'] = pct

            importance_questions.append(result)
            all_importance_data.append(result)

        elif all(isinstance(r, (int, float)) for r in responses_not_null if pd.notna(r)):
            # Ranking question: every answer is numeric; ranks 1 through 7 are tallied
            for rank in range(1, 8):
                count = counts.get(rank, 0)
                pct = (count / answered) * 100
                result[f'Rank {rank}'] = f"{count} ({pct:.1f}%)"
                result[f'Rank {rank}_count'] = count
                result[f'Rank {rank}_pct'] = pct

            ranking_questions.append(result)
            all_ranking_data.append(result)

        else:
            # Everything else is treated as open-ended: response counts only
            open_ended_questions.append(result)

    print(f"  Total respondents: {total_possible}")
    print(f"  Importance questions: {len(importance_questions)}")
    print(f"  Ranking questions: {len(ranking_questions)}")
    print(f"  Open-ended questions: {len(open_ended_questions)}")

    survey_summaries.append({
        'Survey': survey_type,
        'Total Respondents': total_possible,
        'Importance Questions': len(importance_questions),
        'Ranking Questions': len(ranking_questions),
        'Open-Ended Questions': len(open_ended_questions)
    })

    return importance_questions, ranking_questions, open_ended_questions, total_possible

# Function to format Excel sheet
def format_sheet(ws, df, title):
    """Lay out *df* on worksheet *ws* as a titled, styled table.

    Row 1 receives the title banner (merged across the table width), row 2
    the column headers, and the DataFrame rows follow from row 3.  Every
    cell value is written from *df*, so whatever those rows held before is
    overwritten.

    Args:
        ws: openpyxl worksheet to write into; modified in place.
        df: pandas DataFrame supplying the column titles and the row values.
        title: Text placed in the banner cell A1.
    """
    n_cols = len(df.columns)

    # Each style below is the same for every cell it is applied to, so it is
    # built once here instead of once per cell inside the loops.
    header_font = Font(bold=True, size=11)
    header_fill = PatternFill(start_color="D9E1F2", end_color="D9E1F2", fill_type="solid")
    header_alignment = Alignment(horizontal='center', vertical='center', wrap_text=True)
    first_col_alignment = Alignment(horizontal='left', vertical='top', wrap_text=True)
    body_alignment = Alignment(horizontal='center', vertical='center')
    stripe_fill = PatternFill(start_color="F2F2F2", end_color="F2F2F2", fill_type="solid")
    edge = Side(style='thin')
    thin_border = Border(left=edge, right=edge, top=edge, bottom=edge)

    # Title banner
    ws['A1'] = title
    ws['A1'].font = Font(size=14, bold=True, color="FFFFFF")
    ws['A1'].fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
    # Only merge when there is more than one column to span:
    # get_column_letter(0) raises for a frame without columns, and a
    # one-cell "merge" would change nothing.
    if n_cols > 1:
        ws.merge_cells(f'A1:{get_column_letter(n_cols)}1')

    # Header row
    for col_num, column_title in enumerate(df.columns, 1):
        cell = ws.cell(row=2, column=col_num)
        cell.value = column_title
        cell.font = header_font
        cell.fill = header_fill
        cell.alignment = header_alignment

    # Data rows, starting directly under the header
    for r_idx, row in enumerate(df.values, 3):
        # Even worksheet rows (4, 6, ...) are shaded to give zebra striping
        shaded = r_idx % 2 == 0
        for c_idx, value in enumerate(row, 1):
            cell = ws.cell(row=r_idx, column=c_idx)
            cell.value = value
            # The first column is left/top aligned with wrapping; the rest are centred
            cell.alignment = first_col_alignment if c_idx == 1 else body_alignment
            cell.border = thin_border
            if shaded:
                cell.fill = stripe_fill

    # Wide first column, uniform width for the rest
    ws.column_dimensions['A'].width = 60
    for col_num in range(2, n_cols + 1):
        ws.column_dimensions[get_column_letter(col_num)].width = 15

    # Keep the title/header rows and the first column visible while scrolling
    ws.freeze_panes = 'B3'

# Run the analysis for every configured survey and pool the per-survey
# results into three combined lists (one per question type).
all_importance, all_ranking, all_open_ended = [], [], []

for survey_name, file_path in surveys.items():
    try:
        imp_rows, rank_rows, open_rows, _respondents = analyze_survey(file_path, survey_name)
        all_importance += imp_rows
        all_ranking += rank_rows
        all_open_ended += open_rows
    except FileNotFoundError:
        # A missing input file only skips that survey; the others still run
        print(f"⚠️  Warning: {file_path} not found. Skipping...")
    except Exception as exc:
        # Any other failure is reported and the loop moves on to the next survey
        print(f"⚠️  Error processing {file_path}: {str(exc)}")

# Create comprehensive Excel file
print(f"\n{'='*80}")
print("Creating comprehensive Excel file...")
print('='*80)


def _display_rows(rows):
    """Return copies of *rows* without the '*_count' / '*_pct' helper keys.

    The keys of the first row decide which columns are kept; keys that only
    appear in later rows are dropped.
    """
    display_cols = {col for col in rows[0] if not col.endswith(('_count', '_pct'))}
    return [{k: v for k, v in row.items() if k in display_cols} for row in rows]


def _survey_then_leading_count(column):
    """Sort key for ``sort_values``: 'Survey' as-is, other columns by their first integer.

    The non-'Survey' columns sorted here hold "count (pct%)" strings, so the
    first run of digits is the count.
    """
    if column.name == 'Survey':
        return column
    return column.str.extract(r'(\d+)')[0].astype(int)


if not (survey_summaries or all_importance or all_ranking or all_open_ended):
    # openpyxl refuses to save a workbook without any sheet, so when every
    # survey failed to load there is nothing to export.
    print("⚠️  Warning: no survey data to export. Skipping Excel file...")
else:
    with pd.ExcelWriter('all_surveys_analysis.xlsx', engine='openpyxl') as writer:

        # Summary sheet
        if survey_summaries:
            summary_df = pd.DataFrame(survey_summaries)
            # startrow=1 leaves row 1 free for the title banner that
            # format_sheet adds, matching the other sheets below.
            summary_df.to_excel(writer, sheet_name='Summary', index=False, startrow=1)
            ws = writer.sheets['Summary']
            format_sheet(ws, summary_df, 'SURVEY SUMMARY')
            print("✓ Added Summary sheet")

        # All importance questions: per survey, highest "Extremely important" count first
        if all_importance:
            importance_df = pd.DataFrame(_display_rows(all_importance))
            importance_df = importance_df.sort_values(['Survey', 'Extremely important'],
                                                      key=_survey_then_leading_count,
                                                      ascending=[True, False])
            importance_df.to_excel(writer, sheet_name='All Importance Questions', index=False, startrow=1)
            ws = writer.sheets['All Importance Questions']
            format_sheet(ws, importance_df, 'ALL IMPORTANCE QUESTIONS - BY SURVEY')
            print(f"✓ Added {len(importance_df)} importance questions")

        # All ranking questions: per survey, highest "Rank 1" count first
        if all_ranking:
            ranking_df = pd.DataFrame(_display_rows(all_ranking))
            ranking_df = ranking_df.sort_values(['Survey', 'Rank 1'],
                                               key=_survey_then_leading_count,
                                               ascending=[True, False])
            ranking_df.to_excel(writer, sheet_name='All Ranking Questions', index=False, startrow=1)
            ws = writer.sheets['All Ranking Questions']
            format_sheet(ws, ranking_df, 'ALL RANKING QUESTIONS - BY SURVEY')
            print(f"✓ Added {len(ranking_df)} ranking questions")

        # All open-ended questions: per survey, best response rate first
        if all_open_ended:
            open_ended_df = pd.DataFrame(all_open_ended)
            open_ended_df = open_ended_df.sort_values(['Survey', 'Response Rate %'], ascending=[True, False])
            open_ended_df.to_excel(writer, sheet_name='All Open-Ended Questions', index=False, startrow=1)
            ws = writer.sheets['All Open-Ended Questions']
            format_sheet(ws, open_ended_df, 'ALL OPEN-ENDED QUESTIONS - BY SURVEY')
            print(f"✓ Added {len(open_ended_df)} open-ended questions")

    print("\n✓ Saved comprehensive analysis to 'all_surveys_analysis.xlsx'")

# CREATE VISUALIZATIONS
print(f"\n{'='*80}")
print("Creating interactive visualizations...")
print('='*80)

# Visualization 1: Survey Response Overview
# One bar per survey group showing how many respondents it had.
if survey_summaries:
    summary_df = pd.DataFrame(survey_summaries)

    fig1 = go.Figure()
    fig1.add_trace(go.Bar(
        name='Total Respondents',
        x=summary_df['Survey'],
        y=summary_df['Total Respondents'],
        text=summary_df['Total Respondents'],
        textposition='auto',
        marker_color='#366092'
    ))

    fig1.update_layout(
        title='Survey Response Rates by Group',
        xaxis_title='Survey Group',
        yaxis_title='Number of Respondents',
        template='plotly_white',
        height=500
    )
    # Save the chart as well as showing it inline: the "Generated Files"
    # list printed at the end of the run names viz_1_response_overview.html,
    # which was previously never written.
    fig1.write_html('viz_1_response_overview.html')
    print("✓ Created viz_1_response_overview.html")
    fig1.show()
    print("✓ Displayed Response Overview")

# Visualization 2: Top 15 Most Important Priorities (Across All Groups)
if all_importance:
    # Rank the same rows the guard above checked, so the chart only reflects
    # surveys whose analysis completed.
    # NOTE(review): assumes the rows in all_importance carry the
    # '<level>_pct' keys that the Excel export strips out - confirm against
    # analyze_survey.
    top_priorities = sorted(all_importance, key=lambda x: x['Extremely important_pct'], reverse=True)[:15]

    fig2 = go.Figure()

    # One bar series per survey group; groups with no question in the top 15 are omitted
    for survey in ['Staff', 'Faculty', 'Student']:
        survey_data = [p for p in top_priorities if p['Survey'] == survey]
        if survey_data:
            fig2.add_trace(go.Bar(
                name=survey,
                # Question text is cut to 50 characters to keep tick labels readable
                x=[p['Question'][:50] + '...' for p in survey_data],
                y=[p['Extremely important_pct'] for p in survey_data],
                text=[f"{p['Extremely important_pct']:.1f}%" for p in survey_data],
                textposition='auto'
            ))

    fig2.update_layout(
        title='Top 15 Priorities: % Rating "Extremely Important"',
        xaxis_title='Question',
        yaxis_title='% Extremely Important',
        barmode='group',
        template='plotly_white',
        height=600,
        xaxis_tickangle=-45
    )
    fig2.write_html('viz_2_top_priorities.html')
    print("✓ Created viz_2_top_priorities.html")

# Visualization 3: Heatmap of Importance Levels
if all_importance:
    # Collect the "Extremely important" percentage per survey group for each
    # question. Questions are keyed by the first 80 characters of their text
    # so the same question can line up across groups.
    # NOTE(review): assumes the rows in all_importance carry the
    # '<level>_pct' keys that the Excel export strips out - confirm against
    # analyze_survey.
    questions_by_group = {}
    for item in all_importance:
        q_short = item['Question'][:80]
        questions_by_group.setdefault(q_short, {})[item['Survey']] = item['Extremely important_pct']

    # Filter to questions with high priority (>60% extremely important in at least one group)
    high_priority_questions = {q: v for q, v in questions_by_group.items()
                              if any(pct > 60 for pct in v.values())}

    if high_priority_questions:
        # Top 20 by the best "Extremely important" share any group gave the
        # question (previously this was simply the first 20 encountered).
        questions = sorted(
            high_priority_questions,
            key=lambda q: max(high_priority_questions[q].values()),
            reverse=True,
        )[:20]
        surveys_list = ['Staff', 'Faculty', 'Student']

        # One heatmap row per question, one column per survey group.
        # A group that has no entry for a question is plotted as 0.
        heatmap_data = []
        for q in questions:
            row = [high_priority_questions[q].get(s, 0) for s in surveys_list]
            heatmap_data.append(row)

        fig3 = go.Figure(data=go.Heatmap(
            z=heatmap_data,
            x=surveys_list,
            y=[q[:60] + '...' for q in questions],
            colorscale='Blues',
            text=[[f"{val:.1f}%" for val in row] for row in heatmap_data],
            texttemplate='%{text}',
            textfont={"size": 10},
            colorbar=dict(title="% Extremely<br>Important")
        ))

        fig3.update_layout(
            title='Priority Heatmap: High-Importance Questions Across Groups',
            xaxis_title='Survey Group',
            yaxis_title='Question',
            template='plotly_white',
            height=800
        )
        fig3.write_html('viz_3_priority_heatmap.html')
        print("✓ Created viz_3_priority_heatmap.html")

# Visualization 4: Response Distribution Comparison
if all_importance:
    # Show the full answer distribution for the three questions with the
    # highest "Extremely important" share.
    # NOTE(review): assumes the rows in all_importance carry the
    # '<level>_pct' keys that the Excel export strips out - confirm against
    # analyze_survey.
    top_3_questions = sorted(all_importance, key=lambda x: x['Extremely important_pct'], reverse=True)[:3]

    fig4 = make_subplots(
        rows=1, cols=3,
        subplot_titles=[q['Question'][:40] + '...' for q in top_3_questions]
    )

    # Displayed level -> answer spellings folded into it. importance_order
    # (top of the file) counts both 'Moderately Important'/'Moderately important'
    # and 'Not Important'/'Not at all important'; charting only one spelling
    # of each silently drops the responses recorded under the other.
    # NOTE(review): presumes each pair is the same scale point worded
    # differently per survey - confirm with the survey definitions.
    level_variants = {
        'Extremely important': ('Extremely important',),
        'Very important': ('Very important',),
        'Moderately important': ('Moderately important', 'Moderately Important'),
        'Slightly important': ('Slightly important',),
        'Not Important': ('Not Important', 'Not at all important'),
    }
    importance_levels = list(level_variants)
    colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd']

    for idx, q_data in enumerate(top_3_questions, 1):
        # Sum the percentage of every spelling of a level; absent keys count as 0
        values = [
            sum(q_data.get(f'{variant}_pct', 0) for variant in variants)
            for variants in level_variants.values()
        ]

        fig4.add_trace(
            go.Bar(
                x=importance_levels,
                y=values,
                name=q_data['Survey'],
                text=[f"{v:.1f}%" for v in values],
                textposition='auto',
                marker_color=colors,
                # Only the first subplot contributes a legend entry
                showlegend=(idx == 1)
            ),
            row=1, col=idx
        )

    fig4.update_layout(
        title='Response Distribution for Top 3 Priority Questions',
        template='plotly_white',
        height=500,
        showlegend=True
    )
    fig4.update_xaxes(tickangle=-45)
    fig4.write_html('viz_4_response_distribution.html')
    print("✓ Created viz_4_response_distribution.html")

# Closing banner followed by the list of artifacts this run reports
rule = '=' * 80
print(f"\n{rule}")
print("✅ ANALYSIS COMPLETE!")
print(rule)
print("\nGenerated Files:")
for entry in (
    "  📊 all_surveys_analysis.xlsx - Comprehensive Excel analysis",
    "  📈 viz_1_response_overview.html - Response rates",
    "  📈 viz_2_top_priorities.html - Top 15 priorities comparison",
    "  📈 viz_3_priority_heatmap.html - Priority heatmap",
    "  📈 viz_4_response_distribution.html - Response distributions",
):
    print(entry)
print("\nOpen the HTML files in your browser to explore interactive visualizations!")
print(rule)


Analyzing Staff Survey...
  Total respondents: 20
  Importance questions: 78
  Ranking questions: 11
  Open-ended questions: 5

Analyzing Faculty Survey...
  Total respondents: 112
  Importance questions: 79
  Ranking questions: 12
  Open-ended questions: 11

Analyzing Student Survey...
  Total respondents: 220
  Importance questions: 79
  Ranking questions: 4
  Open-ended questions: 20

Creating comprehensive Excel file...
✓ Added Summary sheet
✓ Added 236 importance questions
✓ Added 27 ranking questions
✓ Added 36 open-ended questions

✓ Saved comprehensive analysis to 'all_surveys_analysis.xlsx'

Creating interactive visualizations...


✓ Displayed Response Overview
✓ Created viz_2_top_priorities.html
✓ Created viz_3_priority_heatmap.html
✓ Created viz_4_response_distribution.html

✅ ANALYSIS COMPLETE!

Generated Files:
  📊 all_surveys_analysis.xlsx - Comprehensive Excel analysis
  📈 viz_1_response_overview.html - Response rates
  📈 viz_2_top_priorities.html - Top 15 priorities comparison
  📈 viz_3_priority_heatmap.html - Priority heatmap
  📈 viz_4_response_distribution.html - Response distributions

Open the HTML files in your browser to explore interactive visualizations!
