# Mind Survey Response Analysis

This notebook analyzes responses to questions about consciousness and phenomenal properties across different philosophical backgrounds.

## Contents
1. Data Loading and Initial Inspection
2. Overview of Responses
3. Analysis by Philosophical Background
4. Individual Question Analysis
5. Statistical Tests and Effect Sizes
6. Visualization of Results

In [1]:
# Smoke test: prints a fixed marker to confirm the kernel is executing cells.
print("test")

test


In [2]:
# Essential imports
import pandas as pd
import os
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.stats.multitest import multipletests
from tabulate import tabulate
import plotly.graph_objects as go

print("imports loaded")

imports loaded


In [3]:
# Display options: show every column, but cap printed rows at 100.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

# Plotting style: reset matplotlib to its default style first, then apply the
# seaborn "whitegrid" style on top of it.
plt.style.use('default')
sns.set_style("whitegrid")

# Notebook font. An HTML object is only rendered when it is the last
# expression of a cell or is handed to display(); because print() follows it
# here, it has to be displayed explicitly or the stylesheet is never injected.
from IPython.display import HTML, display
display(HTML("""
<style>
    div#notebook { font-family: "Arial"; }
</style>
"""))
print("configs set")

configs set


## 1. Data Loading and Initial Inspection

In [4]:
import logging

# Relative path to the raw survey export.
file_path = '../data/mind-survey-data.csv'

if os.path.exists(file_path):
    # Deliberately NOT named `stats`: that name is bound to `scipy.stats` by the
    # imports cell and is used by the chi-square tests further down. Rebinding
    # it to an os.stat_result would make `stats.chi2_contingency` fail.
    file_info = os.stat(file_path)
    print("File exists!")
    print(f"Size: {file_info.st_size} bytes")
else:
    logging.warning("Make sure your data file exists and the file_path is correct")

File exists!
Size: 217714 bytes


In [None]:
# Read the survey export into a DataFrame (one row per response).
df = pd.read_csv(file_path)

# Quick overview: how many responses there are and which columns exist.
n_responses = len(df)
print(f"Number of responses: {n_responses}")
print("\nColumn names:")
for column_name in df.columns:
    print(f"- {column_name}")

### Available Questions

In [None]:
# Columns that describe the respondent rather than holding a survey answer.
# NOTE(review): the 'Where in the world ...' label ends with a trailing space,
# presumably to match the CSV header exactly — confirm before "tidying" it.
metadata_columns = [
    'Timestamp',
    'What is your background in philosophy',
    'Where in the world did most of your childhood education and upbringing take place? ',
    'I would describe my views on mind as'
]

# Every remaining column is treated as a survey question.
questions = [col for col in df.columns if col not in metadata_columns]

# Print the question texts verbatim so they can be copied into later cells.
print("Available questions (you can copy and paste these for use in code):")
for q in questions:
    print(q)

## 2. Overview of Responses

In [None]:
# Tally how many respondents fall into each philosophy-background category.
background_counts = df['What is your background in philosophy'].value_counts()
print("Responses by philosophical background:", background_counts, sep="\n")

In [None]:
def question_response_table(df, question):
    """Summarise the answers to one survey question as two display-ready tables.

    Args:
        df: Survey responses. Must contain the ``question`` column and the
            'What is your background in philosophy' column.
        question: Name of the column whose answers are tabulated.

    Returns:
        A ``(counts_df, cross_tab)`` tuple.

        counts_df has one row per distinct answer (most frequent first) with
        the columns 'Response', 'Count' and 'Percentage'; the percentage is
        the share among counted answers, formatted like ``'12.3%'``.

        cross_tab has one row per background, a 'Background' column, and one
        column per distinct answer holding that answer's share within the
        background as a formatted percentage string.
    """
    format_pct = '{:.1f}%'.format

    # Count once and derive the shares from the same Series, so the three
    # columns below are built from a single, consistently ordered tally.
    counts = df[question].value_counts()
    shares = (counts / counts.sum()).round(3) * 100
    counts_df = pd.DataFrame({
        'Response': counts.index,
        'Count': counts.values,
        'Percentage': shares.map(format_pct)
    })

    # Row-normalised cross-tabulation: each background's answers sum to 100%.
    cross_tab = pd.crosstab(
        df['What is your background in philosophy'],
        df[question],
        normalize='index'
    ).round(3) * 100

    # Format column by column via Series.map. DataFrame.map only exists in
    # pandas >= 2.1 (older releases call it applymap), whereas Series.map is
    # available in every version.
    cross_tab = cross_tab.apply(lambda column: column.map(format_pct))
    cross_tab = cross_tab.reset_index()
    cross_tab = cross_tab.rename(columns={'What is your background in philosophy': 'Background'})

    return counts_df, cross_tab

In [None]:
# Worked example: tabulate the answers to a single survey question.
question = "I have thoughts"
counts_df, cross_tab = question_response_table(df, question)

# Print both tables as plain-text grids, hiding the DataFrame index.
for heading, table in (
    ("Response Counts:", counts_df),
    ("\nResponses by Background:", cross_tab),
):
    print(heading)
    print(tabulate(table, headers='keys', tablefmt='pretty', showindex=False))

In [None]:
def make_pretty_table(df):
    """Render a DataFrame as a Plotly table figure.

    Args:
        df: Frame to display; every column becomes one table column.

    Returns:
        A ``go.Figure`` holding a single ``go.Table`` trace with a dark purple
        header, light grey cells and all layout margins set to zero.
    """
    header_style = dict(
        values=list(df.columns),
        fill_color='#440154',
        align='center',
        font=dict(color='white', size=12),
    )
    # Plotly tables are column-oriented: one sequence of cell values per column.
    cell_style = dict(
        values=[df[name] for name in df.columns],
        fill_color='#f7f7f9',
        align='center',
    )

    table_trace = go.Table(header=header_style, cells=cell_style)
    fig = go.Figure(data=[table_trace])
    fig.update_layout(margin=dict(l=0, r=0, t=0, b=0))
    return fig

In [None]:
# Show the response-count table for the example question as a Plotly figure.
fig = make_pretty_table(df=counts_df)
fig.show()

In [None]:
def create_plotly_table(df, question):
    """Build a titled Plotly table of response counts for one question.

    Args:
        df: Survey responses; must contain the ``question`` column.
        question: Name of the column whose answers are tabulated. It is also
            used in the figure title.

    Returns:
        A ``go.Figure`` with one ``go.Table`` trace listing each distinct
        answer ('Response'), how often it occurs ('Count') and its share as a
        formatted string such as ``'12.3%'`` ('Percentage').
    """
    row_height = 40  # pixel height used for the header row and every cell row

    # Count once and derive the percentages from the same Series instead of
    # re-running value_counts() for every column.
    counts = df[question].value_counts()
    shares = (counts / counts.sum()).round(3) * 100
    counts_df = pd.DataFrame({
        'Response': counts.index,
        'Count': counts.values,
        'Percentage': shares.map('{:.1f}%'.format)
    })

    fig = go.Figure(data=[go.Table(
        header=dict(
            values=list(counts_df.columns),
            fill_color='rgb(68, 1, 84)',
            align='center',
            font=dict(color='white', size=14),
            height=row_height
        ),
        cells=dict(
            values=[counts_df[col] for col in counts_df.columns],
            fill_color='rgb(247, 247, 249)',
            align='center',
            font=dict(size=12),
            height=row_height
        )
    )])

    # Size the figure to its content: 45 px per row (data rows plus the
    # header) and 100 px of padding for the title and margins.
    total_height = (len(counts_df) + 1) * 45 + 100

    fig.update_layout(
        title=f"Responses to: {question}",
        title_x=0.5,
        width=800,
        height=total_height,
        margin=dict(l=20, r=20, t=60, b=40),
        autosize=False
    )

    return fig

In [None]:
# Titled Plotly table of response counts for the example question.
fig = create_plotly_table(df=df, question=question)
fig.show()

In [None]:
def question_response_piechart(df, question, ax):
    """Draw the answer distribution for one question as a pie chart.

    Args:
        df: Survey responses; must contain the ``question`` column.
        question: Name of the column whose answers are plotted.
        ax: Axes-like object to draw on (its ``pie`` and ``set_title``
            methods are used).

    Returns:
        The same ``ax`` that was passed in.
    """
    tallies = df[question].value_counts()

    # Pull every wedge slightly away from the centre.
    offsets = [0.05 for _ in range(len(tallies))]

    _, label_texts, pct_texts = ax.pie(
        tallies.values,
        explode=offsets,
        labels=tallies.index,
        autopct='%1.1f%%',
        textprops={'fontsize': 10},
        pctdistance=0.85,
    )

    # Answer labels at size 10, the percentage annotations slightly smaller.
    for label in label_texts:
        label.set_fontsize(10)
    for pct in pct_texts:
        pct.set_fontsize(9)

    ax.set_title(f'Response Distribution\n{question}', pad=20, fontsize=12)
    return ax

In [None]:
# Standalone pie chart for the example question.
fig, ax = plt.subplots(figsize=(12, 8))
question_response_piechart(df, question, ax=ax)
plt.show()

In [None]:
def question_response_histogram(df, question, ax):
    """Plot answer counts per philosophical background as grouped bars.

    Args:
        df: Survey responses; must contain the ``question`` column and the
            'What is your background in philosophy' column. It is not
            modified; plotting works on a derived frame.
        question: Name of the column used as the bar hue.
        ax: Matplotlib axes to draw on.

    Returns:
        The same ``ax`` that was passed in.
    """
    background_col = 'What is your background in philosophy'

    # Fixed left-to-right order of the background categories on the x axis.
    category_order = [
        'No background',
        'Some formal education in philosophy (high school or otherwise)',
        'Personal Interest (I read philosophy books and listen to philosophy podcasts)',
        'Formal university education in philosophy',
        'I am an academic full-time philosopher'
    ]

    # Derive a plotting frame whose background column is an ordered categorical.
    ordered_background = pd.Categorical(
        df[background_col],
        categories=category_order,
        ordered=True,
    )
    df_plot = df.assign(**{background_col: ordered_background})

    sns.countplot(
        data=df_plot,
        x=background_col,
        hue=question,
        order=category_order,
        ax=ax,
    )

    # Long category labels: tilt them so they do not overlap.
    plt.setp(ax.get_xticklabels(), rotation=30, ha='right')
    ax.set_title('Responses by Philosophical Background', fontsize=12)

    # Anchor the legend outside the plotting area, to the right of the axes.
    legend_options = dict(
        bbox_to_anchor=(1.05, 1),
        loc='upper left',
        borderaxespad=0,
        fontsize=10,
    )
    ax.legend(**legend_options)
    return ax

In [None]:
# Demo: grouped bar chart by background for the example question.
fig, ax = plt.subplots(figsize=(12, 8))
question_response_histogram(df, question, ax=ax)
plt.tight_layout()  # leaves room for the legend anchored outside the axes
plt.show()

In [None]:
def analyze_question_responses(df, question):
    """Placeholder for the per-question report.

    Not implemented yet: both arguments are currently ignored and the
    function only prints the literal marker "TODO".
    """
    placeholder = "TODO"
    print(placeholder)

### Analyse all data

In [None]:
# Switch to the seaborn whitegrid matplotlib style, then run the per-question
# report for every survey question.
# NOTE(review): the 'seaborn-v0_8-*' style names presumably require
# matplotlib >= 3.6 — confirm against the pinned environment.
plt.style.use('seaborn-v0_8-whitegrid')
for question in questions:
    analyze_question_responses(df=df, question=question)

## 3. Response Analysis Functions

In [None]:
def analyze_question(df, question):
    """
    Print and plot the response breakdown for a single survey question.

    Prints the overall share of each answer and the share of each answer
    within every philosophical background (both rounded to three decimals),
    then shows a count plot of the answers with one hue per background.

    Args:
        df: Survey responses; must contain the ``question`` column and the
            'What is your background in philosophy' column.
        question: Name of the column to analyze.
    """
    background_col = 'What is your background in philosophy'

    # Overall share of each answer.
    print(f"\nAnalysis of: {question}")
    print("\nOverall response distribution:")
    overall_shares = df[question].value_counts(normalize=True).round(3)
    print(overall_shares)

    # Share of each answer within a background (every row sums to 1).
    by_background = pd.crosstab(
        df[background_col],
        df[question],
        normalize='index',
    ).round(3)
    print("\nResponses by background (proportions):")
    print(by_background)

    # Grouped counts: answers on the x axis, one bar colour per background.
    plt.figure(figsize=(12, 6))
    sns.countplot(data=df, x=question, hue=background_col)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

In [None]:
def get_question_list(df):
    """
    Return the survey question columns of ``df`` in their original order.

    Every column is treated as a question except the four metadata columns
    listed below (timestamp, philosophy background, region of upbringing and
    self-described view on mind).
    """
    metadata_columns = {
        'Timestamp',
        'What is your background in philosophy',
        'Where in the world did most of your childhood education and upbringing take place? ',
        'I would describe my views on mind as',
    }
    question_columns = []
    for column_name in df.columns:
        if column_name in metadata_columns:
            continue
        question_columns.append(column_name)
    return question_columns

## 4. Individual Question Analysis

In [None]:
# Collect the survey question columns (metadata columns excluded).
questions = get_question_list(df=df)

In [None]:
# Demonstrate the analysis on the first survey question.
analyze_question(df=df, question=questions[0])

## 5. Statistical Analysis

In [None]:
def chi_square_test(df, question):
    """
    Perform chi-square test of independence between background and responses.

    Args:
        df: Survey responses; must contain the ``question`` column and the
            'What is your background in philosophy' column.
        question: Name of the column whose answers are tested against the
            background categories.

    Returns:
        dict with the keys 'question' (the column name), 'chi2' (test
        statistic), 'p_value' and 'dof' (degrees of freedom), as reported by
        ``scipy.stats.chi2_contingency`` for the background x answer
        contingency table of raw counts.
    """
    # Imported locally on purpose: the notebook's data-check cell rebinds the
    # global name `stats` to an os.stat_result, so going through that global
    # would fail with an AttributeError.
    from scipy.stats import chi2_contingency

    contingency_table = pd.crosstab(
        df['What is your background in philosophy'],
        df[question]
    )
    # The fourth return value (the expected frequencies) is not needed here.
    chi2, p_val, dof, _ = chi2_contingency(contingency_table)
    return {
        'question': question,
        'chi2': chi2,
        'p_value': p_val,
        'dof': dof
    }

# One chi-square test of independence per survey question.
statistical_results = []
for question in questions:
    result = chi_square_test(df, question)
    statistical_results.append(result)

# Collect the per-question results and flag those below the 0.05 threshold.
# NOTE(review): these are raw p-values from many separate tests; no
# multiple-comparison correction is applied here even though `multipletests`
# is imported at the top of the notebook.
results_df = pd.DataFrame(statistical_results).assign(
    significant=lambda frame: frame['p_value'] < 0.05
)

# Report the flagged questions, smallest p-value first.
print("Significant differences found in responses to:")
significant_questions = results_df.loc[results_df['significant']]
print(significant_questions[['question', 'p_value']].sort_values('p_value'))