# Demographics

In [None]:
import pandas as pd

In [None]:
from metadata import ID_QUESTION_MAP, QUESTION_ID_MAP
from plotting import plot_single_cat

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
# Column window analysed in this notebook: positions 0..6, i.e. Q1-Q7.
start_id, end_id = 0, 7

# Read the survey export indexed by respondent id and keep only the columns
# inside the window.
df = pd.read_csv('data/base1.csv', index_col='resp_id').iloc[:, start_id:end_id]

# Last expression of the cell: preview the first rows.
df.head()

In [None]:
# Print the keys of QUESTION_ID_MAP that fall inside the column window
# (presumably the question texts - confirm against metadata), numbered from 1.
selected_questions = list(QUESTION_ID_MAP.keys())[start_id:end_id]
for number, q in enumerate(selected_questions, start=1):
    print(f"Q{number}: {q}")

### Q1: Which age group do you belong to?

In [None]:
# Print the Q1 entry of ID_QUESTION_MAP, then plot the Q1 column with an
# explicit category order.
print(ID_QUESTION_MAP['Q1'])
plot_single_cat(
    df['Q1'],
    drop_nans=True,
    custom_order=[3, 0, 1, 2, 4, 5],
    ticks_rotation=45,
    save_path="figures/q1.png",
)

### Q2: What is your gender identity?

In [None]:
# Print the Q2 entry of ID_QUESTION_MAP, then plot the Q2 column without a
# custom category order.
print(ID_QUESTION_MAP['Q2'])
plot_single_cat(
    df['Q2'],
    drop_nans=True,
    custom_order=None,
    ticks_rotation=45,
    save_path="figures/q2.png",
)

### Q3 + Q4: In which country are you currently employed? + Other

In [None]:
# Work on copies so the merge below never touches df itself.
orig_q = df['Q3'].copy()
other_q = df['Q4'].copy()
combined_q = orig_q.copy()

# Respondents who picked "Other" in Q3 get their Q4 answer instead.
# NOTE(review): relies on pandas aligning the right-hand Series on the index
# during masked assignment - confirm for the pandas version in use.
chose_other = orig_q == "Other"
combined_q[chose_other] = other_q.dropna()

# Fold the German spelling into the English country name.
combined_q.loc[combined_q == "Deutschland"] = "Germany"

print(ID_QUESTION_MAP['Q3'])
print(ID_QUESTION_MAP['Q4'])
plot_single_cat(
    combined_q,
    drop_nans=True,
    custom_order=None,
    ticks_rotation=45,
    save_path="figures/q3_q4.png",
)

### Q5: Which of the following best describes your current work status?

In [None]:
# Print the Q5 entry of ID_QUESTION_MAP, then plot the Q5 column without a
# custom category order.
print(ID_QUESTION_MAP['Q5'])
plot_single_cat(
    df['Q5'],
    drop_nans=True,
    custom_order=None,
    ticks_rotation=45,
    save_path="figures/q5.png",
)

### Q6 + Q7: Which of the following best represents your highest level of education? + Other

In [None]:
# Copies of the education question (Q6) and its companion column (Q7).
orig_q = df['Q6'].copy()
other_q = df['Q7'].copy()
# NOTE(review): combined_q is created here but not used by the plot in this
# cell, which reads df['Q6'] directly; the Q7 answers are only listed.
combined_q = orig_q.copy()

# Show how many non-missing Q7 answers exist, then each one on its own line.
other_answers = other_q.dropna()
print(len(other_answers))
for other in other_answers:
    print(other)

print(ID_QUESTION_MAP['Q6'])
print(f"({ID_QUESTION_MAP['Q7']})")
plot_single_cat(
    df['Q6'],
    drop_nans=True,
    custom_order=[3, 2, 1, 0, 4, 6, 5],
    ticks_rotation=90,
    save_path="figures/q6_q7.png",
)

# Output For Table

In [None]:
def print_table_info(series, custom_order=None):
    """Print one ``label count percentage`` line per distinct answer in *series*.

    Missing answers (NaN) are counted as a category of their own, and each
    percentage is relative to the total number of entries in *series* (NaN
    included), rounded to one decimal place. Rows are printed by descending
    count and followed by a line of 20 dashes.

    Args:
        series: pandas Series holding the answers to a single question.
        custom_order: Optional sequence of sort ranks, one per distinct
            answer, in the order in which
            ``series.value_counts(sort=False, dropna=False)`` yields the
            answers. Because the rows are finally sorted by count, the ranks
            only decide the relative order of answers with equal counts.
            If fewer ranks than answers are supplied, the unranked answers
            are kept and placed after the ranked ones (previously they were
            silently dropped); surplus ranks are ignored.
    """
    val_counts = series.value_counts(sort=False, dropna=False)
    labels = [str(label) for label in val_counts.index.tolist()]
    counts = val_counts.values.tolist()

    if custom_order:
        # Build one sort key per answer. Ranked answers sort first by their
        # rank; answers without a rank follow in their original order, so a
        # too-short custom_order can no longer make zip() discard rows.
        ranked = [(0, rank) for rank in list(custom_order)[:len(labels)]]
        unranked = [(1, pos) for pos in range(len(ranked), len(labels))]
        rows = sorted(zip(labels, counts, ranked + unranked),
                      key=lambda row: row[2])
        labels = [row[0] for row in rows]
        counts = [row[1] for row in rows]

    total = len(series)
    percentages = [round((count / total) * 100, 1) for count in counts]

    # sorted() is stable, so answers with equal counts keep the order
    # established above.
    table_rows = sorted(zip(labels, counts, percentages),
                        key=lambda row: row[1], reverse=True)
    for label, count, percentage in table_rows:
        print(label, count, percentage)
    print('-' * 20)
    

In [None]:
# One (question id, sort ranks) entry per table, in output order.
table_specs = [
    ('Q1', [3, 0, 1, 2, 6, 4, 5]),
    ('Q2', [3, 0, 1, 2]),
    ('Q3', [2, 0, 1]),  # Q4 would be the others.
    ('Q5', [4, 0, 1, 2, 3]),
    ('Q6', [7, 3, 2, 1, 0, 4, 6, 5]),
]

# For every entry, print its ID_QUESTION_MAP value followed by the answer table.
for question_id, order in table_specs:
    print(ID_QUESTION_MAP[question_id])
    print_table_info(df[question_id], custom_order=order)