In [130]:
%matplotlib inline

In [379]:
import pandas as pd

def qanswers(q_num):
    return data[data["question id"] == q_num]\
     .loc[:, ["user_key", "question text", "answer_cleaned"]]\
     .groupby(["question text", "answer_cleaned"])\
     .agg("count")\
     .groupby(level=0)\
     .apply(lambda x : x / x.sum()) * 100



data = pd.read_csv("answers_clean.csv")

gender_questions = data[data["question text"] == "What is your gender?"]
gender_questions = gender_questions.set_index("user_key")
genders = gender_questions.loc[:,"answer"]

age_questions = data[data["question text"] == "What is your year of birth?"]
age_questions = age_questions.set_index("user_key")
ages = age_questions.loc[:, "answer"]

data = data.join(genders, on="user_key", rsuffix="_gender")
data = data.join(ages, on="user_key", rsuffix="_yob")

data = data.rename({"answer_gender": "gender", "answer_yob": "birth_year"}, axis=1)
data["birth_year"] = data["birth_year"].astype(float)
data["age"] = 2019 - data["birth_year"]



How many people have ever answered a question on Khetha?

In [132]:
len(set(data.user_key))

670

Gender and age breakdown of all people who answered those questions

In [153]:
gender_questions.groupby("answer").agg("count")

Unnamed: 0_level_0,submission id,submission complete,timestamp,task slug,question id,question text,answer_cleaned,Province
answer,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Female,255,255,255,255,255,255,255,215
I would rather not say,3,3,3,3,3,3,3,2
Male,281,281,281,281,281,281,281,235
Other,1,1,1,1,1,1,1,0


Spatial distribution of all people for whom we have that info

In [151]:
data.loc[:, ["Province", "user_key"]].drop_duplicates().groupby("Province").agg("count")


Unnamed: 0_level_0,user_key
Province,Unnamed: 1_level_1
Eastern Cape,37
Free State,24
Gauteng,233
KwaZulu-Natal,56
Limpopo,28
Mpumalanga,20
North West,20
Northern Cape,8
Western Cape,35


Total people who may have joined the app for the first few questions (e.g. Big Debate) but never answered the demographic and location questions. - People who haven't answered the gender question

In [169]:
data.loc[:, ["question id", "question text"]]
DEMOGRAPHICS_SLUG = "tell-us-about-yourself"
demographics_questions = data[data["task slug"] == DEMOGRAPHICS_SLUG]
demographics_users = len(set(demographics_questions["user_key"]))
all_users = len(set(data["user_key"]))

print(f"All users: {all_users}, no demographics: {all_users - demographics_users}")

All users: 670, no demographics: 122


How many people completed all questions and modules

In [188]:
data.loc[:, ["user_key", "task slug"]]\
 .drop_duplicates()\
 .groupby("user_key")\
 .agg("count")\
 .reset_index()\
 .sort_values("task slug")\
 .groupby("task slug")\
 .agg("count")

Unnamed: 0_level_0,user_key
task slug,Unnamed: 1_level_1
1,82
2,22
3,73
4,29
5,31
6,39
7,28
8,31
9,11
10,15


Total number of respondents for each module (so we can see drop-off by module)

In [199]:
data.loc[:, ["user_key", "task slug"]]\
 .drop_duplicates()\
 .reset_index()\
 .groupby("task slug")\
 .agg("count")\
 .rename(columns={"index" : "count"})\
 .loc[:, "count"]

task slug
BD-1                                  162
BD-2                                  156
BD-3                                  154
BD-4                                  159
BD-5                                  153
BD-6                                  154
BD-7                                  153
BD-8                                  157
Constitution                          295
Voting-2                              297
barrier-to-voting                     286
contact-details                       502
election-importance                   482
fact-or-emotion                       334
have-your-say                         172
help-someone-vote                     225
how-do-you-feel-about-south-africa    175
inter-generational-conversation       255
meet-your-local-party                 188
parties-competing                     428
policy-positions-manisfestos          314
post-election-follow-up               199
priority-policy-issues                248
reasons-for-supporting-p

For each module, how many people started the module but didn’t finish it (if that can be seen)

In [238]:
task_questions = data\
 .loc[:, ["task slug", "question text"]]\
 .drop_duplicates()\
 .groupby("task slug")\
 .agg("count")

user_answers = data.loc[:, ["user_key", "task slug", "question text"]]\
 .groupby(["user_key", "task slug"])\
 .agg("count")

task_responses = user_answers.join(task_questions, on="task slug", rsuffix="_right")
task_responses["complete"] = task_responses["question text"] == task_responses["question text_right"]
task_responses.reset_index()\
 .loc[:, ["user_key", "task slug", "complete"]]\
 .groupby(["task slug", "complete"])\
 .agg("count")

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
task slug,complete,Unnamed: 2_level_1
BD-1,True,162
BD-2,True,156
BD-3,True,154
BD-4,True,159
BD-5,True,153
BD-6,True,154
BD-7,True,153
BD-8,True,157
Constitution,True,295
Voting-2,False,3


How many people clicked through from FB posts and then a) answered some questions b) did not convert into any participation beyond the click

* What is your MAIN reason for choosing a party to vote for? (all responses before 8 May)
* What was your main reason for choosing the party you voted for? (all responses after elections)

In [266]:
data.loc[:, ["question id", "question text"]].set_index("question id")#.reset_index()#.drop_duplicates()
Q_MAIN_REASON_BEFORE = 26
Q_MAIN_REASON_AFTER = 151
reasons_before = data[data["question id"] == Q_MAIN_REASON_BEFORE].loc[:, ["user_key", "answer"]].set_index("user_key")
reasons_after = data[data["question id"] == Q_MAIN_REASON_AFTER].loc[:, ["user_key", "answer"]].set_index("user_key")
answers_join = reasons_before.join(reasons_after, rsuffix="_after")
answers_join.reset_index().groupby(["answer", "answer_after"]).agg("count").rename(columns={"user_key": "# users"})

Unnamed: 0_level_0,Unnamed: 1_level_0,# users
answer,answer_after,Unnamed: 2_level_1
I agree with their proposed policies on issues that are important to me,I did not vote,7
I agree with their proposed policies on issues that are important to me,I had to vote (no particular reason),1
I agree with their proposed policies on issues that are important to me,I wanted the policies I cared about to be actioned,12
I agree with their proposed policies on issues that are important to me,I wanted to support the party I felt has done the most for the country,6
I agree with their proposed policies on issues that are important to me,I wanted to support the party that has helped me and my family,3
I agree with their proposed policies on issues that are important to me,We needed change in leadership,9
I am part of an organisation that supports this party,I wanted to support the party that has helped me and my family,1
I trust the party representatives,I had to vote (no particular reason),1
I trust the party representatives,I wanted the policies I cared about to be actioned,2
I trust the party representatives,I wanted to support the party I felt has done the most for the country,4


By percentage, how many respondents chose each answer option for the following questions:
    * Compared to six months ago, did any of the following change your mind about whether to vote?
    * Compared to six months ago, did any of the following change your mind about which party to vote for?
    * Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?


In [325]:
data.loc[:, ["question id", "question text"]].drop_duplicates()
Q_1 = 152
Q_2 = 153
Q_3 = 154

q1 = data[data["question id"] == Q_1]
data[data["question id"] == Q_1]\
 .loc[:, ["user_key", "answer"]]\
 .groupby("answer")\
 .agg("count")/len(q1) * 100

data[(data["question id"] == Q_1) | (data["question id"] == Q_2) | (data["question id"] == Q_3)]\
 .loc[:, ["user_key", "question text", "answer"]]\
 .groupby(["question text", "answer"])\
 .agg("count").groupby(level=0).apply(lambda x : x / x.sum()) * 100

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
question text,answer,Unnamed: 2_level_1
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?","Something I heard or saw in the media (TV, radio, newspapers, etc.)",25.405405
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?","Something I heard or saw on social media (twitter, facebook, Instagram, etc.)",23.783784
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?",Something I heard or saw when attending a political rally or political party meeting,15.135135
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?",Something I read or an activity I did on this app/Khetha,15.135135
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?",Something my friends and family members told me,7.567568
"Compared to six months ago, did any of the following change your mind about reasons for choosing a party to vote for?",Talking to a party representative,12.972973
"Compared to six months ago, did any of the following change your mind about whether to vote?","Something I heard or saw in the media (TV, radio, newspapers, etc.)",24.468085
"Compared to six months ago, did any of the following change your mind about whether to vote?","Something I heard or saw on social media (twitter, facebook, Instagram, etc.)",22.87234
"Compared to six months ago, did any of the following change your mind about whether to vote?",Something I heard or saw when attending a political rally or political party meeting,15.957447
"Compared to six months ago, did any of the following change your mind about whether to vote?",Something I read or an activity I did on this app/Khetha,16.489362


We also need to see if there’s any relation between the planned party to vote for and the actual party voted for by respondents both at national and provincial level by %. For instance, the % of people who had planned to vote for a particular party and actually voted for a different party. On the following questions

* Which party did you originally plan to vote for at national level?
* Which party did you actually vote for at national level?


In [348]:
QN_PLAN = 157
QN_ACTUAL = 158


qn_plan = data[data["question id"] == QN_PLAN].loc[:, ["user_key", "answer_cleaned"]].set_index("user_key")
qn_actual = data[data["question id"] == QN_ACTUAL].loc[:, ["user_key", "answer_cleaned"]].set_index("user_key")

national = qn_plan\
 .join(qn_actual, rsuffix="_after")\
 .rename(columns={"answer_cleaned" : "Planned", "answer_cleaned_after": "Actual"})\
 .reset_index()\
 .groupby(["Planned", "Actual"])\
 .agg("count")\
 .rename(columns={"user_key": "Counts"})

national


Unnamed: 0_level_0,Unnamed: 1_level_0,Counts
Planned,Actual,Unnamed: 2_level_1
ACDP,ACDP,3
ANC,ANC,66
ANC,DA,3
ANC,EFF,9
ANC,I rather not say it,1
ANC,,8
ATM,ATM,1
DA,ANC,4
DA,DA,9
DA,Good,1


* Which party did you originally plan to vote for at provincial level?
* Which party did you actually vote for at provincial level?

In [347]:
QP_PLAN = 159
QP_ACTUAL = 160

qp_plan = data[data["question id"] == QP_PLAN].loc[:, ["user_key", "answer_cleaned"]].set_index("user_key")
qp_actual = data[data["question id"] == QP_ACTUAL].loc[:, ["user_key", "answer_cleaned"]].set_index("user_key")

provincial = qn_plan\
 .join(qn_actual, rsuffix="_after")\
 .rename(columns={"answer_cleaned" : "Planned", "answer_cleaned_after": "Actual"})\
 .reset_index()\
 .groupby(["Planned", "Actual"])\
 .agg("count")\
 .rename(columns={"user_key": "Counts"})

provincial

Unnamed: 0_level_0,Unnamed: 1_level_0,Counts
Planned,Actual,Unnamed: 2_level_1
ACDP,ACDP,3
ANC,ANC,66
ANC,DA,3
ANC,EFF,9
ANC,I rather not say it,1
ANC,,8
ATM,ATM,1
DA,ANC,4
DA,DA,9
DA,Good,1


Would you say that the country is going in the wrong direction or going in the right direction?

In [383]:
qanswers(166)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
question text,answer_cleaned,Unnamed: 2_level_1
Would you say that the country is going in the wrong direction or going in the right direction?,Going in the right direction,33.142857
Would you say that the country is going in the wrong direction or going in the right direction?,Going in the wrong direction,9.714286
Would you say that the country is going in the wrong direction or going in the right direction?,"Some things are going right, and some things are going wrong",57.142857


In [384]:
qanswers(169)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
question text,answer_cleaned,Unnamed: 2_level_1
"When you think of the institutional and leadership capacity South Africa (including capacity in government, civil society and the economy) compared to a year ago, is it:",Getting stronger,45.882353
"When you think of the institutional and leadership capacity South Africa (including capacity in government, civil society and the economy) compared to a year ago, is it:",Getting weaker,21.764706
"When you think of the institutional and leadership capacity South Africa (including capacity in government, civil society and the economy) compared to a year ago, is it:",Staying the same,32.352941


In [386]:
qanswers(170)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
question text,answer_cleaned,Unnamed: 2_level_1
"When you think of social inequality in South Africa, compared to a year ago, is it:",Decreasing,23.668639
"When you think of social inequality in South Africa, compared to a year ago, is it:",Increasing,43.195266
"When you think of social inequality in South Africa, compared to a year ago, is it:",Staying the same,33.136095


In [387]:
qanswers(171)

Unnamed: 0_level_0,Unnamed: 1_level_0,user_key
question text,answer_cleaned,Unnamed: 2_level_1
"When you think of how citizens are feeling and acting about the country, is there a sense of:",None of the above,11.83432
"When you think of how citizens are feeling and acting about the country, is there a sense of:",Reconciliation (coming together towards a shared understanding and common goal),25.443787
"When you think of how citizens are feeling and acting about the country, is there a sense of:",Resentment (complaining and withdrawing into private or divided spaces),31.952663
"When you think of how citizens are feeling and acting about the country, is there a sense of:",Resistance (organising and fighting against a state or groups in society that are considered illegitimate),30.769231
