# Data export script

- @author Gong Zequn (zequn.gong@u.nus.edu)
- @create 2021.06.28
- @modify 2021.07.22

In [1]:
!pip install pymysql pandas



## Query the MySQL database

In [14]:
# Common configs
import os


class Configs:
    """Connection settings passed to ``database.initConnections``.

    Every value can be overridden with the environment variable named next
    to it; when the variable is not set, the previous hard-coded value is
    used, so existing setups keep working unchanged.
    """

    # MySQL connection
    _db_host = os.environ.get('DB_HOST', 'mysql')
    # Environment values are strings, so normalise the port to int.
    _db_port = int(os.environ.get('DB_PORT', 3306))
    _db_user = os.environ.get('DB_USER', 'csqsiew')
    # NOTE(review): this fallback password is committed in the notebook.
    # Prefer supplying DB_PASSWD from the environment, then rotate the
    # password and remove the literal.
    _db_passwd = os.environ.get('DB_PASSWD', 'u98x7v89asx')
    _db_name = os.environ.get('DB_NAME', 'singlishwords')
    _db_charset = os.environ.get('DB_CHARSET', 'utf8')

    # Redis connection
    _redis_host = os.environ.get('REDIS_HOST', 'redis')
    _redis_port = int(os.environ.get('REDIS_PORT', 6379))

In [15]:
# Project-local helper module; the cells below use it to fetch the survey data.
import libs.database as database

# Hand the settings class to the helper before any data is requested.
# NOTE(review): presumably this opens the MySQL/Redis connections described
# by Configs — confirm in libs/database.
database.initConnections(Configs)

In [16]:
# Load the three data sets through the database helper.
# Later cells treat each result as a mapping (.values() / .items()):
# `questions` is indexed with an answer's 'question_id' and `respondents`
# with an answer's 'respondent_id'.
# NOTE(review): presumably each mapping is keyed by the row's primary id —
# confirm in libs/database.
questions = database.getQuestions()
answers = database.getAnswers()
respondents = database.getRespondents()

## Use pandas to convert to dataframes

In [17]:
import pandas as pd

In [18]:
# One DataFrame per data set: every value of the mapping becomes a row.
# The mapping keys themselves are not carried into the frame.
questions_df = pd.DataFrame(list(questions.values()))
answers_df = pd.DataFrame(list(answers.values()))
respondents_df = pd.DataFrame(list(respondents.values()))

In [19]:
import os

# to_csv does not create missing parent directories, so make sure the
# export folder exists before writing into it.
os.makedirs('./export-data', exist_ok=True)

# Write each frame as its own CSV file, without the pandas row index.
questions_df.to_csv('./export-data/questions.csv', index=False)
answers_df.to_csv('./export-data/answers.csv', index=False)
respondents_df.to_csv('./export-data/respondents.csv', index=False)

## Use pandas to join them

> This join is just an example; the export further below nests the information as JSON instead.

In [20]:
# Attach each answer to its question (answers.question_id -> questions.id).
# Both frames have an `id` column, so the suffixes tell them apart afterwards.
qa = answers_df.merge(
    questions_df,
    left_on='question_id',
    right_on='id',
    suffixes=('_answer', '_question'),
)

In [21]:
# Attach the respondent to every question/answer row
# (qa.respondent_id -> respondents.id).
qar = qa.merge(
    respondents_df,
    left_on='respondent_id',
    right_on='id',
    suffixes=('_qa', '_respondent'),
)

In [22]:
# Remove the id columns left over from the two joins.
# errors='ignore' keeps this cell re-runnable: `qar` is reassigned here, so on
# a second run the columns are already gone and a plain drop would raise KeyError.
qar = qar.drop(columns=['id_answer', 'id_question', 'id'], errors='ignore')

In [23]:
# Preview the first five joined rows (head() defaults to n=5).
qar.head()

Unnamed: 0,association1,association2,association3,time_spend,question_id,respondent_id,word,age,gender,education,country_of_birth,country_of_residence,ethnicity,is_native,language_spoken,start_time,end_time,email
0,dwqd,,,3,1,1,4d,19,Female,PSLE,Singapore,Singapore,Malay,No,"[""Belarusian""]",2021-07-22 21:18:49,2021-07-22 21:19:15,test2@gmail.com
1,dwq,,,1,3,1,zor kang,19,Female,PSLE,Singapore,Singapore,Malay,No,"[""Belarusian""]",2021-07-22 21:18:49,2021-07-22 21:19:15,test2@gmail.com
2,drehwax,,,1,4,1,zor tang,19,Female,PSLE,Singapore,Singapore,Malay,No,"[""Belarusian""]",2021-07-22 21:18:49,2021-07-22 21:19:15,test2@gmail.com
3,yejt567u,,,1,5,1,zor teng,19,Female,PSLE,Singapore,Singapore,Malay,No,"[""Belarusian""]",2021-07-22 21:18:49,2021-07-22 21:19:15,test2@gmail.com
4,swrhvbcfdt2345,,,1,6,1,zzz,19,Female,PSLE,Singapore,Singapore,Malay,No,"[""Belarusian""]",2021-07-22 21:18:49,2021-07-22 21:19:15,test2@gmail.com


# Export as nested JSON

In [24]:
import json

In [25]:
# Start one record per respondent, each with an empty 'answers' list that the
# next cell fills in.
# Every row is copied (dict(v, ...)) instead of reused: writing the 'answers'
# key into the original row objects would also modify `respondents`, which the
# DataFrame/CSV cells above read from.
# assumes each respondent row is a plain dict — TODO confirm in libs/database
results = {k: dict(v, answers=[]) for k, v in respondents.items()}

In [26]:
# File every answer under the respondent who gave it, replacing the question
# id with the question's word.
for answer in answers.values():
    respondent_answers = results[answer['respondent_id']]['answers']
    cue_word = questions[answer['question_id']]['word']
    associations = [
        answer['association1'],
        answer['association2'],
        answer['association3'],
    ]
    respondent_answers.append({
        'question': cue_word,
        'associations': associations,
        'time_spend': answer['time_spend'],
    })

In [28]:
import os

# open() does not create missing parent directories, so make sure the export
# folder exists before writing into it.
os.makedirs('./export-data', exist_ok=True)

# Write the nested per-respondent structure as one JSON document.
# The encoding is set explicitly so the file does not depend on the platform default.
# Note: JSON object keys are always strings, so non-string keys of `results`
# (e.g. integer ids) are written as strings.
with open('./export-data/data.json', 'w', encoding='utf-8') as f:
    json.dump(results, f)