In [30]:
import json
import os
from datetime import datetime, timezone
from pathlib import Path

import firebase_admin
from firebase_admin import credentials, firestore
from tqdm import tqdm

In [26]:
# Experiment identifier; it names the sub-directory under ../test_data/ that
# the run files and experiment_config.json are written to.
experiment_id = "h22"

# Fetch data from GCP

In [11]:
class FirestoreHandler:
    """Holds an authenticated Firestore client and a conversations collection reference.

    Attributes:
        firestore_client: The client returned by ``firestore.client()``.
        firestore_collection: Reference to the collection named by
            ``collection_name``.
    """

    # Defaults keep ``FirestoreHandler()`` working exactly as before.
    DEFAULT_CREDENTIALS_PATH = "../aida-gcp-72cd8bcbcf5c.json"
    DEFAULT_COLLECTION_NAME = "conversations-with-aida"

    def __init__(
        self,
        credentials_path=DEFAULT_CREDENTIALS_PATH,
        collection_name=DEFAULT_COLLECTION_NAME,
    ):
        """Authenticate against Firebase and open the conversations collection.

        Args:
            credentials_path: Path (str or ``Path``) to the service-account
                JSON key file. Only read when no default Firebase app has
                been initialised yet in this process.
            collection_name: Name of the Firestore collection to expose as
                ``self.firestore_collection``.
        """
        # Authenticate first: the collection reference needs the client.
        self._authenticate_firebase(credentials_path)
        self.firestore_collection = self.firestore_client.collection(
            collection_name
        )

    def _authenticate_firebase(self, credentials_path=DEFAULT_CREDENTIALS_PATH):
        """Initialise the default Firebase app if needed and create the client.

        Args:
            credentials_path: Path to the service-account JSON key file.
        """
        try:
            # Public API check; raises ValueError when the default app does
            # not exist yet (e.g. first run after a kernel restart).
            firebase_admin.get_app()
        except ValueError:
            cred = credentials.Certificate(Path(credentials_path).as_posix())
            firebase_admin.initialize_app(cred)

        self.firestore_client = firestore.client()

In [19]:
# Personas to download; their order also fixes the run numbering used later.
personas = ["intervju", "fika", "lite"]
fh = FirestoreHandler()
# Reuse the collection reference the handler already holds instead of
# looking the collection up a second time by its hard-coded name.
conv_collection = fh.firestore_collection
# Inclusive bounds applied to the `created_at` field.
# NOTE(review): these are plain strings, so the range filter only behaves as
# intended if `created_at` is stored as a comparable string — confirm.
start_date = "2022-05-29"
end_date = "2022-07-04"

# persona -> conversation_id -> {"data": conversation dict, "messages": {doc id: message dict}}
downloaded_conversations = {}
for persona in personas:
    # Non-development conversations for this persona within the date range.
    convs_data = (
        conv_collection.where("persona", "==", persona)
        .where("development_testing", "==", False)
        .where("created_at", ">=", start_date)
        .where("created_at", "<=", end_date)
    )
    persona_conversations = {}
    for conv in tqdm(convs_data.stream(), desc=persona):
        conversation = conv.to_dict()
        conversation_id = conversation["conversation_id"]
        # Messages live in a "messages" sub-collection of each conversation document.
        message_docs = (
            conv_collection.document(conversation_id).collection("messages").stream()
        )
        persona_conversations[conversation_id] = {
            "data": conversation,
            "messages": {doc.id: doc.to_dict() for doc in message_docs},
        }
    downloaded_conversations[persona] = persona_conversations


681it [00:44, 15.28it/s]
1476it [01:30, 16.30it/s]
370it [00:21, 17.37it/s]


# Format and store conversations

In [28]:
# Output directory for this experiment. `parents=True` also creates
# ../test_data when it is missing, and `exist_ok=True` makes the cell safe
# to re-run.
data_path = Path(f"../test_data/{experiment_id}")
data_path.mkdir(parents=True, exist_ok=True)

In [39]:
# Which side of the conversation the bot plays in the exported transcripts.
bot_role = "testee"
# Resolve the two speaker prefixes once instead of branching per message.
if bot_role == "testee":
    bot_label, human_label = "Testee:", "Other agent:"
else:
    bot_label, human_label = "Other agent:", "Testee:"

run_nbr = 1
# conversation_id -> (run number, 1-based index of the conversation within that run)
id_to_runandidx = {}
for persona in personas:
    conv_nbr = 1
    transcript_parts = []
    for conversation_id, content in downloaded_conversations[persona].items():
        messages = content["messages"]
        # Conversations with at most one message are left out of the export.
        if len(messages) <= 1:
            continue
        id_to_runandidx[conversation_id] = (run_nbr, conv_nbr)
        conv_nbr += 1
        # NOTE(review): messages are written in the dict's iteration order,
        # i.e. the order the download returned them — confirm it is chronological.
        for message in messages.values():
            label = bot_label if message["who"] == "bot" else human_label
            transcript_parts.append(label + message["text_en"] + "\n")
        # Separator line that ends each conversation.
        transcript_parts.append("####\n")
    txt_str = "".join(transcript_parts)
    # One file per persona, numbered in the order of `personas`.
    with open(data_path / f"run_{run_nbr}.txt", "w", encoding="utf8") as f:
        f.write(txt_str)
    run_nbr += 1

In [24]:
# Spot check of one "lite" conversation: the recorded output is 1, i.e. it
# holds a single message, so the `len(content["messages"])>1` filter in the
# formatting cell leaves it out of the run files.
print(len(downloaded_conversations["lite"]["e8TY94PrK7JaNFi2MDoM"]["messages"]))

1


# Store specifications

In [40]:
# Single UTC timestamp shared by every run entry, so all entries agree even
# if the cell runs across a second boundary.
config_created_at = datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M:%S")

# One entry per persona, keyed by the run number as a string ("1", "2", ...).
# Numbering follows the order of `personas`, the same order used to number
# the run_<n>.txt files.
# NOTE(review): -1 and "" presumably mean "not applicable" for recorded
# conversations — confirm against whatever reads this config.
config = {
    str(run_nbr): {
        "testee_id": f"Emely 0.6 {persona}",
        "conv_partner_id": "H22 visitor",
        "random_conv_start": False,
        "conv_length": -1,
        "amount_convs": -1,
        "conv_starter": "",
        "date_time": config_created_at,
    }
    for run_nbr, persona in enumerate(personas, start=1)
}

with open(data_path / "experiment_config.json", "w", encoding="utf8") as f:
    json.dump(config, f, indent=4)