# Extract raw report data from ObserveRTC

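The output is a series of CSV files: one per client type under `output_folder/rawReport/`, plus `output_folder/uniqueCallsAndOutcomes.csv`, which lists the unique calls and their outcomes.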

In [1]:
import sys
from dotenv import load_dotenv
from pymongo import MongoClient
from datetime import datetime, timedelta
import pandas as pd
import os
import json
import logging
import helperFunctions as hf

# Project-specific initialisation from helperFunctions
# (presumably configures logging / environment — TODO confirm).
hf.setup()

# Every per-client CSV produced by this notebook is written below this folder.
outputFolder = "output_folder/rawReport/"
outputFolderExists = os.path.exists(outputFolder)
if not outputFolderExists:
    # makedirs also creates the parent "output_folder" directory.
    os.makedirs(outputFolder)
    logging.info(f"The directory \"{outputFolder}\" is created!")

# Connect to MongoDB and keep handles to the two collections that the
# following cells query.
client = MongoClient(hf.getConnectionString())
database = client["observertc-reports"]
calls, reports = database["calls"], database["reports"]

In [2]:
# Dump the matching documents of the "calls" collection into a single CSV.
logging.info("Start extracting Unique Calls And Outcomes from the calls database.")

# Only documents whose logging_type is one of these values are exported.
outcomeTypes = [
    "COMMAND_SESSION_SUCCESS",
    "COMMAND_SESSION_FAILURE",
    "COMMAND_SESSION_FAILED_SETUP",
]
query = {"logging_type": {"$in": outcomeTypes}}

# The DataFrame constructor consumes the cursor, one row per document.
matchingCalls = calls.find(query)
df = pd.DataFrame(matchingCalls)

logging.info(f"Shape:  {df.shape}")

df.to_csv("output_folder/uniqueCallsAndOutcomes.csv", index=False)

2023-01-31 13:53:03 INFO     Start extracting Unique Calls And Outcomes from the calls database. 
2023-01-31 13:53:03 INFO     Shape:  (19222, 7) 


In [3]:
# Run through the Reports collection and write one raw-stats CSV per client.

# Payload fields copied unchanged into every output row.
_ENVELOPE_FIELDS = ("timestamp", "callId", "roomId", "clientId", "userId", "sampleSeq")
# Identifier columns that are converted to strings before the CSV is written.
_ID_COLUMNS = ("callId", "roomId", "clientId", "userId")
# Values of payload.extensionType selected from the reports collection.
_EXTENSION_TYPES = [
    "OUT_BOUND_RTC",
    "IN_BOUND_RTC",
    "REMOTE_OUT_BOUND_RTC",
    "REMOTE_IN_BOUND_RTC",
]


def _flattenRecord(record):
    """Flatten one report document into a single flat row.

    Args:
        record: A report document. ``record["payload"]`` must contain the
            fields listed in ``_ENVELOPE_FIELDS`` and, under the key
            ``"payload"``, a JSON string whose decoded object has a
            ``"stats"`` mapping.

    Returns:
        dict: The envelope fields followed by every entry of ``stats``.
        If a stats key has the same name as an envelope field, the stats
        value wins.

    Raises:
        KeyError: If an envelope field, the nested payload or ``"stats"``
            is missing.
        json.JSONDecodeError: If the nested payload is not valid JSON.
    """
    payload = record["payload"]
    row = {field: payload[field] for field in _ENVELOPE_FIELDS}
    stats = json.loads(payload["payload"])["stats"]
    # https://stackoverflow.com/questions/38987/how-do-i-merge-two-dictionaries-in-a-single-expression
    return {**row, **stats}


logging.info("Starting query for all clients")
for userId in hf.userIds:

    logging.info(f"Starting query for {userId}")

    query = {
        "type": "CLIENT_EXTENSION_DATA",
        "payload.extensionType": {"$in": _EXTENSION_TYPES},
        "payload.userId": userId,
    }

    # The documents are fetched while the cursor is iterated, not by find()
    # itself. The context manager closes the cursor even when flattening a
    # record raises part-way through.
    with reports.find(query) as cursor:
        logging.info(f"Got data for {userId}. Converting to data frame.")
        dataSet = [_flattenRecord(record) for record in cursor]

    logging.info(f"{userId}, Dataset complete, size: {len(dataSet)}, converting types")
    if not dataSet:
        # No CSV is written for a client without records; make that visible
        # in the log instead of skipping silently.
        logging.warning(f"{userId}, no records found, no file written")
        continue

    newData = pd.DataFrame(dataSet)
    for column in _ID_COLUMNS:
        newData[column] = newData[column].astype(str)
    logging.info(f"{userId}, Dataset converted, writing to file")
    newData.to_csv((outputFolder + userId + ".csv"), mode='w', header=True, index=False)

    

2023-01-31 13:53:03 INFO     Starting query for all clients 
2023-01-31 13:53:03 INFO     Starting query for c1-Normal 
2023-01-31 13:53:03 INFO     Got data for c1-Normal. Converting to data frame. 
2023-01-31 13:55:03 INFO     c1-Normal, Dataset complete, size: 508639, converting types 
2023-01-31 13:55:26 INFO     c1-Normal, Dataset converted, writing to file 
2023-01-31 13:55:40 INFO     Starting query for c2-TorNormal 
2023-01-31 13:55:40 INFO     Got data for c2-TorNormal. Converting to data frame. 
2023-01-31 13:56:08 INFO     c2-TorNormal, Dataset complete, size: 417036, converting types 
2023-01-31 13:56:20 INFO     c2-TorNormal, Dataset converted, writing to file 
2023-01-31 13:56:32 INFO     Starting query for c3-TorEurope 
2023-01-31 13:56:32 INFO     Got data for c3-TorEurope. Converting to data frame. 
2023-01-31 13:57:01 INFO     c3-TorEurope, Dataset complete, size: 414471, converting types 
2023-01-31 13:57:15 INFO     c3-TorEurope, Dataset converted, writing to file 
