In [8]:
import csv
import boto3
import os
import json
import datetime

BUCKET = "relab-simulator-logs"
INPUT_FOLDER = "dati_NP_2301"
LOCAL_OUTPUT_FOLDER = "./data/output/"


In [9]:
def list_s3_files_using_client():
    """
    This functions list all files in s3 bucket.
    :return: files
    """

    s3_client = boto3.client("s3")
    bucket_name = BUCKET
    response = s3_client.list_objects_v2(Bucket=bucket_name, Prefix= INPUT_FOLDER)
    files = response.get("Contents")
    #for file in files:
        #print(f"file_name: {file['Key']}, size: {file['Size']}")
    return files

In [10]:
def get_Unix_Timestamp(timestamp):
    given_date = timestamp #"2022-06-20T13:23:36.978298"
    formated_date = datetime.datetime.strptime(given_date,"%Y-%m-%dT%H:%M:%S.%f")
    Unix_timestamp = datetime.datetime.timestamp(formated_date)
    #print("The Unix timestamp for the given input date is:")
    return Unix_timestamp

def get_absolute_timestamp(dictionary):
    timestamp = 0
    if "timestamp" in dictionary :
        
        timestamp = dictionary["timestamp"]
        if 'T' in str(timestamp):
            
            timestamp = get_Unix_Timestamp(timestamp)
            #print(line)
                
    elif "VehicleDynamics" in dictionary.keys() and "timestamp" in dictionary["VehicleDynamics"]:
        timestamp = dictionary["VehicleDynamics"]["timestamp"]
        if 'T' in str(timestamp):
            
            timestamp = get_Unix_Timestamp(timestamp)
    elif "person0" in dictionary.keys() and "timestamp" in dictionary["person0"]:
                timestamp = dictionary["person0"]["timestamp"]
                if 'T' in str(timestamp):
                    timestamp = get_Unix_Timestamp(timestamp)
    return timestamp
               
                
    

def get_first_timestamp_value(log_file):
    timestamp = 0
    with open(log_file) as f:
        while True:
        
            line = f.readline()
            
            line_data =  json.loads(line)
            if "timestamp" in line_data["msg"] :
                timestamp = line_data["msg"]["timestamp"]
                if 'T' in str(timestamp):
                    timestamp = get_Unix_Timestamp(timestamp)
                #print(line)
                break
            elif "VehicleDynamics" in line_data["msg"].keys() and "timestamp" in line_data["msg"]["VehicleDynamics"]:
                timestamp = line_data["msg"]["VehicleDynamics"]["timestamp"]
                if 'T' in str(timestamp):
                    timestamp = get_Unix_Timestamp(timestamp)
                #print(line)
                break
            elif "person0" in line_data["msg"].keys() and "timestamp" in line_data["msg"]["person0"]:
                timestamp = line_data["msg"]["person0"]["timestamp"]
                if 'T' in str(timestamp):
                    timestamp = get_Unix_Timestamp(timestamp)
            if not line:
                break
    return timestamp

def get_dict_keys(dictionary,prepend = ""):
    
    result =[]
    delimiter = "_"
    for key in dictionary.keys():
        if(isinstance(dictionary[key], dict)):
            result += get_dict_keys(dictionary[key], prepend + key + delimiter)
        else:
            result.append(prepend + key)
    return result

def get_dict_values(dictionary,prepend = ""):
    
    result =[]
    delimiter = "_"
    for key in dictionary.keys():
        if(isinstance(dictionary[key], dict)):
            result += get_dict_values(dictionary[key], prepend + key + delimiter)
        else:
            result.append(dictionary[key])
    return result


In [11]:
def export_log_to_csv(log_file):
    session = log_file.split('/')[-2]
    print("exporting log file for session ::: " + session)
    
    initial_timestamp = get_first_timestamp_value(log_file)
    emoj_timestamp_tracker = initial_timestamp
    
    csv_exporters = {}
    
    with open(log_file) as f:
        while True:
            
            line = f.readline()
            try:
                data = json.loads(line)
                topic = data['topic']
                if topic == "AITEK_EVENTS" and "Level_of_distraction" in data["msg"].keys():
                    continue
                if topic == "Emotions" :
                    keys_to_delete = []
                    for key in data["msg"].keys():
                        if key != "person0":
                            keys_to_delete.append(key)
                    for key in keys_to_delete:
                        del data["msg"][key]
                if topic not in csv_exporters.keys():
                    file_exists =os.path.isfile(LOCAL_OUTPUT_FOLDER + topic +".csv")
                    csv_exporters[topic] = []#{open("D:\\myfiles\welcome.txt", "w")
                    csv_exporters[topic].append( open(LOCAL_OUTPUT_FOLDER + topic +".csv", "a"))
                    csv_exporters[topic].append( csv.writer(csv_exporters[topic][0], delimiter=','))
                    headers = ['session','relative_timestamp'] + get_dict_keys(data['msg'])
                    if not file_exists: #file_exists:
                        csv_exporters[topic][1].writerow(headers)
        
                    


                absolute_timestamp = get_absolute_timestamp(data['msg'])

                if topic == "Emotions": #facciamo così perchè sul topic emotions manca il timestamp
                    emoj_timestamp_tracker += 1 #aggiungiamo un secondo perchè teoricvamente emoj pubblica ogni secondo
                    absolute_timestamp =  emoj_timestamp_tracker
                else:
                    if absolute_timestamp != 0:
                        emoj_timestamp_tracker = absolute_timestamp


                timestamp = absolute_timestamp - initial_timestamp


                values_to_write = [session,timestamp] +  get_dict_values(data['msg'])

                csv_exporters[topic][1].writerow(values_to_write)
                if not line:
                    break
            except Exception as e:
                print(line)
                print("Error: " + str(e))
                break
                
        for topic in csv_exporters.keys():
            csv_exporters[topic][0].close()
            
    

In [12]:
files = list_s3_files_using_client()

In [13]:
for file in files:
    #if we don't have the file locally we download it
    os.makedirs(LOCAL_OUTPUT_FOLDER + '/'.join(file['Key'].split('/')[0:-1]), exist_ok=True)
    if not os.path.isfile(LOCAL_OUTPUT_FOLDER + file['Key']):
        print("download " + LOCAL_OUTPUT_FOLDER + file['Key'])
        boto3.resource('s3').Bucket(BUCKET).download_file(file['Key'], LOCAL_OUTPUT_FOLDER + file['Key'] )
    export_log_to_csv(LOCAL_OUTPUT_FOLDER + file['Key'])
    

exporting log file for session ::: NMC1

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMC2

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMC3_

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMC4_NEWTO

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMW1

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMW1B

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMW2

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMW3_NEWTO

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NMW4_NEWTO

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NPC1

Error: Expecting value: line 1 column 1 (char 0)
exporting log file for session ::: NPC2

Error: Expecting value: line 1 column 1

In [14]:
BUCKET = "relab-simulator-logs"
OUTPUT_FOLDER = "Prepared_data"
files_to_load = ["AITEK_EVENTS.csv","DSS.csv","Emotions.csv","NP_UNIBO_FTD.csv","NP_UNITO_DCDC.csv","RL_VehicleDynamics.csv"]
LOCAL_OUTPUT_FOLDER = "data/output/"
for file in files_to_load:
    boto3.Session().resource('s3').Bucket(BUCKET).Object(OUTPUT_FOLDER + '/' + file).upload_file( LOCAL_OUTPUT_FOLDER + file)
