In [20]:
import csv
import boto3
import os
import json
import datetime

# S3 bucket queried by list_s3_files_using_client().
BUCKET = "relab-simulator-logs"
# Key prefix passed to list_objects_v2 to restrict the listing.
INPUT_FOLDER = "DATI_NP_2301/dati_NP_2301"
# Local directory for generated files; keep the trailing "/" because paths
# are built from it by plain string concatenation.
LOCAL_OUTPUT_FOLDER = "./data/output/"
# CSV file that export_log_to_csv() appends its rows to.
OUTPUT_FILE = LOCAL_OUTPUT_FOLDER + "LOG.csv"


In [21]:
def list_s3_files_using_client():
    """
    List every object stored under ``INPUT_FOLDER`` in the ``BUCKET`` S3 bucket.

    A single ``list_objects_v2`` call returns at most 1,000 keys, so the
    listing is paginated to avoid silently dropping objects from large
    prefixes.

    :return: list of the object-metadata dicts S3 reports under "Contents"
             (e.g. with ``Key`` and ``Size`` entries); an empty list when no
             object matches the prefix.
    """
    s3_client = boto3.client("s3")
    paginator = s3_client.get_paginator("list_objects_v2")

    files = []
    for page in paginator.paginate(Bucket=BUCKET, Prefix=INPUT_FOLDER):
        # "Contents" is absent from a page that holds no objects.
        files.extend(page.get("Contents", []))
    return files

In [22]:
def get_Unix_Timestamp(timestamp):
    """
    Convert an ISO-8601-like timestamp string into a Unix timestamp.

    :param timestamp: string such as "2022-06-20T13:23:36.978298"; the
        fractional-seconds part is optional.
    :return: seconds since the epoch as a float. The parsed value carries no
        timezone, so ``datetime.timestamp`` interprets it as local time.
    :raises ValueError: if the string matches neither accepted layout.
    """
    # Try the fractional-seconds layout first (the common case in the logs),
    # then fall back to whole seconds, which the "%f" layout alone rejects.
    for layout in ("%Y-%m-%dT%H:%M:%S.%f", "%Y-%m-%dT%H:%M:%S"):
        try:
            parsed = datetime.datetime.strptime(timestamp, layout)
        except ValueError:
            continue
        return parsed.timestamp()
    raise ValueError("unsupported timestamp format: " + repr(timestamp))

def get_absolute_timestamp(dictionary):
    """
    Extract the timestamp of a log record.

    The value is looked up, in this order, under ``dictionary["timestamp"]``,
    ``dictionary["VehicleDynamics"]["timestamp"]`` and
    ``dictionary["person0"]["timestamp"]``; the first location that exists
    wins. A value whose string form contains "T" is converted with
    ``get_Unix_Timestamp``; any other value is returned untouched.

    :param dictionary: one decoded log record.
    :return: the timestamp found, or 0 when none of the locations exists.
    """
    if "timestamp" in dictionary:
        holder = dictionary
    else:
        holder = None
        for section in ("VehicleDynamics", "person0"):
            if section in dictionary.keys() and "timestamp" in dictionary[section]:
                holder = dictionary[section]
                break
        if holder is None:
            # No known location carries a timestamp.
            return 0

    raw_value = holder["timestamp"]
    if 'T' in str(raw_value):
        return get_Unix_Timestamp(raw_value)
    return raw_value
               
                
    

def get_first_timestamp_value(log_file):
    """
    Find the first record of a JSON-lines log that carries both reference timestamps.

    :param log_file: path to a log file with one JSON document per line.
    :return: ``[first_timestamp_relab, first_timestamp_unibo]`` taken from the
        first line containing both "timestamp_relab" and "timestamp_unibo",
        or ``[0, 0]`` when no line has both.
    :raises json.JSONDecodeError: if a non-blank line before the first match
        is not valid JSON.
    """
    with open(log_file) as f:
        # Iterating the file stops cleanly at EOF; the earlier readline()
        # loop parsed the empty EOF string and crashed before reaching its
        # own end-of-file check.
        for line in f:
            if not line.strip():
                continue  # tolerate blank lines
            line_data = json.loads(line)
            if "timestamp_relab" in line_data and "timestamp_unibo" in line_data:
                return [line_data["timestamp_relab"], line_data["timestamp_unibo"]]
    return [0, 0]

def get_dict_keys(dictionary, prepend=""):
    """
    Flatten the keys of a nested dict into a list of column names.

    Keys of nested dicts are joined to their parents' keys with "_"; names
    are produced depth first, in the dictionaries' iteration order.

    :param dictionary: mapping whose values may themselves be dicts.
    :param prepend: prefix put in front of every produced name.
    :return: list of flattened key names.
    """
    separator = "_"
    flattened = []
    for name, value in dictionary.items():
        if isinstance(value, dict):
            # Descend, carrying the path walked so far as the new prefix.
            flattened.extend(get_dict_keys(value, prepend + name + separator))
        else:
            flattened.append(prepend + name)
    return flattened

def get_dict_values(dictionary, prepend=""):
    """
    Flatten the leaf values of a nested dict into a list.

    Values are produced depth first, in the dictionaries' iteration order,
    i.e. in the same order as the names returned by ``get_dict_keys``.

    :param dictionary: mapping whose values may themselves be dicts.
    :param prepend: key path accumulated during recursion; it does not
        influence the returned values.
    :return: list of all non-dict values.
    """
    separator = "_"
    leaves = []
    for name, value in dictionary.items():
        if isinstance(value, dict):
            leaves.extend(get_dict_values(value, prepend + name + separator))
        else:
            leaves.append(value)
    return leaves


In [23]:
def export_log_to_csv(log_file):
    """
    Append the records of one JSON-lines session log to ``OUTPUT_FILE`` as CSV rows.

    Each row holds the session name (the log file's parent directory), a
    per-file row counter, the relab/unibo timestamps relative to the first
    record of the file that carries both, and the flattened values of the
    record. A header row is written only when the output file is new or
    empty; it is derived from the first record, so later records are assumed
    to share its structure.

    Processing of a file stops at the first record that cannot be parsed or
    converted (e.g. it lacks "timestamp_relab"/"timestamp_unibo"); the
    offending line and the error are printed.

    :param log_file: path of the form ".../<session>/<file>" to a log with
        one JSON document per line.
    """
    session = log_file.split('/')[-2]
    print("exporting log file for session ::: " + session)

    [first_timestamp_relab, first_timestamp_unibo] = get_first_timestamp_value(log_file)
    timestamp_s = 0

    # Create the target directory up front so opening in append mode cannot
    # fail just because the folder is missing.
    output_dir = os.path.dirname(OUTPUT_FILE)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    # An existing but empty file still needs its header row.
    header_written = os.path.isfile(OUTPUT_FILE) and os.path.getsize(OUTPUT_FILE) > 0

    # newline="" hands line-ending control to the csv module (avoids blank
    # rows between records on platforms that translate "\n").
    with open(OUTPUT_FILE, "a", newline="") as output_csvfile:
        csv_writer = csv.writer(output_csvfile, delimiter=',')
        with open(log_file) as f:
            # Iterating the file ends cleanly at EOF instead of relying on a
            # JSON parse failure of the empty EOF string to leave the loop.
            for line in f:
                if not line.strip():
                    continue  # tolerate blank lines
                try:
                    data = json.loads(line)

                    if not header_written:
                        headers = ['session', 'timestamp(s)', 'relative_relab_timestamp', 'relative_unibo_timestamp'] + get_dict_keys(data)
                        csv_writer.writerow(headers)
                        print(headers)
                        header_written = True

                    relative_relab_timestamp = data["timestamp_relab"] - first_timestamp_relab
                    relative_unibo_timestamp = data["timestamp_unibo"] - first_timestamp_unibo

                    values_to_write = [session, timestamp_s, relative_relab_timestamp, relative_unibo_timestamp] + get_dict_values(data)
                    csv_writer.writerow(values_to_write)
                    timestamp_s += 1
                except Exception as e:
                    # Report the bad record and stop processing this file.
                    print(line)
                    print("Error: " + str(e))
                    break
    
    
    

In [24]:
# List the S3 objects under INPUT_FOLDER (performs a network call to AWS).
# NOTE(review): no visible cell consumes `files` — execution count In [25] is
# missing, so whatever cell used this listing is not part of the notebook as shown.
files = list_s3_files_using_client()

In [26]:
# Upload settings. BUCKET and LOCAL_OUTPUT_FOLDER are assigned again here so
# this cell can run on its own.
BUCKET = "relab-simulator-logs"
OUTPUT_FOLDER = "Prepared_data"
LOCAL_OUTPUT_FOLDER = "data/output/"
files_to_load = ["LOG.csv"]

# One bucket handle is enough for every upload.
target_bucket = boto3.Session().resource('s3').Bucket(BUCKET)
for file in files_to_load:
    s3_key = OUTPUT_FOLDER + '/LOG/' + file
    local_path = LOCAL_OUTPUT_FOLDER + file
    target_bucket.Object(s3_key).upload_file(local_path)
