# FILES_RELATION_MAPPING
---
**09.05.2019**

**NOTE:** The goal is to find files that concern the same object but are stored in different folders, and to group them together so that they can easily be processed together later.

## 1. Imports

In [55]:
from os import listdir
from os.path import isfile, join
from pprint import pprint
import pickle

## 2. Settings

In [40]:
# Directory scanned by browse_through_bitalino_files; each file found there
# is stored under the "bitalino" key of its object's entry.
BITALINO_RAW_DATA_SOURCE_DIR = "./data/raw_data/bitalino"
# Directory scanned by browse_through_procedure_files; it holds both the
# "procedure" files and the "*info" files of each object.
PROCEDURE_RAW_DATA_SOURCE_DIR = "./data/raw_data/procedura"

## 3. Auxiliary functions

In [41]:
def list_files_from_dir(dir_path):
    """Return the names of the files located directly in ``dir_path``.

    Entries that are not files (e.g. sub-directories) are skipped. The names
    are returned without the directory prefix, in the order ``listdir``
    yields them.
    """
    file_names = []
    for entry in listdir(dir_path):
        if isfile(join(dir_path, entry)):
            file_names.append(entry)
    return file_names

In [42]:
def extract_id_from_file_name(file_name):
    """Return the object id, i.e. the first four characters of ``file_name``.

    Names shorter than four characters are returned unchanged.
    """
    id_length = 4
    return file_name[0:id_length]

In [43]:
def browse_through_bitalino_files(dictionary):
    """Register every file from the bitalino directory in ``dictionary``.

    For each file in ``BITALINO_RAW_DATA_SOURCE_DIR`` a new entry
    ``{"bitalino": <path>}`` is stored under the id taken from the file name.
    ``dictionary`` is modified in place and also returned.

    Raises:
        Exception: if an id is already present in ``dictionary``.
    """
    source_dir = BITALINO_RAW_DATA_SOURCE_DIR

    for name in list_files_from_dir(source_dir):
        key = extract_id_from_file_name(name)
        # Each id may own exactly one bitalino file.
        if key in dictionary:
            raise Exception('Id {} appears many times in bitalino data'.format(key))
        dictionary[key] = {"bitalino": join(source_dir, name)}
    return dictionary

In [44]:
def browse_through_procedure_files(dictionary):
    """Add the procedure and info file paths to ``dictionary``.

    Every file in ``PROCEDURE_RAW_DATA_SOURCE_DIR`` is stored under the id
    taken from its name: under the "info" key when the four characters just
    before the last four characters of the name are "info", otherwise under
    the "procedure" key. Ids not yet present get a fresh, empty entry.
    ``dictionary`` is modified in place and also returned.

    Raises:
        Exception: if an id already has a file of the same kind.
    """
    source_dir = PROCEDURE_RAW_DATA_SOURCE_DIR

    for name in list_files_from_dir(source_dir):
        key = extract_id_from_file_name(name)
        # The slice skips the last four characters (e.g. ".txt") and looks
        # at the four characters in front of them.
        kind = "info" if name[-8:-4] == "info" else "procedure"

        bag = dictionary.setdefault(key, {})
        # Each id may own at most one file of every kind.
        if kind in bag:
            raise Exception('Id {} appears many times in procedure data'.format(key))
        bag[kind] = join(source_dir, name)
    return dictionary

In [52]:
def filter_out_incomplete_data(dictionary):
    """Return a new dict holding only the entries with a complete file set.

    An entry is complete when it contains all of the keys "bitalino",
    "info" and "procedure". The input dictionary is left untouched; the
    kept values are the same objects as in the input.
    """
    required_kinds = ("bitalino", "info", "procedure")
    return {
        object_id: bag
        for object_id, bag in dictionary.items()
        if all(kind in bag for kind in required_kinds)
    }

In [56]:
def save_to_file(dictionary, file_name):
    """Pickle ``dictionary`` into the file ``file_name``.

    The file is opened in binary write mode, so an existing file with the
    same name is overwritten.

    Args:
        dictionary: the object to serialize with ``pickle.dump``.
        file_name: path of the output file.
    """
    # The context manager closes the file even when pickling fails, so the
    # handle is never leaked.
    with open(file_name, "wb") as pickle_out:
        pickle.dump(dictionary, pickle_out)

## 4. Construction of a dictionary

In [53]:
# NOTE: the files for 3054 and 6855 are broken - they were deleted.

# Collect the bitalino paths first, then merge in the procedure/info paths,
# and finally keep only the ids for which all three files were found.
files = filter_out_incomplete_data(
    browse_through_procedure_files(
        browse_through_bitalino_files({})
    )
)

{'1107': {'bitalino': './data/raw_data/bitalino/1107_opensignals_prawestanowisko_2019-04-19_09-11-49.txt',
          'info': './data/raw_data/procedura/1107_2019_Apr_19_0712_info.txt',
          'procedure': './data/raw_data/procedura/1107_2019_Apr_19_0712.txt'},
 '1153': {'bitalino': './data/raw_data/bitalino/1153_opensignals_lewestanowisko_2019-04-19_13-03-03.txt',
          'info': './data/raw_data/procedura/1153_2019_Apr_19_1259_info.txt',
          'procedure': './data/raw_data/procedura/1153_2019_Apr_19_1259.txt'},
 '1233': {'bitalino': './data/raw_data/bitalino/1233_opensignals_prawestanowisko_2019-04-16_13-06-39.txt',
          'info': './data/raw_data/procedura/1233_2019_Apr_16_1240_info.txt',
          'procedure': './data/raw_data/procedura/1233_2019_Apr_16_1240.txt'},
 '1400': {'bitalino': './data/raw_data/bitalino/1400_opensignals_lewestanowisko_2019-04-08_10-12-56.txt',
          'info': './data/raw_data/procedura/1400_2019_Apr_08_1000_info.txt',
          'procedure': '.

In [57]:
# Persist the id -> file paths mapping as a pickle named "file_map"
# (relative path, no extension).
save_to_file(dictionary=files, file_name="file_map")