In [1]:
import os

def read_dict_file(file_path, encoding=None):
    """Read a tab-separated ``.dict`` file into an item -> index mapping.

    Every non-blank line must consist of exactly two tab-separated fields,
    ``<index>`` and ``<item>`` (in that order), where ``<index>`` parses as
    an integer. Leading/trailing whitespace of each line is stripped before
    splitting, and lines that are empty after stripping are skipped.

    Args:
        file_path: Path of the ``.dict`` file to read.
        encoding: Text encoding handed to ``open``. ``None`` (the default)
            keeps the platform's default encoding.

    Returns:
        A dict mapping each item string to its integer index. When an item
        occurs on several lines, the last occurrence wins.

    Raises:
        ValueError: If a non-blank line does not have exactly two
            tab-separated fields, or its index field is not an integer.
            The message names the file and the 1-based line number.
    """
    dictionary = {}
    with open(file_path, 'r', encoding=encoding) as f:
        for line_no, line in enumerate(f, start=1):
            record = line.strip()
            if not record:
                # Blank (or whitespace-only) lines carry no mapping.
                continue

            fields = record.split('\t')
            if len(fields) != 2:
                raise ValueError(
                    f"{file_path}:{line_no}: expected 2 tab-separated fields, "
                    f"got {len(fields)}"
                )

            index, item = fields
            try:
                dictionary[item] = int(index)
            except ValueError as err:
                raise ValueError(
                    f"{file_path}:{line_no}: index {index!r} is not an integer"
                ) from err
    return dictionary

def process_file(input_file, entity_dict, relation_dict, output_file, encoding=None):
    """Translate a file of named triples into index triples.

    Each non-blank input line must hold exactly three tab-separated fields:
    head entity, relation, tail entity. Leading/trailing whitespace of each
    line is stripped before splitting, and lines that are empty after
    stripping are skipped. For every triple whose three parts are all found
    in the lookup tables, one line is written to ``output_file`` with the
    head index, ``rel_`` followed by the relation index, and the tail index,
    separated by tabs. Triples with any unmapped part are dropped silently.

    Args:
        input_file: Path of the tab-separated triple file to read.
        entity_dict: Mapping from entity name to index.
        relation_dict: Mapping from relation name to index.
        output_file: Path of the file to write; it is overwritten.
        encoding: Text encoding used for both files. ``None`` (the default)
            keeps the platform's default encoding.

    Raises:
        ValueError: If a non-blank line does not have exactly three
            tab-separated fields. The message names the file and the
            1-based line number.
    """
    with open(input_file, 'r', encoding=encoding) as f, \
            open(output_file, 'w', encoding=encoding) as out_f:
        for line_no, line in enumerate(f, start=1):
            record = line.strip()
            if not record:
                # Blank (or whitespace-only) lines carry no triple.
                continue

            fields = record.split('\t')
            if len(fields) != 3:
                raise ValueError(
                    f"{input_file}:{line_no}: expected 3 tab-separated fields, "
                    f"got {len(fields)}"
                )

            entity1, relation, entity2 = fields
            e1_index = entity_dict.get(entity1)
            rel_index = relation_dict.get(relation)
            e2_index = entity_dict.get(entity2)

            # Compare against None explicitly: index 0 is a valid mapping.
            if e1_index is None or rel_index is None or e2_index is None:
                continue

            out_f.write(f"{e1_index}\trel_{rel_index}\t{e2_index}\n")

def process_all_files(input_files, entity_dict, relation_dict, output_dir):
    """Convert every input file and concatenate the results into ``all.txt``.

    For each path in ``input_files`` a converted file with the same base
    name is written into ``output_dir`` via ``process_file``. The converted
    files are also concatenated, in the order given, into
    ``<output_dir>/all.txt``, which is overwritten on every call.

    Args:
        input_files: Iterable of paths to the triple files to convert.
        entity_dict: Mapping from entity name to index.
        relation_dict: Mapping from relation name to index.
        output_dir: Directory receiving the converted files; created
            (including parents) if it does not exist.

    Raises:
        ValueError: If an input file's base name is ``all.txt``, because its
            converted output would overwrite the combined file.
    """
    # exist_ok=True avoids the check-then-create race of exists() + makedirs().
    os.makedirs(output_dir, exist_ok=True)

    all_output_file = os.path.join(output_dir, 'all.txt')

    # Opening in 'wb' both truncates any previous all.txt and keeps a single
    # handle for the whole run. The copy is done on raw bytes so it does not
    # depend on which text encoding the converted files were written with.
    with open(all_output_file, 'wb') as all_f:
        for input_file in input_files:
            output_file = os.path.join(output_dir, os.path.basename(input_file))
            if output_file == all_output_file:
                raise ValueError(
                    f"input file {input_file!r} would overwrite the combined "
                    f"output {all_output_file!r}"
                )

            process_file(input_file, entity_dict, relation_dict, output_file)

            # Append the freshly converted file to the combined output.
            with open(output_file, 'rb') as f:
                all_f.writelines(f)

# Locations of the lookup tables, the triple files to convert, and the
# directory that receives the converted output.
entity_dict_file = 'entities.dict'
relation_dict_file = 'relations.dict'
input_files = ['test.txt', 'train.txt', 'valid.txt']
output_dir = 'results'

# Load the name -> index lookup tables (entities first, then relations).
entity_dict = read_dict_file(file_path=entity_dict_file)
relation_dict = read_dict_file(file_path=relation_dict_file)

# Convert every input file; the converted files and the combined all.txt
# are written under output_dir.
process_all_files(
    input_files=input_files,
    entity_dict=entity_dict,
    relation_dict=relation_dict,
    output_dir=output_dir,
)

print("Processing complete. Check the 'results' directory for output files.")


Processing complete. Check the 'results' directory for output files.
