In [35]:
import os

In [38]:
def sort_and_save_top_p(input_file_path, k, p):
    """Save the top ``p`` rows of a whitespace-delimited table, ranked by column ``k``.

    The first line of the input file is treated as the header row. The
    remaining rows are sorted in descending order by the numeric value found
    in column ``k``; the first ``p`` of them are written, below the header, to
    ``results_final.txt`` in the same directory as the input file.

    Rows that have fewer than ``k`` fields, or whose kth field cannot be
    parsed as a number, are ignored.

    Args:
        input_file_path: Path to the input text file.
        k: 1-based number of the column to sort by (1 is the first column).
        p: Number of top rows to keep; must be non-negative.

    Returns:
        None. Problems with the arguments or the file contents are reported
        via ``print`` and no output file is written in that case.
    """
    # Check if the input file exists
    if not os.path.isfile(input_file_path):
        print(f"File '{input_file_path}' does not exist.")
        return

    # Read data from the input file
    with open(input_file_path, 'r') as file:
        lines = file.readlines()

    # A header row plus at least one data row is required
    if len(lines) < 2:
        print("File does not contain enough data rows.")
        return

    # Extract the column labels (first row)
    column_labels = lines[0].strip().split()

    # k is a 1-based column number (it is used as index k - 1 below), so the
    # valid range is 1..len(column_labels). Accepting 0 would silently sort by
    # the last column through index -1.
    if k < 1 or k > len(column_labels) or p < 0:
        print("Invalid column index or number of top rows.")
        return

    # Keep only rows whose kth field exists and parses as a number, pairing
    # each row with its sort key so the field is parsed exactly once.
    keyed_rows = []
    for line in lines[1:]:
        parts = line.split()
        if len(parts) < k:
            continue
        try:
            sort_value = float(parts[k - 1])
        except ValueError:
            continue
        # The final line of a file may lack a newline; normalise the ending so
        # rows never run together once they are reordered.
        keyed_rows.append((sort_value, line.rstrip('\n') + '\n'))

    # Check if p is greater than the number of valid data rows
    if p > len(keyed_rows):
        print("Number of top rows exceeds the number of valid data rows.")
        return

    # Sort on the kth column, largest first (ties keep their file order),
    # then take the top p rows.
    keyed_rows.sort(key=lambda pair: pair[0], reverse=True)
    top_p_lines = [row for _, row in keyed_rows[:p]]

    # Create 'results_final.txt' in the same directory as the input file
    output_file_path = os.path.join(os.path.dirname(input_file_path), 'results_final.txt')

    # Write the header followed by the selected rows
    with open(output_file_path, 'w') as output_file:
        output_file.write(' '.join(column_labels) + '\n')
        output_file.writelines(top_p_lines)

    print(f"Top {p} rows sorted based on column {k} in descending order and saved to 'results_final.txt'.")


In [40]:
# Run parameters -- edit these three values for your own data.
input_file_path = 'results.txt'  # path to the input file
k = 4   # column number to sort by
p = 20  # number of top rows to save
sort_and_save_top_p(input_file_path=input_file_path, k=k, p=p)


Top 20 rows sorted based on column 4 in descending order and saved to 'results_final.txt'.
