In [None]:
import os
import glob
import time
import pandas as pd

In [None]:
def remove_except_recent(directory, days):
    """Delete stale files under ``directory``, always keeping each folder's newest file.

    Every folder in the tree rooted at ``directory`` is handled independently.
    Within a folder a file is deleted only when BOTH conditions hold:

    * it is not the most recently modified file of that folder, and
    * its modification time is older than ``days`` days before now.

    Consequently files modified within the last ``days`` days are never
    touched, and a folder that contains files never ends up empty.

    Parameters
    ----------
    directory : str or path-like
        Root of the tree to clean. Walked recursively with ``os.walk``; a root
        that cannot be listed simply yields no files.
    days : int or float
        Retention window in days, measured against each file's mtime.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``df_deleted`` has one column ``"Deleted Files"`` holding the base
        names of the removed files, sorted by name. ``df_kept`` has one column
        ``"Kept Files"`` holding the base names of the surviving files with
        duplicate names dropped.

    Raises
    ------
    OSError
        If a file cannot be stat'ed or removed (e.g. it is locked or vanished
        between listing and deletion). Files removed before the failure stay
        removed.
    """
    threshold_time = time.time() - (days * 24 * 60 * 60)
    deleted_files = []
    kept_files = []

    for root, _subdirs, filenames in os.walk(directory):
        if not filenames:
            continue

        # Stat every file exactly once so all decisions for this folder are
        # made against one consistent snapshot of modification times.
        stamped = [
            (os.path.getmtime(path), path)
            for path in (os.path.join(root, name) for name in filenames)
        ]

        # The newest file is chosen up front, which makes the result
        # independent of listing order; equal mtimes are tie-broken by path.
        newest_path = max(stamped)[1]

        for mtime, path in stamped:
            if path != newest_path and mtime < threshold_time:
                os.remove(path)
                deleted_files.append(os.path.basename(path))
                print(f"Deleted file: {path}")
            else:
                # Either the folder's newest file or still inside the window.
                kept_files.append(os.path.basename(path))

    # Report of removed files, sorted by name.
    df_deleted = pd.DataFrame(deleted_files, columns=["Deleted Files"])
    df_deleted = df_deleted.sort_values(by=["Deleted Files"])

    # Report of surviving files; identical base names are listed once.
    df_kept = pd.DataFrame(kept_files, columns=["Kept Files"])
    df_kept = df_kept.drop_duplicates()

    return df_deleted, df_kept

In [None]:
# Root folder whose tree is cleaned up. This is a raw string, so every
# backslash is literal and separators must be written as a single backslash
# (a doubled one would put two backslash characters into the path).
directory = r'R:\RawData\PNP B2B\Daily Client File Downloads'
# Retention window in days passed to remove_except_recent.
days = 7

In [None]:
# WARNING: destructive — this call removes files on disk with os.remove and
# cannot be undone. The returned (deleted, kept) DataFrames are shown as the
# cell output.
remove_except_recent(directory, days)