In [4]:
import os
import csv
import pandas as pd
import glob
from ftplib import FTP

In [5]:
# Root folder that is later passed to delete_all_except_recent().
directory = r'R:\RawData\Elite Star\React Stores\Client Download Folders'

In [None]:
def fetch_files_from_ftp(ftp_host, ftp_user, ftp_password, folders_to_download, download_path, ftp_port=22):
    """Download every entry listed in the given remote folders.

    Args:
        ftp_host: Host name of the FTP server.
        ftp_user: Login name.
        ftp_password: Login password.
        folders_to_download: Iterable of remote folder paths; each one is
            entered with ``cwd`` and listed with ``nlst``.
        download_path: Local folder the files are written to. All remote
            folders share this one local folder, so equal file names from
            different remote folders overwrite each other.
        ftp_port: TCP port of the server. Declared last so that it can keep
            its default (a defaulted parameter may not precede required ones).

    Raises:
        ftplib.all_errors: On connection, login or transfer failures.
        OSError: If a local file cannot be written.
    """
    # NOTE(review): 22 is the conventional SSH/SFTP port while ftplib speaks
    # plain FTP (normally port 21) -- confirm which protocol the server uses.

    # The context manager guarantees the control connection is closed even
    # when a transfer fails half-way through.
    with FTP() as ftp:
        ftp.connect(ftp_host, ftp_port)
        ftp.login(user=ftp_user, passwd=ftp_password)

        for folder in folders_to_download:
            ftp.cwd(folder)
            # presumably the listing contains only regular files; a
            # sub-folder entry would make RETR fail -- verify on the server.
            for file_name in ftp.nlst():
                local_file_path = os.path.join(download_path, file_name)
                with open(local_file_path, 'wb') as file:
                    ftp.retrbinary('RETR ' + file_name, file.write)

# Connection settings for the download step. The password is taken from the
# FTP_PASSWORD environment variable so it never has to be stored in the file.
ftp_host = 'sftp.infores.com'
ftp_user = 'tsello01'
ftp_password = os.getenv('FTP_PASSWORD')
if not ftp_password:
    # Fail early with a clear message instead of attempting a login with an
    # empty password.
    raise RuntimeError("The FTP_PASSWORD environment variable is not set")

# Remote folders whose contents are downloaded.
folders_to_download = [
    "/euetrac1ftp/Up/A1 SUPERMARKET/",
    "/euetrac1ftp/Up/Platinum/",
    "/euetrac1ftp/Up/Premjee/",
]
# An empty path makes os.path.join() return the bare file name, i.e. files
# are written to the current working directory.
download_path = r''

# Keyword arguments keep this call correct regardless of parameter order.
fetch_files_from_ftp(
    ftp_host=ftp_host,
    ftp_user=ftp_user,
    ftp_password=ftp_password,
    folders_to_download=folders_to_download,
    download_path=download_path,
)

In [None]:
def delete_all_except_recent(directory):
    """Keep only the most recently modified file in every folder of a tree.

    The tree rooted at *directory* is walked recursively. Within each folder
    the file with the newest modification time is kept and every other file
    in that same folder is deleted. Sub-folders themselves are never removed.
    When several files share the newest modification time, the one that
    ``os.walk`` reports first is kept.

    Args:
        directory: Root folder of the tree to prune.

    Returns:
        A ``(df_deleted, df_kept)`` tuple of DataFrames. ``df_deleted`` has a
        single column "Deleted Files" with the base names of the removed
        files, sorted by name. ``df_kept`` has a single column "Kept Files"
        with the de-duplicated base names of the files left in place.

    Raises:
        OSError: If a file cannot be inspected or removed.
    """
    # Read each file's modification time exactly once.
    candidates = []
    for root, _dirs, filenames in os.walk(directory):
        for filename in filenames:
            path = os.path.join(root, filename)
            candidates.append((os.path.getmtime(path), path))

    # Newest first. The sort is stable, so files with identical timestamps
    # stay in the order os.walk produced them.
    candidates.sort(key=lambda item: item[0], reverse=True)

    folders_with_keeper = set()
    deleted_files = []
    kept_files = []

    # Because of the ordering, the first file seen for a folder is its newest
    # one; every later file from the same folder is older and gets removed.
    # Only paths reported as files are touched, so sub-folders are safe.
    for _mtime, path in candidates:
        folder = os.path.dirname(path)
        name = os.path.basename(path)
        if folder in folders_with_keeper:
            os.remove(path)
            deleted_files.append(name)
        else:
            folders_with_keeper.add(folder)
            kept_files.append(name)

    df_deleted = pd.DataFrame(deleted_files, columns=["Deleted Files"])
    df_deleted = df_deleted.sort_values(by=["Deleted Files"])

    # Only base names are reported, so identical names kept in different
    # folders collapse into a single row.
    df_kept = pd.DataFrame(kept_files, columns=["Kept Files"])
    df_kept = df_kept.drop_duplicates()

    return df_deleted, df_kept

In [None]:
# Prune the tree under `directory` and capture the two report DataFrames
# (deleted file names, kept file names) returned by delete_all_except_recent().
# NOTE: this call removes files from disk via os.remove.
delete_df, kept_df = delete_all_except_recent(directory)

In [None]:
def delete_rows_starting_with(directory, character):
    """Drop the last row of each CSV file when it starts with *character*.

    Every file ending in ".csv" below *directory* is read. Only the final
    row is inspected: when its first cell starts with *character*, the row
    is removed and the file is rewritten in place. Files whose last row does
    not match, empty files, and files ending in a blank line are left alone.

    Args:
        directory: Root folder searched recursively for ".csv" files.
        character: Prefix looked for in the first cell of the last row.

    Returns:
        A DataFrame with one row per modified file and the columns
        "Filename" (base name), "Row Deleted Content" (the removed row as a
        list of cells) and "Modified" (True for every listed file).

    Raises:
        OSError: If a file cannot be read or rewritten.
    """
    deleted_rows = []
    modified_files = []

    for root, _dirs, files in os.walk(directory):
        for file in files:
            if not file.endswith(".csv"):
                continue

            filename = os.path.join(root, file)
            # newline="" lets the csv module handle line endings itself.
            with open(filename, "r", newline="") as f:
                rows = list(csv.reader(f))

            # A blank final line is parsed as an empty row; it has no first
            # cell to test, so the file is skipped.
            if not rows or not rows[-1]:
                continue
            if not rows[-1][0].startswith(character):
                continue

            # Trim trailing line breaks from the cells of the new last row.
            # A single-row file has no such row.
            if len(rows) >= 2:
                rows[-2] = [cell.rstrip('\r\n') for cell in rows[-2]]

            deleted_rows.append([file, rows[-1]])
            modified_files.append(file)

            with open(filename, "w", newline="") as f:
                csv.writer(f).writerows(rows[:-1])

    # Convert the deleted rows and modified files to a pandas dataframe
    df = pd.DataFrame(deleted_rows, columns=["Filename", "Row Deleted Content"])
    df["Modified"] = df["Filename"].isin(modified_files)

    return df
