## React

Downloads the files associated with the data source, then deletes every file in each download folder except the most recent one, as judged by its date modified (DM).

In [55]:
import logging
import os
import pandas as pd
import glob

from dotenv import load_dotenv
from ftplib import FTP, error_perm

In [56]:
# Logging setup: only ERROR and above is reported, each record formatted as
# "timestamp - level - message". `logger` is the notebook-wide logger.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.ERROR,
)
logger = logging.getLogger(__name__)

In [57]:

def fetch_files_from_ftp(ftp_host, ftp_port, ftp_user, ftp_password, folders_to_download, download_path):
    """Download every entry listed in the given FTP folders into one local folder.

    Args:
        ftp_host: Host name or address of the FTP server.
        ftp_port: Port of the FTP server.
        ftp_user: User name used to log in.
        ftp_password: Password used to log in.
        folders_to_download: Iterable of remote folder paths, each interpreted
            relative to the directory the session starts in after login.
        download_path: Local folder that receives the files; created if it
            does not exist. Files from all remote folders land directly in
            this folder, so equal names from different folders overwrite
            each other.

    Returns:
        None. Errors are not raised to the caller: an FTP permission error or
        any other exception is logged through the module-level ``logger`` and
        the download stops at that point.
    """
    try:
        with FTP() as ftp:
            ftp.connect(ftp_host, ftp_port)
            ftp.login(user=ftp_user, passwd=ftp_password)

            os.makedirs(download_path, exist_ok=True)

            # The folder paths are relative to the login directory. Without
            # returning there first, the second `cwd` would be resolved inside
            # the first folder and fail.
            start_dir = ftp.pwd()

            for folder in folders_to_download:
                ftp.cwd(start_dir)
                ftp.cwd(folder)
                for file_name in ftp.nlst():
                    local_file_path = os.path.join(download_path, file_name)
                    try:
                        with open(local_file_path, 'wb') as file:
                            ftp.retrbinary('RETR ' + file_name, file.write)
                    except Exception:
                        # Do not leave a truncated file behind: it would carry
                        # a fresh modification time and look like valid data.
                        if os.path.exists(local_file_path):
                            os.remove(local_file_path)
                        raise
    except error_perm as e:
        logger.error(f"Permission error: {e}")
    except Exception as e:
        logger.error(f"An error occurred: {e}", exc_info=True)

In [58]:
# Load the FTP connection settings from the environment (.env file supported)
# and define which remote folders to fetch and where the local copies live.
load_dotenv()
ftp_host = os.getenv('FTP_HOST')
# Fall back to the standard FTP port when FTP_PORT is unset or empty instead
# of failing with an opaque TypeError/ValueError from int().
ftp_port = int(os.getenv('FTP_PORT') or 21)
ftp_user = os.getenv('FTP_USERNAME')
ftp_password = os.getenv('FTP_PASSWORD')

# Remote folders, relative to the FTP login directory.
folders_to_download = [
    "euetrac1ftp/Up/A1 SUPERMARKET",
    "euetrac1ftp/Up/Platinum",
    "euetrac1ftp/Up/Premjee"
]

# Local root folder that is pruned further down in the notebook.
directory = r'R:\RawData\Elite Star\React Stores\Client Download Folders'

In [59]:
def create_file_dataframe(file_list, column_name, format_function):
    """Build a one-column, alphabetically sorted table of file names.

    Args:
        file_list: Values to put in the column.
        column_name: Name of the single column.
        format_function: Callable applied to every value before sorting.

    Returns:
        A DataFrame with the formatted values sorted ascending and a fresh
        index that starts at 1.
    """
    table = pd.DataFrame(file_list, columns=[column_name])
    table[column_name] = table[column_name].apply(format_function)

    ordered = table.sort_values(by=[column_name]).reset_index(drop=True)
    # Number the rows from 1 rather than 0.
    ordered.index = ordered.index + 1
    return ordered

In [60]:
def remove_all_except_recent_files(directory):
    """Delete all but the most recently modified file in every folder.

    Walks ``directory`` recursively. In each folder that contains files, the
    file with the latest modification time is kept and every other file in
    that folder is deleted. Subfolders themselves are never removed. When
    several files share the latest modification time, the one listed last by
    ``os.walk`` is kept.

    Args:
        directory: Root folder to prune.

    Returns:
        A DataFrame with the columns "Deleted Files" and "Kept Files" (base
        names only, each column sorted alphabetically), aligned side by side
        on a 1-based index; the shorter column is padded with NaN.
    """
    # Collect everything first so that deletions never interfere with the walk.
    files_by_folder = {}
    for root, _subdirs, filenames in os.walk(directory):
        if filenames:
            files_by_folder[root] = [os.path.join(root, name) for name in filenames]

    kept_files = []
    deleted_files = []

    for paths in files_by_folder.values():
        newest_path = None
        newest_mtime = None
        for path in paths:
            mtime = os.path.getmtime(path)
            # ">=" so that, on a tie, the later-listed file wins.
            if newest_mtime is None or mtime >= newest_mtime:
                newest_path, newest_mtime = path, mtime

        # Exactly one entry per folder: no duplicates in the report.
        kept_files.append(os.path.basename(newest_path))

        # Only regular files found by the walk are removed, never subfolders.
        for path in paths:
            if path != newest_path:
                os.remove(path)
                deleted_files.append(os.path.basename(path))

    df_deleted = create_file_dataframe(deleted_files, "Deleted Files", os.path.basename)
    df_kept = create_file_dataframe(kept_files, "Kept Files", os.path.basename)

    # Put the two lists side by side; the shorter one is padded with NaN.
    return df_deleted.merge(df_kept, how='outer', left_index=True, right_index=True)

In [61]:
# WARNING: destructive. This deletes files under `directory` on disk; the
# returned table of deleted and kept file names is shown as the cell output.
remove_all_except_recent_files(directory)

Unnamed: 0,Deleted Files,Kept Files
