In [1]:
import pandas as pd
import tkinter as tk
from tkinter import filedialog
import io
import os

# Create the Tk root that hosts the file dialog, and hide it immediately so
# that no empty top-level window is shown next to the dialog.
root = tk.Tk()
root.withdraw()

# Use file dialog to select the input file
file_path = filedialog.askopenfilename(title="Select the catalog to audit: ")

if not file_path:
    print("No file selected.")
    # `exit()` is a helper injected by the `site` module for interactive use
    # and is not guaranteed to exist; raising SystemExit always works.
    raise SystemExit

# Read the raw bytes and decode them as UTF-8, replacing undecodable bytes so
# that a few bad characters do not abort the whole load.
try:
    with open(file_path, 'rb') as file:
        content = file.read()
    # Parse after the file handle has been closed; only the bytes are needed.
    df = pd.read_csv(io.StringIO(content.decode('utf-8', errors='replace')))
except Exception as e:
    print(f"Error reading the file: {e}")
    # Signal the failure to the caller with a non-zero exit status.
    raise SystemExit(1) from e

# The audit is driven entirely by the 'Track Item' column; stop with a clear
# message instead of a raw KeyError when the selected file does not have it.
if 'Track Item' not in df.columns:
    print("Error reading the file: missing required column 'Track Item'")
    raise SystemExit(1)

# Keep only the rows flagged for tracking ('Track Item' == 'Y')
df_to_audit = df[df['Track Item'] == 'Y']

# Function to determine whether to sample 'Category' or 'Subcategory'
def determine_sampling_column(df_to_audit):
    """Return the name of the column the audit sample should be based on.

    'Subcategory' is chosen when that column exists and holds at least one
    non-null value. Otherwise (the column is absent, empty, or entirely
    null) the function falls back to 'Category', because an all-null column
    carries no information to sample on.

    Args:
        df_to_audit: DataFrame of the rows that are candidates for auditing.

    Returns:
        Either the string 'Subcategory' or the string 'Category'.
    """
    has_subcategory_data = (
        'Subcategory' in df_to_audit.columns
        and df_to_audit['Subcategory'].notna().any()
    )
    if has_subcategory_data:
        return 'Subcategory'
    return 'Category'

# Sub-function that draws the random sample from the rows to audit
def sample_items(df_to_audit, sampling_column, sample_fraction):
    """Draw a reproducible random fraction of the rows of ``df_to_audit``.

    Args:
        df_to_audit: DataFrame to sample from.
        sampling_column: Accepted for the caller's convenience; the sampling
            itself does not read it.
        sample_fraction: Fraction of rows to draw, passed to
            ``DataFrame.sample`` as ``frac``.

    Returns:
        None when the frame has no rows; otherwise the sampled DataFrame,
        drawn with the fixed seed 42 so repeated runs pick the same rows.
    """
    row_count = len(df_to_audit)
    if row_count:
        return df_to_audit.sample(frac=sample_fraction, random_state=42)
    return None

# Sample the catalog only when it is large enough to be worth auditing
def simple_sample(df_to_audit):
    """Return a 1% sample of ``df_to_audit``, or None for small catalogs.

    The sampling column is resolved first via ``determine_sampling_column``.
    Catalogs with fewer than 2000 rows are not sampled and yield None;
    larger ones are handed to ``sample_items`` with a fraction of 0.01.
    """
    column = determine_sampling_column(df_to_audit)
    if len(df_to_audit) < 2000:
        return None
    return sample_items(df_to_audit, column, 0.01)

# Draw the audit sample from the tracked rows
final_sample = simple_sample(df_to_audit)

if final_sample is None:
    # Nothing was sampled, so there is nothing to mark or save.
    print("No items to audit.")
else:
    # Flag the sampled rows in the original DataFrame by replacing their
    # 'Track Item' value with 'Audit'; the sample's index identifies them.
    sampled_rows = final_sample.index
    df.loc[sampled_rows, 'Track Item'] = 'Audit'

    # Write the full, marked catalog into the user's Downloads directory
    home_dir = os.path.expanduser('~')
    downloads_path = os.path.join(home_dir, 'Downloads')
    audited_file_path = os.path.join(downloads_path, 'Catalog_Audit_grr.csv')
    df.to_csv(audited_file_path, index=False)
    print(f"Audited file saved to: {audited_file_path}")


Audited file saved to: C:\Users\bryan\Downloads\Catalog_Audit_grr.csv
