In [4]:
import utils
import os
import re
import pandas as pd

In [7]:
# Input-mode switch: when True, every .txt file in a directory is loaded as a
# report; when False, a single report is requested through input().
automate = True

# Maps a report-title regex to the `utils` parser for that report type.
# detect_handler() iterates this dict in insertion order and returns the first
# pattern that matches, so the order of entries is significant.
# Patterns are searched with FLAGS, so matching is case-insensitive and the
# `.*` in the SALES PERFORMANCE patterns may span line breaks.
# Each handler is called with the raw report text (see process_report).
REPORT_PATTERNS = {
    r"SALES\s+PERFORMANCE\s+REPORT\s+Including\s+GST.*Retail\s*\$": utils.sales_perf_inc_gst_on_retail,
    r"SALES\s+PERFORMANCE\s+REPORT\s+Excluding\s+GST.*Retail\s*\$": utils.sales_perf_exc_gst_on_retail,
    r"SALES\s+PERFORMANCE\s+REPORT\s+Including\s+Tax.*Net\s*\$":    utils.sales_perf_inc_tax_on_net,
    r"SALES\s+PERFORMANCE\s+REPORT\s+Excluding\s+Tax.*Net\s*\$":    utils.sales_perf_exc_tax_on_net,
    r"BEST\s+SELLERS\s+REPORT\s+BY\s+QUANTITY":                     utils.best_sellers_qty,
    r"BEST\s+SELLERS\s+REPORT\s+BY\s+VALUE":                        utils.best_sellers_value,
    r"DAILY\s+SALES\s+SUMMARY\s+REPORT":                            utils.daily_sales_summary,
    r"MARKDOWN\s+REPORT":                                           utils.markdown,
    r"SALES\s+BY\s+CATEGORY\s+REPORT":                              utils.sales_by_category,
    r"Sales\s+By\s+Customer\s+Report":                              utils.sales_by_customer,
    r"SALES\s+BY\s+SUB\s+DEPARTMENT\s+REPORT":                      utils.sales_by_sub_dept,
    r"STOCK\s+REFILL\s+REPORT":                                     utils.stock_refill,
    r"TENDER\s+BREAKDOWN\s+DETAIL\s+REPORT":                        utils.tender_breakdown_detail,
    r"TENDER\s+BREAKDOWN\s+SUMMARY\s+REPORT":                       utils.tender_breakdown_summary,
}

# Shared regex flags used by detect_handler() for every pattern above.
FLAGS = re.I | re.S  # case-insensitive; dot matches newlines

def detect_handler(text: str):
    """Find the parser responsible for a report.

    Runs of spaces/tabs in ``text`` are collapsed to a single space (line
    breaks are left untouched) and the result is searched against each regex
    in ``REPORT_PATTERNS``, in dict order, using ``FLAGS``.

    Args:
        text: Raw text of one report.

    Returns:
        ``(handler, pattern)`` for the first regex that matches, or
        ``(None, None)`` when none of them do.
    """
    collapsed = re.sub(r"[ \t]+", " ", text)
    # Lazy generator: patterns are only evaluated until the first hit.
    hits = (
        (parser, regex)
        for regex, parser in REPORT_PATTERNS.items()
        if re.search(regex, collapsed, FLAGS)
    )
    return next(hits, (None, None))

def process_report(text: str):
    """Detect the type of a report, parse it, and print the parsed result.

    Args:
        text: Raw text of one report.

    Returns:
        The value produced by the matched handler (presumably a pandas
        DataFrame — confirm against ``utils``), or ``None`` when no pattern in
        ``REPORT_PATTERNS`` matches. Returning the result lets callers keep it
        instead of having to re-parse; callers that ignore the return value
        behave exactly as before.
    """
    handler, pattern = detect_handler(text)
    if handler is None:
        print("Unknown report type — no handler matched.")
        return None

    print(f"Matched pattern: {pattern}")
    # The handler receives the original text, not the whitespace-normalised
    # copy that detect_handler() used for matching.
    report_df = handler(text)
    print(report_df)
    return report_df

# Raw report texts to process; filled from disk (automate) or from input().
inputs = []

if automate:
    # Directory holding the exported .txt reports. The default is a
    # machine-specific absolute path, so it can be overridden without editing
    # the notebook by setting the SALES_REPORTS_DIR environment variable.
    path = os.environ.get(
        "SALES_REPORTS_DIR",
        r"C:\Users\Siva\Documents\GitHub\Power-BI-Tutorial-MS-Press\Power BI Proj SC FS BM\25 03 27 Full Report Set",
    )

    try:
        # isdir (not exists): a regular file at this path is not a usable
        # report directory either.
        if not os.path.isdir(path):
            raise FileNotFoundError(f"❌ Directory not found: {path}")

        # os.listdir() returns entries in arbitrary order; sort for a
        # reproducible processing order. Compare the extension
        # case-insensitively so ".TXT" exports are not silently skipped.
        txt_files = sorted(f for f in os.listdir(path) if f.lower().endswith(".txt"))
        if not txt_files:
            raise FileNotFoundError("❌ No .txt files found in the directory.")

        for filename in txt_files:
            file_path = os.path.join(path, filename)
            with open(file_path, "r", encoding="utf-8") as f:
                inputs.append(f.read())

        print(f"✅ Loaded {len(inputs)} reports successfully.")

    # Only file-system and decoding failures are expected here; anything else
    # is a programming error and should surface with its traceback.
    except (OSError, UnicodeError) as e:
        print(f"Error: {e}")
        inputs = []  # discard partially loaded reports after an error

else:
    # NOTE(review): input() returns a single line; a pasted multi-line report
    # may arrive truncated or flattened depending on the front end — confirm.
    report = input("Paste your report here:\n")
    inputs = [report]

for text in inputs:
    process_report(text)


✅ Loaded 16 reports successfully.
Matched pattern: SALES\s+PERFORMANCE\s+REPORT\s+Including\s+GST.*Retail\s*\$
Processing: Sales Performance (Incl. GST on Retail $)
    sales_person  retail_$_incl_disc  discount  returns     net_$  \
0      Alejandra              134.99      0.00  -134.99      0.00   
1          Alice              231.98    -50.00  -381.97   -149.99   
2           Amal              381.97    -26.00  -237.98    143.99   
3         Amanda            57239.91  10199.23 -3265.24  53974.67   
4          Amber                0.00    -50.00  -199.99   -199.99   
..           ...                 ...       ...      ...       ...   
145      Valeria                0.00      0.00  -189.00   -189.00   
146     Victoria             2407.32    904.42   -74.99   2332.33   
147    Wendy Ann            18843.52   3883.40  -829.43  18014.09   
148        Yifan              127.49    -42.50  -127.49      0.00   
149          Zoe                0.00     -9.00   -80.99    -80.99   

     a

  df['date'] = pd.to_datetime(df['date'])


     markdown_id markdown_description        date style_code      description  \
0           4180              Nothing  16/01/2024     108230        Eva Capri   
1           4180              Nothing  16/01/2024     111750        Eva Short   
2           4180              Nothing  16/01/2024     111750        Eva Short   
3           4180              Nothing  16/01/2024     111767    Martina Short   
4           4227         SCW24FINALPP  11/04/2024     111800        Eva Capri   
...          ...                  ...         ...        ...              ...   
4067        4318          SCFINALPPKW  30/10/2024  SPKTP0116  SEA GARDEN LIBE   
4068        4318          SCFINALPPKW  30/10/2024  SPKTP0119  AUDREY SS FRILL   
4069        4318          SCFINALPPKW  30/10/2024  SPKTP0120  JO PRINTED FRIL   
4070        4318          SCFINALPPKW  30/10/2024  SPKTP0121  EVELYN PRINT BL   
4071        4318          SCFINALPPKW  30/10/2024  SPKTPO112    WILLOW BLOUSE   

      department      colou

  df['time'] = pd.to_datetime(df['time'], dayfirst=True)
