## Links Charges Data Pipeline

In [1]:
import os
import pandas as pd
import numpy as np
import glob
from data_cleaner import InvoiceCleaner,SLACleaner

# NOTE(review): both folders are absolute, machine-specific locations; consider
# deriving them from a configurable base directory so the notebook runs elsewhere.
raw_data_folder_path = 'C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\\SLA EDA\\SLA_ETL\\Data'
processed_data_folder_path = 'C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\\SLA EDA\\SLA_ETL\\Processsed_Data'

# Make sure the output folder exists before anything is written to it.
os.makedirs(processed_data_folder_path, exist_ok=True)

# Collect the raw workbooks to process.
# - Names starting with "~$" are the temporary lock files Excel keeps next to an
#   open workbook; they match *.xlsx but cannot be read as workbooks, so skip them.
# - Sorting makes the processing order reproducible (glob order is OS-dependent).
excel_files = sorted(
    path
    for path in glob.glob(os.path.join(raw_data_folder_path, "*.xlsx"))
    if not os.path.basename(path).startswith("~$")
)

def sla_invoice(sn: str) -> str:
    """Classify a sheet by its name.

    The check is a case-insensitive substring match. "sla" is tested before
    "invoice", so a name containing both keywords is classified as "SLA".

    Args:
        sn: Sheet name to classify.

    Returns:
        "SLA", "Invoice", or "Unknown" when neither keyword occurs in the name.
    """
    lowered = sn.lower()  # normalise once and reuse for both checks
    if "sla" in lowered:
        return "SLA"
    if "invoice" in lowered:
        return "Invoice"
    return "Unknown"

# Convert every sheet of every raw workbook into a cleaned CSV named
# "<provider>_<sheet type>.csv" inside the processed-data folder.
written_paths = set()  # outputs produced in this run, used to flag overwrites

for excel_file in excel_files:
    workbook_name = os.path.splitext(os.path.basename(excel_file))[0]
    # The first space-separated word of the workbook name is used as the
    # provider label; it is the same for every sheet, so compute it once.
    w_b = workbook_name.split(" ")[0]

    # Open the workbook once and reuse the handle for all sheets; the context
    # manager closes it even if a cleaner raises.
    with pd.ExcelFile(excel_file) as xls:
        for sheet_name in xls.sheet_names:
            df = pd.read_excel(xls, sheet_name=sheet_name)

            # Determine the type of sheet for processing
            sheet_type = sla_invoice(sheet_name)

            # Output file name for this sheet
            new_file_name = f"{w_b}_{sheet_type}.csv"
            new_file_path = os.path.join(processed_data_folder_path, new_file_name)

            # Apply the cleaner matching the sheet type
            if sheet_type == "SLA":
                df_processed = SLACleaner(df).clean_data()
            elif sheet_type == "Invoice":
                df_processed = InvoiceCleaner(df).clean_data()
            else:
                # Unrecognised sheets are passed through without cleaning
                df_processed = df

            # Add the service provider column on a new frame instead of
            # mutating whatever object the cleaner returned.
            df_processed = df_processed.assign(Service_Provider=w_b)

            # Two sheets can map to the same output name (e.g. several
            # "Unknown" sheets, or workbooks sharing a first word); the later
            # one replaces the earlier, so make that visible.
            if new_file_path in written_paths:
                print(
                    f"Warning: overwriting {new_file_path} with sheet "
                    f"'{sheet_name}' from workbook '{workbook_name}'"
                )
            written_paths.add(new_file_path)

            # Save the processed DataFrame to a new CSV file
            df_processed.to_csv(new_file_path, index=False)
            print(f"Processed and Saved: {new_file_path}")

Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\LTK_SLA.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\LTK_Invoice.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\SAFARICOM_SLA.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\SAFARICOM_Invoice.csv


### Testing 

In [4]:
# Reload the CSVs written by the pipeline so they can be spot-checked.
# Paths are built from processed_data_folder_path rather than repeated string
# literals: this avoids the invalid "\S" escape sequence and keeps the folder
# defined in one place.
ltk_sla_df = pd.read_csv(os.path.join(processed_data_folder_path, 'LTK_SLA.csv'))
saf_sla_df = pd.read_csv(os.path.join(processed_data_folder_path, 'SAFARICOM_SLA.csv'))

ltk_invoice_df = pd.read_csv(os.path.join(processed_data_folder_path, 'LTK_Invoice.csv'))
saf_invoice_df = pd.read_csv(os.path.join(processed_data_folder_path, 'SAFARICOM_Invoice.csv'))

In [5]:
# Preview the first five rows of the processed LTK invoice data.
ltk_invoice_df.head(n=5)

Unnamed: 0,Invoice_Data,Link_ID,Invoice_Period,Invoice_Description,Invoice_Reference,Total_QRC,Service_Provider
0,2023-10-13,26046,01-Oct-2023 to 31-Dec-2023,26046 National MPLS KRA MBITA TO VRF (INS-57),CRN-15473,-55986.24,LTK
1,2023-10-06,C-00159-0087,01-Oct-2023 to 31-Dec-2023,C-00159-0087 EPL Connection charge 80MBPS->SAM...,116426,417600.0,LTK
2,2023-10-06,1750,01-Oct-2023 to 31-Dec-2023,01750 National MPLS KRA - TIMES TOWERS TO VRF,116426,939600.0,LTK
3,2023-10-06,5611,01-Oct-2023 to 31-Dec-2023,05611 National MPLS Msa link vlan 460 customs,116426,71835.9,LTK
4,2023-10-06,20103,01-Oct-2023 to 31-Dec-2023,20103 Leased_Circuits_Naticnal KRA WILSON AIRP...,116426,79866.0,LTK
