## Links Charges Data Pipeline

In [1]:
import os
import pandas as pd
import numpy as np
import glob
from data_cleaner import InvoiceCleaner,SLACleaner

raw_data_folder_path = 'C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\\SLA EDA\\SLA_ETL\\Data'
processed_data_folder_path = 'C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\\SLA EDA\\SLA_ETL\\Processsed_Data'

# Ensuring the processed folder exist
os.makedirs(processed_data_folder_path, exist_ok=True)

excel_files = glob.glob(f"{raw_data_folder_path}/*.xlsx")

def sla_invoice(sn):
    if "sla" in sn.lower():
        return "SLA"
    elif "invoice" in sn.lower():
        return "Invoice"
    else:
        return f"Unknown"

for excel_file in excel_files:
    xls = pd.ExcelFile(excel_file)
    workbook_name = os.path.splitext(os.path.basename(excel_file))[0]
    invoice_dfs = []
    sla_dfs = []

    for sheet_name in xls.sheet_names:
        df = pd.read_excel(excel_file, sheet_name=sheet_name)

        # Determine the type of sheet for processing
        sheet_type = sla_invoice(sheet_name)

        # New files Name
        w_b = workbook_name.split(" ")[0]
        new_file_name = f"{w_b}_{sheet_type}.csv"
        new_file_path = os.path.join(processed_data_folder_path, new_file_name)



        # Apply different processing based on sheet type
        if sheet_type == "SLA":
            # Specific processing for SLA sheets
            
            df_processed = SLACleaner(df).clean_data()  # Placeholder for SLA-specific processing
            # Creating a list of SLA Data Processed            
            df_sla = df_processed.copy()
            sla_dfs.append(df_sla)
        elif sheet_type == "Invoice":
            # Specific processing for Invoice sheets
            
            df_processed = InvoiceCleaner(df).clean_data()  # Placeholder for Invoice-specific processing
            # Creating a list of all the Invoice data processed
            df_invoice = df_processed.copy()
            invoice_dfs.append(df_invoice)
        else:
            # Default processing or skip
            df_processed = df  # Placeholder for default processing or skipping unknown types

        #Adding the Service Provider Col
        df_processed['Service_Provider'] = w_b

        

        # Save the processed DataFrame to a new CSV file
        df_processed.to_csv(new_file_path, index=False)
        print(f"Processed and Saved: {new_file_path}")
    # Appending the SLA and INvoice dfs
SLA_Combined_df = pd.concat(sla_dfs)
Invoice_Combined_df = pd.concat(invoice_dfs)

# Saving them to the Processed Folder
SLA_Combined_path = f'{processed_data_folder_path}\\SLA_Combined.csv'
SLA_Combined_df.to_csv(SLA_Combined_path, index= False)
print(f'SLA Combined Saved to{SLA_Combined_path}')

Invoice_Combined_path = f'{processed_data_folder_path}\\Invoice_Combined.csv'
SLA_Combined_df.to_csv(Invoice_Combined_path, index= False)
print(f'Invoice Combined Saved to{Invoice_Combined_path}')


print('Imagine Imerun !!!')


Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\LTK_SLA.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\LTK_Invoice.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\SAFARICOM_SLA.csv
Processed and Saved: C:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\SAFARICOM_Invoice.csv
SLA Combined Saved toC:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\SLA_Combined.csv
Invoice Combined Saved toC:\Users\USER\OneDrive\KRA WORK\2024 WS\SLA EDA\SLA_ETL\Processsed_Data\Invoice_Combined.csv
Imagine Imerun !!!


### Testing 

In [17]:
import pandas as pd
ltk_sla_df = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\LTK_SLA.csv')
saf_sla_df = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\SAFARICOM_SLA.csv')
combined_sla = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\SLA_Combined.csv')

ltk_invoice_df = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\LTK_Invoice.csv')
saf_invoice_df = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\SAFARICOM_Invoice.csv')
combined_Invoice = pd.read_csv('C:\\Users\\USER\\OneDrive\\KRA WORK\\2024 WS\SLA EDA\\SLA_ETL\\Processsed_Data\\Invoice_Combined.csv')

In [3]:
ltk_invoice_df.head()

Unnamed: 0,Invoice_Data,Link_ID,Invoice_Period,Invoice_Description,Invoice_Reference,Total_QRC,Service_Provider
0,2023-10-13,26046,01-Oct-2023 to 31-Dec-2023,26046 National MPLS KRA MBITA TO VRF (INS-57),CRN-15473,-55986.24,LTK
1,2023-10-06,C-00159-0087,01-Oct-2023 to 31-Dec-2023,C-00159-0087 EPL Connection charge 80MBPS->SAM...,116426,417600.0,LTK
2,2023-10-06,1750,01-Oct-2023 to 31-Dec-2023,01750 National MPLS KRA - TIMES TOWERS TO VRF,116426,939600.0,LTK
3,2023-10-06,5611,01-Oct-2023 to 31-Dec-2023,05611 National MPLS Msa link vlan 460 customs,116426,71835.9,LTK
4,2023-10-06,20103,01-Oct-2023 to 31-Dec-2023,20103 Leased_Circuits_Naticnal KRA WILSON AIRP...,116426,79866.0,LTK


In [4]:
saf_invoice_df.head()

Unnamed: 0,Invoice_Data,Link_ID,Invoice_Period,Invoice_Description,Invoice_Reference,Total_QRC,Service_Provider
0,2023-10-01,95001135,1st Oct to 31st Dec 2023,CUSTOMS OFFICE MALINDI-WIMAX,B1-10096515502,240145.6128,SAFARICOM
1,2023-10-01,95001574,1st Oct to 31st Dec 2023,KRA LOITOKTOK BORDER WIMAX,B1-10096515502,240145.6128,SAFARICOM
2,2023-10-01,95001628,1st Oct to 31st Dec 2023,KRA KRATI MOMBASA WIMAX,B1-10096515502,240145.6128,SAFARICOM
3,2023-10-01,95004139,1st Oct to 31st Dec 2023,KRA - MANDERA,B1-10096515502,180078.7944,SAFARICOM
4,2023-10-01,95004140,1st Oct to 31st Dec 2023,KRA - Moyale,B1-10096515502,240145.6128,SAFARICOM


In [18]:
combined_sla.head()

Unnamed: 0,Link_ID,SLA_Date,Last_Mile,Capacity,Location,MRC_Excl,SLM_Comments,QRC_Incl
0,95054463,2020-12-16,MPLS,25,BUNGOMA MICROWAVE,20352.8,Upgrade 5>25 Mbps. Check Upgrade Docs. KES 449...,79986.504
1,95096538,2020-12-16,MPLS,12,BUSIA-WIMAX,70008.3,,275132.619
2,95038953,2020-12-16,MPLS,80,DR BACKHAUL FIBRE,93504.6,,367473.078
3,95054464,2020-12-16,MPLS,60,ELDORET KIPTAGICH MPLS FIBER,25439.4,Upgrade 10>60 Mbps. Check Upgrade Docs. KES 69...,99976.842
4,95054465,2020-12-16,MPLS,5,ELDORET KPC LOOP MICROWAVE,20357.4,,80004.582


In [19]:
combined_sla.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Link_ID       59 non-null     int64  
 1   SLA_Date      59 non-null     object 
 2   Last_Mile     59 non-null     object 
 3   Capacity      59 non-null     int64  
 4   Location      59 non-null     object 
 5   MRC_Excl      59 non-null     float64
 6   SLM_Comments  21 non-null     object 
 7   QRC_Incl      59 non-null     float64
dtypes: float64(2), int64(2), object(4)
memory usage: 3.8+ KB


In [20]:
combined_Invoice.head()

Unnamed: 0,Link_ID,SLA_Date,Last_Mile,Capacity,Location,MRC_Excl,SLM_Comments,QRC_Incl
0,95054463,2020-12-16,MPLS,25,BUNGOMA MICROWAVE,20352.8,Upgrade 5>25 Mbps. Check Upgrade Docs. KES 449...,79986.504
1,95096538,2020-12-16,MPLS,12,BUSIA-WIMAX,70008.3,,275132.619
2,95038953,2020-12-16,MPLS,80,DR BACKHAUL FIBRE,93504.6,,367473.078
3,95054464,2020-12-16,MPLS,60,ELDORET KIPTAGICH MPLS FIBER,25439.4,Upgrade 10>60 Mbps. Check Upgrade Docs. KES 69...,99976.842
4,95054465,2020-12-16,MPLS,5,ELDORET KPC LOOP MICROWAVE,20357.4,,80004.582


In [16]:
combined_Invoice.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 8 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   Link_ID       59 non-null     int64  
 1   SLA_Date      59 non-null     object 
 2   Last_Mile     59 non-null     object 
 3   Capacity      59 non-null     int64  
 4   Location      59 non-null     object 
 5   MRC_Excl      59 non-null     float64
 6   SLM_Comments  21 non-null     object 
 7   QRC_Incl      59 non-null     float64
dtypes: float64(2), int64(2), object(4)
memory usage: 3.8+ KB


In [22]:
invoice_dfs


1