In [17]:
# Import necessary libraries
import json
import pandas as pd
import zipfile
import re

# Load the run settings. JSON is UTF-8 by specification, so the encoding is
# stated explicitly instead of relying on the platform's locale default.
with open('config.json', encoding='utf-8') as config_file:
    config = json.load(config_file)

# Fail early and name every missing setting at once, rather than stopping on
# the first absent key with a bare KeyError that shows only that key.
required_keys = (
    'clfs_zip_path',
    'clfs_excel_file',
    'clfs_output_path_template',
    'rate_column_prefix',
)
missing_keys = [key for key in required_keys if key not in config]
if missing_keys:
    raise KeyError(f"config.json is missing required keys: {missing_keys}")

# Extract values from the configuration
input_zip_path = config['clfs_zip_path']                    # ZIP archive to open
excel_file_within_zip = config['clfs_excel_file']           # workbook member inside the archive
output_path_template = config['clfs_output_path_template']  # str.format template with a `pattern` field
rate_column_prefix = config['rate_column_prefix']           # prefix identifying the rate column(s)


# Read the workbook straight out of the archive; both handles are released as
# soon as the DataFrame has been built.
with zipfile.ZipFile(input_zip_path) as archive, archive.open(excel_file_within_zip) as workbook:
    # The first 4 rows are skipped, so the row after them supplies the column
    # names (presumably a title/notes preamble precedes it - confirm against
    # the workbook layout).
    df = pd.read_excel(workbook, skiprows=4)

# Month assignment based on quarters: each quarter maps to its first month.
quarter_to_month = {'Q1': '01', 'Q2': '04', 'Q3': '07', 'Q4': '10'}

# The workbook name is expected to carry a release tag shaped like 'YYYY QxVx';
# the year and the quarter are captured from it.
match = re.search(r'(\d{4}) (Q\d)V\d', excel_file_within_zip)
if match is None:
    # Stop here instead of printing and carrying on: the later steps need both
    # date strings, and on a reused kernel they would otherwise pick up values
    # left over from a previous run.
    raise ValueError(
        f"Pattern 'YYYY QxVx' not found in file name: {excel_file_within_zip!r}"
    )

year, quarter = match.groups()
month = quarter_to_month.get(quarter, '01')  # default to '01' if not found
extracted_date = f"{year}{month}01"          # compact form, e.g. 20250401
loader_file_name_date = f"{year}-{month}-01"  # dashed form, e.g. 2025-04-01
print(loader_file_name_date)
print(extracted_date)


# Columns whose name starts with the configured rate prefix. Labels read from
# Excel are not guaranteed to be strings, so non-string labels are skipped
# rather than crashing on `.startswith`.
rate_columns = [
    col for col in df.columns
    if isinstance(col, str) and col.startswith(rate_column_prefix)
]
if not rate_columns:
    raise ValueError(
        f"No column starting with {rate_column_prefix!r} found; "
        f"available columns: {list(df.columns)}"
    )

# Only the first matching column is used.
rate_column = rate_columns[0]

# Inspection frame: code, effective date and rate, with EFF_DATE overwritten
# by the date derived from the workbook name.
df_selected = df[['HCPCS', 'EFF_DATE', rate_column]].assign(EFF_DATE=extracted_date)

# Export frame: code and rate only, renamed to SVC_CD / FEE. EFF_DATE is not
# part of the output, so it is never selected here.
df_final = df[['HCPCS', rate_column]].rename(
    columns={'HCPCS': 'SVC_CD', rate_column: 'FEE'}
)


# Build the output path by filling the template's `pattern` field with the
# dashed date string.
output_path = output_path_template.format(pattern=loader_file_name_date)

# Write the export frame as pipe-delimited text, without the index column.
df_final.to_csv(output_path, index=False, sep='|')

print("success")

2025-04-01
20250401
success


In [16]:
# Preview the first five rows of the inspection frame.
df_selected.head(5)


Unnamed: 0,HCPCS,EFF_DATE,RATE
0,0001U,20250401,720.0
1,0002M,20250401,503.4
2,0002U,20250401,25.0
3,0003M,20250401,503.4
4,0003U,20250401,950.0


In [15]:
# Preview the first five rows of the frame that is written to disk.
df_final.head(5)

Unnamed: 0,SVC_CD,FEE
0,0001U,720.0
1,0002M,503.4
2,0002U,25.0
3,0003M,503.4
4,0003U,950.0
