In [8]:
import dask.dataframe as dd
from dask.distributed import Client
import os
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
import numpy as np
import pyarrow.compute as pc
import gc
from decimal import Decimal  # Add this import statement
import shutil


In [2]:
import gc

# Helper used between the heavy cells of this notebook to trigger garbage collection.
def flush_cache():
    """Run a garbage-collection pass via ``gc.collect()``; returns nothing."""
    gc.collect()

In [3]:
# Run a garbage-collection pass (gc.collect) before the inputs are requested and loaded.
flush_cache()

In [4]:
# Prompt for the two run parameters. Each answer must parse as an integer,
# otherwise int() raises ValueError. Their product is the denominator of the
# per-row rate computed in process_and_save_parquet_2.
speriod=int(input("Enter the simulation period: "))
samples=int(input("Enter the number of samples: "))

In [5]:
# Define the folder containing the Parquet files
# NOTE(review): machine-specific absolute path — point this at your own input folder.
folder_path = r'D:\RISHIN\13_ILC_TASK1\input\PARQUET_FILES'

# List all Parquet files in the folder. os.listdir returns names in arbitrary
# order, so the list is sorted: "the first file" (used below to derive the
# country code) and the processing order are then the same on every run.
parquet_files = sorted(
    os.path.join(folder_path, f) for f in os.listdir(folder_path) if f.endswith('.parquet')
)

In [6]:
# Root folder under which this notebook creates its result tree and its
# scratch folders (processing, processedfiles, grouped_dataframes, intermediate_grouping).
output_folder_path = input("Enter the output folder path: ")


In [7]:

# Derive the country code from the first input file and create the output folder tree.
if parquet_files:
    # Only the first LocationName value is needed, so read a single row of that
    # one column instead of converting a 1000-row batch of every column to pandas.
    first_batch = next(
        pq.ParquetFile(parquet_files[0]).iter_batches(batch_size=1, columns=['LocationName']),
        None,
    )

    if first_batch is None or first_batch.num_rows == 0:
        # Without a row there is no LocationName to read; say so explicitly
        # instead of silently leaving `country` / `main_folder_path` undefined.
        print(f"No rows found in {parquet_files[0]}; cannot determine the country code.")
    else:
        # The country code is the part of LocationName before the first '_'
        location_name = first_batch.column(0)[0].as_py()
        country = location_name.split('_')[0]

        # Define the main folder path
        main_folder_path = os.path.join(output_folder_path, f'ILC2024_EUWS_PLA_WI_EP_{country}_EUR_Losses')

        # Define subfolders
        subfolders = ['EP', 'PLT', 'STATS']
        nested_folders = ['Lob', 'Portfolio']
        innermost_folders = ['GR', 'GU']

        # os.makedirs creates the intermediate levels as well, so creating every
        # leaf folder is enough to build the whole three-level tree.
        for subfolder in subfolders:
            for nested_folder in nested_folders:
                for innermost_folder in innermost_folders:
                    os.makedirs(
                        os.path.join(main_folder_path, subfolder, nested_folder, innermost_folder),
                        exist_ok=True,
                    )

        print(f"Folders created successfully at {main_folder_path}")
else:
    print("No Parquet files found in the specified folder.")

Folders created successfully at D:\RISHIN\TESTING FOLDER\ILC-TEST_2\ILC2024_EUWS_PLA_WI_EP_BE_EUR_Losses


In [7]:
# For EP LOB GU 

In [21]:
import pyarrow as pa
import pyarrow.parquet as pq
import os

def _rename_columns_by_name(table, renames):
    """Return ``table`` with columns renamed according to ``renames`` (old name -> new name).

    Renaming by name instead of by position keeps the result correct whether
    pyarrow places the aggregate columns before or after the group keys.
    Columns not mentioned in ``renames`` keep their name.
    """
    return table.rename_columns([renames.get(name, name) for name in table.column_names])


processing_folder_path = os.path.join(output_folder_path, 'processing')
os.makedirs(processing_folder_path, exist_ok=True)

if not parquet_files:
    raise FileNotFoundError("No input Parquet files to aggregate (parquet_files is empty).")

# Keys that identify one event loss per period and LobName
event_keys = ['EventId', 'PeriodId', 'LobName']

# Paths of the partial aggregates written by THIS run. They are tracked
# explicitly rather than rediscovered by filename prefix, so leftovers from an
# earlier failed run can never be mixed into the totals.
intermediate_files_1 = []
intermediate_files_2 = []

# Process each Parquet file individually
for file in parquet_files:
    # Read the Parquet file into a PyArrow Table
    table = pq.read_table(file)

    # Mean of Loss per (event keys, LocationId), then summed over locations.
    # NOTE(review): the column is called 'Max_Loss' and the original comment said
    # "max", but the aggregation applied is 'mean' — confirm which is intended.
    grouped_table_1 = table.group_by(event_keys + ['LocationId']).aggregate([('Loss', 'mean')])
    grouped_table_1 = _rename_columns_by_name(grouped_table_1, {'Loss_mean': 'Max_Loss'})
    grouped_table_1 = grouped_table_1.group_by(event_keys).aggregate([('Max_Loss', 'sum')])
    grouped_table_1 = _rename_columns_by_name(grouped_table_1, {'Max_Loss_sum': 'Max_Loss'})
    grouped_table_1 = grouped_table_1.select(event_keys + ['Max_Loss'])

    # Sum of Loss per event keys
    grouped_table_2 = table.group_by(event_keys).aggregate([('Loss', 'sum')])
    grouped_table_2 = _rename_columns_by_name(grouped_table_2, {'Loss_sum': 'Sum_Loss'})
    grouped_table_2 = grouped_table_2.select(event_keys + ['Sum_Loss'])

    # Write intermediate results to disk
    partial_path_1 = os.path.join(processing_folder_path, f'grouped_table_1_{os.path.basename(file)}')
    partial_path_2 = os.path.join(processing_folder_path, f'grouped_table_2_{os.path.basename(file)}')
    pq.write_table(grouped_table_1, partial_path_1)
    pq.write_table(grouped_table_2, partial_path_2)
    intermediate_files_1.append(partial_path_1)
    intermediate_files_2.append(partial_path_2)

# Read all intermediate files back and concatenate them
final_grouped_table_1 = [pq.read_table(f) for f in intermediate_files_1]
final_grouped_table_2 = [pq.read_table(f) for f in intermediate_files_2]

final_table_1 = pa.concat_tables(final_grouped_table_1)
final_table_2 = pa.concat_tables(final_grouped_table_2)

# Combine the per-file partial results (the same event keys can appear in
# several files) and sort by loss, largest first. The explicit select() fixes
# the column order as keys first, aggregate last.
f_grouped_table_1 = (
    final_table_1.group_by(event_keys).aggregate([('Max_Loss', 'sum')])
    .select(event_keys + ['Max_Loss_sum'])
)
f_grouped_table_2 = (
    final_table_2.group_by(event_keys).aggregate([('Sum_Loss', 'sum')])
    .select(event_keys + ['Sum_Loss_sum'])
)
sorted_final_table_1 = f_grouped_table_1.sort_by([('Max_Loss_sum', 'descending')])
sorted_final_table_2 = f_grouped_table_2.sort_by([('Sum_Loss_sum', 'descending')])

# These remain PyArrow tables (no pandas conversion takes place)
dataframe_1_oep = sorted_final_table_1
dataframe_1_aep = sorted_final_table_2

# you can delete the steps below later

# Create the processedfiles folder within the output folder
processed_files_folder = os.path.join(output_folder_path, 'processedfiles')
os.makedirs(processed_files_folder, exist_ok=True)

# Define the paths for the final concatenated files
final_oep_path = os.path.join(processed_files_folder, 'final_dataframe_1_oep.parquet')
final_aep_path = os.path.join(processed_files_folder, 'final_dataframe_1_aep.parquet')

# Write the final concatenated files to disk
pq.write_table(dataframe_1_oep, final_oep_path)
pq.write_table(dataframe_1_aep, final_aep_path)

# Remove the scratch folder; this also removes every partial aggregate in it,
# so the files do not need to be deleted one by one first.
if os.path.exists(processing_folder_path):
    shutil.rmtree(processing_folder_path)


print(f'Final OEP file path: {final_oep_path}')

print(f'Final AEP file path: {final_aep_path}')

Final OEP file path: D:\RISHIN\TESTING FOLDER\ILC-TEST_2\processedfiles\final_dataframe_1_oep.parquet
Final AEP file path: D:\RISHIN\TESTING FOLDER\ILC-TEST_2\processedfiles\final_dataframe_1_aep.parquet


In [22]:
# Run a garbage-collection pass (gc.collect) now that the aggregation cell has finished.
flush_cache()

In [23]:


# Scratch folder for per-LOB chunk files and target folder for the merged per-LOB files
grouped_dataframes_folder_path = os.path.join(output_folder_path, 'grouped_dataframes')
intermediate_grouping_folder_path = os.path.join(output_folder_path, 'intermediate_grouping')

# Make sure both folders exist (no error if they already do)
for work_folder in (grouped_dataframes_folder_path, intermediate_grouping_folder_path):
    os.makedirs(work_folder, exist_ok=True)

# Re-load the aggregated OEP / AEP tables written by the previous cell
dataframe_1_oep, dataframe_1_aep = (
    pq.read_table(saved_path) for saved_path in (final_oep_path, final_aep_path)
)

# Row filter that reports "no match" as None instead of an empty table
def filter_and_assign(table, column_name, value):
    """Return the rows of ``table`` whose ``column_name`` equals ``value``.

    Returns ``None`` when no row matches, so callers can test the result
    with ``is not None``.
    """
    row_mask = pa.compute.equal(table[column_name], value)
    matching_rows = table.filter(row_mask)
    return matching_rows if matching_rows.num_rows > 0 else None

# Writer that splits a table into fixed-size Parquet chunk files
def save_table_in_chunks(table, folder_path, filename_prefix, chunk_size=100000):
    """Write ``table`` to ``folder_path`` as consecutive files of at most ``chunk_size`` rows.

    Files are named ``<filename_prefix>_chunk_<i>.parquet`` with ``i`` counting
    from 0. A table with no rows produces no files.
    """
    chunk_count = -(-table.num_rows // chunk_size)  # ceiling division
    for chunk_index in range(chunk_count):
        first_row = chunk_index * chunk_size
        rows = table.slice(first_row, chunk_size)
        target_path = os.path.join(folder_path, f"{filename_prefix}_chunk_{chunk_index}.parquet")
        pq.write_table(rows, target_path)

# Split both tables by LobName; a LobName with no rows yields None
lob_names = ('AGR', 'AUTO', 'COM', 'IND', 'SPER', 'FRST', 'GLH')
oep_by_lob = {lob_name: filter_and_assign(dataframe_1_oep, 'LobName', lob_name) for lob_name in lob_names}
aep_by_lob = {lob_name: filter_and_assign(dataframe_1_aep, 'LobName', lob_name) for lob_name in lob_names}

# Keep the individual variable names that later cells refer to
(daf_AGR_oep, daf_AUTO_oep, daf_COM_oep, daf_IND_oep,
 daf_SPER_oep, daf_FRST_oep, daf_GLH_oep) = (oep_by_lob[lob_name] for lob_name in lob_names)
(daf_AGR_aep, daf_AUTO_aep, daf_COM_aep, daf_IND_aep,
 daf_SPER_aep, daf_FRST_aep, daf_GLH_aep) = (aep_by_lob[lob_name] for lob_name in lob_names)

# Write every non-empty subset as chunk files: all OEP subsets first, then all AEP subsets
for ep_suffix, tables_by_lob in (('oep', oep_by_lob), ('aep', aep_by_lob)):
    for lob_name, lob_table in tables_by_lob.items():
        if lob_table is not None:
            save_table_in_chunks(
                lob_table,
                intermediate_grouping_folder_path,
                f'partial_daf_{lob_name}_{ep_suffix}',
            )

import os
import pyarrow as pa
import pyarrow.parquet as pq

def align_schema(table, target_schema):
    """
    Return ``table`` reshaped so that its schema equals ``target_schema``.

    Columns are matched to the target fields by NAME, emitted in the target's
    order and cast to the target field's type. A target field that the table
    does not have is filled with nulls; table columns that are not part of
    ``target_schema`` are left out.

    The previous implementation renamed columns by position, which relabelled
    the data whenever the table held the same columns in a different order and
    raised when the column counts differed.

    Args:
        table: pyarrow Table to align.
        target_schema: pyarrow Schema the result must conform to.

    Returns:
        A pyarrow Table whose schema is ``target_schema``.
    """
    available = set(table.schema.names)
    aligned_columns = []
    for field in target_schema:
        if field.name in available:
            # Look the column up by name so a different column order cannot mislabel data
            aligned_columns.append(table[field.name].cast(field.type))
        else:
            aligned_columns.append(pa.nulls(table.num_rows, type=field.type))
    return pa.Table.from_arrays(aligned_columns, schema=target_schema)

def concatenate_and_save(folder_path, output_folder_path):
    """
    Merge the chunk files in ``folder_path`` into one Parquet file per prefix.

    Every ``*.parquet`` file is grouped by its prefix — the filename with its
    last two '_'-separated parts removed, e.g. ``partial_daf_AGR_oep`` for
    ``partial_daf_AGR_oep_chunk_3.parquet``. Each group is concatenated in
    ascending chunk-index order and written to
    ``<output_folder_path>/<prefix>.parquet``.

    Chunks are ordered by their numeric index because ``os.listdir`` returns
    names in arbitrary order (and a plain sort would put ``chunk_10`` before
    ``chunk_2``), which would scramble the row order of the chunked table.

    Args:
        folder_path: folder containing the chunk files.
        output_folder_path: folder receiving the merged files; must exist.
    """
    def _chunk_order(file_name):
        # "<prefix>_chunk_<i>.parquet" -> sort by i; names without a numeric
        # index come after the numbered ones, in alphabetical order.
        index_text = os.path.splitext(file_name)[0].rsplit('_', 1)[-1]
        if index_text.isdecimal():
            return (0, int(index_text), file_name)
        return (1, 0, file_name)

    grouped_files = {}
    for file in os.listdir(folder_path):
        if file.endswith('.parquet'):
            prefix = '_'.join(file.split('_')[:-2])
            grouped_files.setdefault(prefix, []).append(file)

    for prefix, files in grouped_files.items():
        tables = []
        target_schema = None
        for file in sorted(files, key=_chunk_order):
            table = pq.read_table(os.path.join(folder_path, file))
            if target_schema is None:
                # The first chunk of the group defines the schema of the merged file
                target_schema = table.schema
            else:
                table = align_schema(table, target_schema)
            tables.append(table)
        if tables:
            concatenated_table = pa.concat_tables(tables)
            pq.write_table(concatenated_table, os.path.join(output_folder_path, f"{prefix}.parquet"))

def safe_concatenate_and_save(intermediate_path, grouped_path):
    """Run ``concatenate_and_save`` and report, rather than raise, the expected failures.

    A ``FileNotFoundError`` or ``ValueError`` raised while merging is printed
    and the merge is abandoned; any other exception propagates to the caller.
    """
    try:
        concatenate_and_save(intermediate_path, grouped_path)
    except (FileNotFoundError, ValueError) as merge_error:
        print(f"Skipping due to error: {merge_error}")

# (A verbatim second copy of the folder setup, table reload, filter_and_assign /
# save_table_in_chunks definitions and per-LOB filter-and-save steps used to be
# here. It only redefined the same functions, re-read the same inputs and
# rewrote identical chunk files, so it was removed; the first copy earlier in
# this cell already provides every name and file the following steps need.)

# Merge the chunk files in intermediate_grouping_folder_path into one Parquet file
# per filename prefix under grouped_dataframes_folder_path. A FileNotFoundError or
# ValueError during the merge is printed and skipped instead of stopping the cell.
safe_concatenate_and_save(intermediate_grouping_folder_path, grouped_dataframes_folder_path)


In [19]:
# Recursive folder removal that tolerates a folder that is already gone
def delete_folder(folder_path):
    """Remove ``folder_path`` and everything below it; do nothing if the path does not exist."""
    if not os.path.exists(folder_path):
        return
    shutil.rmtree(folder_path)

# Remove the scratch folders that are no longer needed.
# NOTE(review): this deletes the files behind final_oep_path / final_aep_path, so
# the cell that re-reads them cannot be re-run afterwards without re-running the
# aggregation cell first.
for scratch_folder in (processed_files_folder, intermediate_grouping_folder_path):
    delete_folder(scratch_folder)

In [24]:
import decimal
import pyarrow as pa
import pyarrow.parquet as pq

def process_and_save_parquet_2(dataframe_1_oep, dataframe_1_aep, parquet_file_path, speriod, samples):
    # Group by 'PeriodId' and 'LobName' and aggregate 'Max_Loss_sum' for OEP
    grouped_oep = dataframe_1_oep.group_by(['PeriodId', 'LobName']).aggregate([('Max_Loss_sum', 'max')])
    grouped_oep = grouped_oep.rename_columns(['PeriodId', 'LobName', 'Max_Loss'])

    # Group by 'PeriodId' and 'LobName' and aggregate 'Sum_Loss_sum' for AEP
    grouped_aep = dataframe_1_aep.group_by(['PeriodId', 'LobName']).aggregate([('Sum_Loss_sum', 'sum')])
    grouped_aep = grouped_aep.rename_columns(['PeriodId', 'LobName', 'S_Sum_Loss'])

    # Calculate additional columns for OEP
    rate = pa.array([1 / (speriod * samples)] * len(grouped_oep))
    cumrate = pa.compute.cumulative_sum(rate)
    rps = pa.compute.divide(1, cumrate)
    max_loss_shifted = pa.compute.shift(grouped_oep['Max_Loss'], -1)
    cumrate_shifted = pa.compute.shift(cumrate, -1)
    tce_oep_1 = pa.compute.multiply(pa.compute.subtract(grouped_oep['Max_Loss'], max_loss_shifted),
                                    pa.compute.multiply(pa.compute.add(cumrate, cumrate_shifted), 0.5))
    tce_oep_2 = pa.compute.multiply(pa.compute.cumulative_sum(pa.compute.shift(tce_oep_1, 1)), rps)
    tce_oep_final = pa.compute.add(tce_oep_2, grouped_oep['Max_Loss'])

    # Calculate additional columns for AEP
    s_sum_loss_shifted = pa.compute.shift(grouped_aep['S_Sum_Loss'], -1)
    tce_aep_1 = pa.compute.multiply(pa.compute.subtract(grouped_aep['S_Sum_Loss'], s_sum_loss_shifted),
                                    pa.compute.multiply(pa.compute.add(cumrate, cumrate_shifted), 0.5))
    tce_aep_2 = pa.compute.multiply(pa.compute.cumulative_sum(pa.compute.shift(tce_aep_1, 1)), rps)
    tce_aep_final = pa.compute.add(tce_aep_2, grouped_aep['S_Sum_Loss'])

    # Create final tables
    final_oep = pa.table([grouped_oep['PeriodId'], grouped_oep['LobName'], grouped_oep['Max_Loss'], tce_oep_final, rps],
                         names=['PeriodId', 'LobName', 'OEP', 'TCE-OEP', 'RPs'])
    final_aep = pa.table([grouped_aep['PeriodId'], grouped_aep['LobName'], grouped_aep['S_Sum_Loss'], tce_aep_final, rps],
                         names=['PeriodId', 'LobName', 'AEP', 'TCE-AEP', 'RPs'])

    # Map LobName to LobId
    lobname_to_lobid = {
        'AGR': 1,
        'AUTO': 2,
        'COM': 3,
        'IND': 4,
        'SPER': 5,
        'FRST': 6,
        'GLH': 7
    }
    lobid_array = pa.array([lobname_to_lobid[name.as_py()] for name in final_oep['LobName']])
    lobid_decimal = pa.array([decimal.Decimal(x).scaleb(-0) for x in lobid_array])

    final_oep = final_oep.append_column('LobId', lobid_decimal)
    final_aep = final_aep.append_column('LobId', lobid_decimal)

    # Melt the tables
    melted_oep = pa.table([final_oep['RPs'], final_oep['LobId'], final_oep['LobName'], final_oep['OEP'], final_oep['TCE-OEP']],
                          names=['ReturnPeriod', 'LobId', 'LobName', 'OEP', 'TCE-OEP'])
    melted_aep = pa.table([final_aep['RPs'], final_aep['LobId'], final_aep['LobName'], final_aep['AEP'], final_aep['TCE-AEP']],
                          names=['ReturnPeriod', 'LobId', 'LobName', 'AEP', 'TCE-AEP'])

    # Concatenate the final tables
    final_table = pa.concat_tables([melted_oep, melted_aep])

    # Define the schema to match the required Parquet file schema
    schema = pa.schema([
        pa.field('EPType', pa.string(), nullable=True),
        pa.field('Loss', pa.float64(), nullable=True),
        pa.field('ReturnPeriod', pa.float64(), nullable=True),
        pa.field('LobId', pa.decimal128(38, 0), nullable=True),
        pa.field('LobName', pa.string(), nullable=True)
    ])

    # Convert to Arrow Table with the specified schema
    table = pa.Table.from_arrays([final_table['EPType'], final_table['Loss'], final_table['ReturnPeriod'], final_table['LobId'], final_table['LobName']], schema=schema)

    # Save to Parquet
    pq.write_table(table, parquet_file_path)

    print(f"Parquet file saved successfully at {parquet_file_path}")

In [25]:
# Output files for the per-LOB EP results: <main folder>/EP/Lob/GU/..._GU_<n>.parquet, n = 0..6
ep_lob_gu_folder = os.path.join(main_folder_path, 'EP', 'Lob', 'GU')

(pq_file_path_1, pq_file_path_2, pq_file_path_3, pq_file_path_4,
 pq_file_path_5, pq_file_path_6, pq_file_path_7) = (
    os.path.join(ep_lob_gu_folder, f'ILC2024_EUWS_PLA_WI_EP_{country}_EUR_EP_Lob_GU_{file_number}.parquet')
    for file_number in range(7)
)




In [30]:
# One output file per LOB, in the same order as before
lob_output_paths = (
    ('AGR', pq_file_path_1),
    ('AUTO', pq_file_path_2),
    ('COM', pq_file_path_3),
    ('IND', pq_file_path_4),
    ('SPER', pq_file_path_5),
    ('FRST', pq_file_path_6),
    ('GLH', pq_file_path_7),
)

for lob, lob_output_path in lob_output_paths:
    # The per-LOB tables live in notebook-level variables named daf_<LOB>_oep /
    # daf_<LOB>_aep. A LOB is skipped when its variable was never defined or is
    # None (no rows for that LOB). Unlike the former `except NameError: pass`,
    # this does not hide errors raised inside process_and_save_parquet_2.
    lob_oep = globals().get(f'daf_{lob}_oep')
    lob_aep = globals().get(f'daf_{lob}_aep')
    if lob_oep is None or lob_aep is None:
        print(f"Skipping {lob}: no OEP/AEP table available")
        continue
    process_and_save_parquet_2(lob_oep, lob_aep, lob_output_path, speriod, samples)

AttributeError: module 'pyarrow.compute' has no attribute 'shift'