In [84]:
import pandas as pd
import os

from datetime import datetime, timedelta


In [85]:
class ImportData:
    """Filter a raw JODI CSV extract and write the cleaned subset to disk.

    Attributes:
        filename: Name of the CSV file inside ``raw_folder``.
        raw_folder: Directory the raw CSV is read from.
        export_folder: Directory the processed CSV is written to.
    """

    DEFAULT_RAW_FOLDER = "../../data/raw/"
    DEFAULT_EXPORT_FOLDER = "../../data/processed/"

    def __init__(self, filename, raw_folder=None, export_folder=None):
        """Remember which file to load and where to read from / write to.

        Args:
            filename: Name of the raw CSV file (relative to ``raw_folder``).
            raw_folder: Optional override for the input directory.
            export_folder: Optional override for the output directory.
        """
        self.filename = filename
        self.raw_folder = self.DEFAULT_RAW_FOLDER if raw_folder is None else raw_folder
        self.export_folder = (
            self.DEFAULT_EXPORT_FOLDER if export_folder is None else export_folder
        )

    def import_jodi_data(self, flow_break_filter, energy_product_filter,
                         unit_measure_filter, reference_period="2024-12-31"):
        """Load the raw CSV, filter and clean it, and export the result as CSV.

        NOTE: the first two parameter names do not match the columns they
        filter. ``flow_break_filter`` is compared against ``ENERGY_PRODUCT``
        and ``energy_product_filter`` against ``FLOW_BREAKDOWN``. Names and
        behaviour are kept as they are so existing positional and keyword
        callers keep working.

        Args:
            flow_break_filter: Value to keep in the ``ENERGY_PRODUCT`` column;
                a falsy value disables this filter.
            energy_product_filter: Value to keep in the ``FLOW_BREAKDOWN``
                column; a falsy value disables this filter.
            unit_measure_filter: Value to keep in the ``UNIT_MEASURE`` column;
                a falsy value disables this filter.
            reference_period: Month-end date used to decide which ``REF_AREA``
                values are kept. Only areas that have a row on this date with
                ``OBS_VALUE != 0`` survive.

        Raises:
            FileNotFoundError: If no filename was given or the file does not
                exist in ``raw_folder``.
        """
        if not self.filename:
            raise FileNotFoundError(f"No file found in the directory {self.raw_folder}")

        source_path = os.path.join(self.raw_folder, self.filename)
        # Fail early with the full path instead of relying on read_csv's error.
        if not os.path.isfile(source_path):
            raise FileNotFoundError(f"No file found at {source_path}")

        df = pd.read_csv(source_path)

        # Column/value pairs; see the NOTE in the docstring about the naming.
        column_filters = (
            ("ENERGY_PRODUCT", flow_break_filter),
            ("FLOW_BREAKDOWN", energy_product_filter),
            ("UNIT_MEASURE", unit_measure_filter),
        )
        for column, wanted in column_filters:
            if wanted:
                df = df[df[column] == wanted]

        # Drop the columns that were only needed for filtering, then normalise
        # the period to the last day of its month and coerce values to numbers
        # (unparseable values become NaN).
        df = df.drop(
            columns=["ENERGY_PRODUCT", "FLOW_BREAKDOWN", "UNIT_MEASURE", "ASSESSMENT_CODE"]
        ).assign(
            TIME_PERIOD=lambda frame: pd.to_datetime(frame["TIME_PERIOD"], format="%Y-%m")
            + pd.offsets.MonthEnd(0),
            OBS_VALUE=lambda frame: pd.to_numeric(frame["OBS_VALUE"], errors="coerce"),
        )

        # Keep only areas with a non-zero observation in the reference period.
        # NaN != 0 evaluates to True, so areas whose reference value could not
        # be parsed are kept as well.
        reference_date = pd.Timestamp(reference_period)
        has_reference_value = (df["TIME_PERIOD"] == reference_date) & (df["OBS_VALUE"] != 0)
        active_areas = df.loc[has_reference_value, "REF_AREA"].unique()
        df = df[df["REF_AREA"].isin(active_areas)]

        # The export name is built from the raw filter values; a missing filter
        # is rendered through str(), e.g. "None".
        export_filename = f"{flow_break_filter}_{energy_product_filter}_{unit_measure_filter}.csv"
        filepath = os.path.join(self.export_folder, export_filename)

        # Make sure the target directory exists before writing.
        os.makedirs(self.export_folder, exist_ok=True)
        df.to_csv(filepath, index=False)

        print("Data Sucess!!")

In [86]:
# Export one processed CSV per (ENERGY_PRODUCT, FLOW_BREAKDOWN, UNIT_MEASURE)
# combination, all read from the same raw file.
jodi_importer = ImportData("NewProcedure_Primary_CSV.csv")
for product_code, flow_code, unit_code in (
    ("CRUDEOIL", "INDPROD", "KBD"),
    ("CRUDEOIL", "CLOSTLV", "KBBL"),
    ("CRUDEOIL", "DIRECUSE", "KBBL"),
):
    jodi_importer.import_jodi_data(product_code, flow_code, unit_code)

Data Sucess!!
Data Sucess!!
Data Sucess!!
