In [1]:
import pandas as pd
import numpy as np
import logging

In [2]:
# Root-logger setup for the notebook: emit everything from DEBUG upwards,
# one record per line as "<timestamp> - <LEVEL> - <message>".
logging.basicConfig(
    datefmt="%Y-%m-%d %H:%M:%S",  # timestamp layout used by %(asctime)s
    format="%(asctime)s - %(levelname)s - %(message)s",  # record layout
    level=logging.DEBUG,  # lowest severity that is emitted
)

In [3]:
def extract(file):
    """Read an Excel workbook into a DataFrame.

    The workbook is parsed by ``pd.read_excel`` with the ``openpyxl`` engine
    and otherwise default options.

    Args:
        file: Location of the workbook, passed straight to ``pd.read_excel``.

    Returns:
        The loaded DataFrame, or ``None`` when anything goes wrong. Failures
        are logged at ERROR level together with the traceback instead of
        being raised.
    """
    try:
        logging.info(f'Starting extraction from {file}')
        frame = pd.read_excel(file, engine='openpyxl')
        logging.info('Extraction complete!')
    except Exception as e:
        # Best-effort stage: report the problem and hand back None.
        logging.error(f'Error extracting from {file}: {e}', exc_info=True)
        frame = None
    return frame

def transform(df):
    """Rename the raw report columns to their snake_case equivalents.

    Renames ``reportyear``, ``geotype``, ``geotypevalue`` and ``geoname`` to
    ``report_year``, ``geo_type``, ``geo_type_value`` and ``geo_name``. The
    input frame is not modified; a renamed copy is returned.

    Args:
        df: DataFrame to rename, or ``None`` when an earlier stage failed.

    Returns:
        The renamed DataFrame, or ``None`` if ``df`` is ``None`` or the
        rename raises. Problems are logged rather than raised.
    """
    # An upstream stage signals failure by returning None. Report that plainly
    # instead of letting df.rename raise a misleading AttributeError traceback.
    if df is None:
        logging.error('Transformation skipped: no input data (received None)')
        return None

    column_names = {
        'reportyear': 'report_year',
        'geotype': 'geo_type',
        'geotypevalue': 'geo_type_value',
        'geoname': 'geo_name',
    }
    try:
        logging.info('Starting transformation...')
        # rename() silently ignores labels that are absent, so surface them:
        # otherwise a schema change would still log "Transformation complete!".
        missing = [name for name in column_names if name not in df.columns]
        if missing:
            logging.warning(f'Expected columns not found, nothing to rename for: {missing}')
        renamed = df.rename(columns=column_names)
        logging.info('Transformation complete!')
        return renamed
    except Exception as e:
        logging.error(f'Transformation failed: {e}', exc_info=True)
        return None

def load(df, file_path, **to_csv_kwargs):
    """Write a DataFrame to a CSV file.

    Args:
        df: DataFrame to write, or ``None`` when an earlier stage failed.
        file_path: Destination passed to ``DataFrame.to_csv``.
        **to_csv_kwargs: Optional extra keyword arguments forwarded to
            ``DataFrame.to_csv`` (for example ``index=False``). With none
            given, the call is exactly ``df.to_csv(file_path)``.

    Returns:
        Always ``None``. Failures are logged at ERROR level, not raised.
    """
    # An upstream stage signals failure by returning None. Say so explicitly
    # instead of logging an AttributeError traceback from df.to_csv.
    if df is None:
        logging.error(f'Loading to {file_path} skipped: no data to write (received None)')
        return None

    try:
        logging.info(f'Loading to {file_path}')
        # NOTE(review): with pandas defaults the index is written as the first
        # column; pass index=False if that column is not wanted.
        df.to_csv(file_path, **to_csv_kwargs)
        logging.info('Loading complete!')
    except Exception as e:
        logging.error(f'Loading to {file_path} failed: {e}', exc_info=True)
    return None


# Pipeline endpoints, named once so they are easy to find and change.
# NOTE(review): these are absolute, machine-specific paths; consider deriving
# them from a configurable data directory so the notebook runs elsewhere.
SOURCE_XLSX = r'C:\Users\HP\Tina/Projects/mode_of_transportation.xlsx'
TARGET_CSV = r'C:\Users\HP\Tina/Projects/updated_mode_of_transportation.csv'

# extract() and transform() return None after logging their own failure, so
# stop at the first None instead of feeding it into the next stage.
raw_data = extract(SOURCE_XLSX)
transformed_data = transform(raw_data) if raw_data is not None else None
if transformed_data is not None:
    load(transformed_data, TARGET_CSV)
else:
    logging.error('Pipeline stopped before load: an earlier stage returned no data')

2025-08-11 22:43:59 - INFO - Starting extraction from C:\Users\HP\Tina/Projects/mode_of_transportation.xlsx
2025-08-11 22:46:05 - INFO - Extraction complete!
2025-08-11 22:46:05 - INFO - Starting transformation...
2025-08-11 22:46:05 - INFO - Transformation complete!
2025-08-11 22:46:05 - INFO - Loading to C:\Users\HP\Tina/Projects/updated_mode_of_transportation.csv
2025-08-11 22:46:16 - INFO - Loading complete!
