In [None]:
import pandas as pd
import os

In [None]:
# Load the train and test splits from CSV files under the local data/ directory.
df_train = pd.read_csv("data/train.csv")
df_test = pd.read_csv("data/test.csv")

## Helper functions

In [None]:
# Names of the categorical feature columns.
categorical_columns = [
    'Brand',
    'Material',
    'Size',
    'Laptop Compartment',
    'Waterproof',
    'Style',
    'Color',
]

# Names of the numeric columns.
numeric_columns = [
    'Weight Capacity (kg)',
    'Price',
]

In [None]:
def perform_transformation_and_save_data(
    df: pd.DataFrame,
    transformation,
    name: str,
    output_dir: str = "data",
) -> pd.DataFrame:
    """Apply a transformation to a DataFrame, save the result as CSV, and return it.

    Args:
        df (pd.DataFrame): The DataFrame to transform.
        transformation (callable): Function called once with the whole
            DataFrame; its return value is what gets saved and returned.
        name (str): File name for the output CSV. A ``.csv`` suffix is
            appended when it is missing.
        output_dir (str): Directory the CSV is written to. It is created if
            it does not exist. Defaults to ``"data"``.

    Returns:
        pd.DataFrame: The object returned by ``transformation(df)``.

    Raises:
        ValueError: If ``df`` is not a DataFrame, ``transformation`` is not
            callable, or ``name`` is not a non-empty string.
    """
    if not isinstance(df, pd.DataFrame):
        raise ValueError("Input df must be a pandas DataFrame.")

    if not callable(transformation):
        raise ValueError("Transformation must be a callable function.")

    # Reject bad names up front: a non-string would fail later with an
    # AttributeError, and an empty string would silently write ".csv".
    if not isinstance(name, str) or not name:
        raise ValueError("Name must be a non-empty string.")

    new_df = transformation(df)

    if not name.endswith(".csv"):
        name += ".csv"

    os.makedirs(output_dir, exist_ok=True)
    new_df.to_csv(os.path.join(output_dir, name), index=False)

    return new_df


In [None]:
def fill_na_with_missing(df: pd.DataFrame, columns) -> pd.DataFrame:
    """Return a copy of ``df`` with nulls in ``columns`` replaced by "Missing".

    Args:
        df (pd.DataFrame): Source frame. It is not modified.
        columns (iterable of str): Names of the columns to fill.

    Returns:
        pd.DataFrame: A new frame in which every null value in the listed
        columns has been replaced by the string ``"Missing"``. Columns not
        listed are left untouched.

    Raises:
        KeyError: If a name in ``columns`` is not a column of ``df``.
    """
    # Work on a copy so the caller's frame (e.g. the raw data loaded from
    # disk) is not silently altered when this cell is run.
    filled = df.copy()
    for col in columns:
        filled[col] = filled[col].fillna("Missing")

    return filled

In [None]:
def one_hot_encode(df: pd.DataFrame, columns) -> pd.DataFrame:
    """One-hot encode the given columns of ``df``.

    Delegates to ``pd.get_dummies`` with ``drop_first=True``.

    Args:
        df (pd.DataFrame): Frame containing the columns to encode.
        columns: Column names passed to ``pd.get_dummies`` as ``columns``.

    Returns:
        pd.DataFrame: The frame produced by ``pd.get_dummies``.
    """
    encoded = pd.get_dummies(data=df, columns=columns, drop_first=True)
    return encoded


## Transformations

In [None]:
def generic_transformation(df, columns=None):
    """Fill missing categorical values and one-hot encode the same columns.

    Args:
        df (pd.DataFrame): Frame to transform.
        columns (iterable of str, optional): Columns to fill and encode.
            Defaults to the notebook-level ``categorical_columns`` list.

    Returns:
        pd.DataFrame: Result of ``fill_na_with_missing`` followed by
        ``one_hot_encode`` on the selected columns.
    """
    # Use the shared notebook-level list rather than re-declaring it here;
    # the previous chained assignment created a local that shadowed it.
    cols_to_transform = list(categorical_columns if columns is None else columns)

    filled_df = fill_na_with_missing(df, cols_to_transform)
    return one_hot_encode(filled_df, cols_to_transform)

#### DEMO

In [None]:
# Run the same transformation on each split. Each call writes the result to
# data/<name>.csv and returns the transformed frame.
new_train = perform_transformation_and_save_data(df_train, generic_transformation, "new_train")
new_test = perform_transformation_and_save_data(df_test, generic_transformation, "new_test")

In [None]:
# Summarize the transformed training frame (column dtypes and non-null counts).
# generic_transformation fills nulls only in the categorical columns it encodes,
# so check the non-null counts below to confirm no other column has nulls.

new_train.info()