# Clean Brands

## Import packages and File

In [1]:
import pandas as pd
import seaborn as sns

# Base name of the dataset; also used by end_pipeline to build its output path.
df_name = "brands"
# Relative directory that holds the raw CSV inputs.
path_name = "../../data/raw_data/"
# Load <path_name><df_name>.csv as the starting frame for the pipeline.
init_df = pd.read_csv(path_name + df_name + ".csv")


## Log-Wrapper

In [2]:
from functools import wraps
import datetime as dt

def log_step(func):
    """Decorate a pipeline step so each call reports its result shape and duration.

    The wrapped callable is invoked with exactly the arguments it was given
    and its return value is handed back untouched. After the call, one message
    containing the step's name, ``result.shape`` and the elapsed wall-clock
    time is printed, so the returned object must expose a ``shape`` attribute.
    """
    @wraps(func)
    def timed_call(*args, **kwargs):
        started_at = dt.datetime.now()
        result = func(*args, **kwargs)
        finished_at = dt.datetime.now()
        # str() of a timedelta renders as H:MM:SS[.ffffff].
        time_taken = str(finished_at - started_at)
        print(f"{func.__name__}:\n shape={result.shape} took {time_taken}s\n")
        return result
    return timed_call

## Pipeline Functions

### Pipeline Init

In [3]:
@log_step
def start_pipeline(df):
    """Open the pipeline by returning a copy of ``df``.

    The caller's object is not returned directly; the result of
    ``df.copy()`` is.
    """
    working_copy = df.copy()
    return working_copy

### Rename Columns

In [4]:
@log_step
def rename_columns(df):
    """Return ``df`` with ``short``/``long`` renamed to ``brand_short``/``brand_long``."""
    new_names = {"short": "brand_short", "long": "brand_long"}
    return df.rename(columns=new_names)

### Set Data Types

In [5]:
@log_step
def adjust_dtypes(df):
    """Placeholder dtype step: currently hands ``df`` back without any conversion."""
    unchanged = df
    return unchanged

### Remove Duplicates

In [6]:
@log_step
def remove_duplicates(df):
    """Return ``df`` without repeated rows, keeping the first occurrence of each."""
    # keep="first" is the pandas default, spelled out here for readability.
    deduplicated = df.drop_duplicates(keep="first")
    return deduplicated

In [7]:
@log_step
def drop_nan(df):
    """Return ``df`` without the rows that contain at least one missing value."""
    # axis=0 / how="any" are the pandas defaults, spelled out for readability.
    complete_rows = df.dropna(axis=0, how="any")
    return complete_rows


In [14]:
@log_step
def end_pipeline(df):
    """Close the pipeline: write ``df`` to disk and hand the same object back.

    The frame is written via ``df.to_csv`` with default options to a file
    named after the module-level ``df_name``.
    """
    # NOTE(review): the target sits in the raw-data directory and carries no
    # file extension -- confirm this is the intended destination.
    target = f"../../data/raw_data/{df_name}"
    df.to_csv(target)
    return df

## Pipeline UI

In [15]:
# Run the cleaning steps in order; every step is wrapped by log_step and
# therefore prints its output shape and timing.
# NOTE(review): drop_nan is defined above but is not part of this chain --
# confirm that is intended.
brands = (
    init_df
    .pipe(start_pipeline)
    .pipe(rename_columns)
    .pipe(adjust_dtypes)
    .pipe(remove_duplicates)
    .pipe(end_pipeline)
)

start_pipeline:
 shape=(187, 2) took 0:00:00.000165s

rename_columns:
 shape=(187, 2) took 0:00:00.000772s

adjust_dtypes:
 shape=(187, 2) took 0:00:00.000008s

remove_duplicates:
 shape=(187, 2) took 0:00:00.002286s

end_pipeline:
 shape=(187, 2) took 0:00:00.005638s

