# In [1]:
def identify_weekly_sales(trans_weekly, primary_keys):
    """
    Attach total weekly sales to every primary-key row.

    Left-joins ``trans_weekly`` onto ``primary_keys`` so that every row of
    ``primary_keys`` is kept; rows without a matching transaction record get a
    ``total_sales_qty`` of 0.

    Args:
        trans_weekly: Pandas DataFrame
            Must contain "outlet_code", "item_department", "week" and
            "total_sales_qty". Any other columns are dropped from the result.
        primary_keys: Pandas DataFrame
            Must contain "outlet_code", "item_department" and "week".

    Returns:
        weekly_sales: Pandas DataFrame
            Columns "week", "outlet_code", "item_department" and
            "total_sales_qty", sorted by "week". The index of the merge
            result is kept (it is not reset after sorting).
    """
    key_columns = ["outlet_code", "item_department", "week"]
    output_columns = ["week", "outlet_code", "item_department", "total_sales_qty"]

    merged = pd.merge(primary_keys, trans_weekly, on=key_columns, how="left")

    # Work on an explicit copy and assign the filled column back. A chained
    # ``df[col].fillna(..., inplace=True)`` acts on a temporary Series and does
    # not update the frame when pandas Copy-on-Write is active.
    weekly_sales = merged[output_columns].copy()
    weekly_sales["total_sales_qty"] = weekly_sales["total_sales_qty"].fillna(0)

    return weekly_sales.sort_values("week")

#weekly_sales = identify_weekly_sales(trans_weekly, primary_keys)

def create_target_variable(weekly_sales):
    """
    Create the "sales_next_week" target variable.

    For every ("outlet_code", "item_department") group, "sales_next_week" is
    the "total_sales_qty" of the following row once the group is ordered by
    "week". The last row of each group has no following row and gets NaN.

    Args:
        weekly_sales: Pandas DataFrame
            Must contain "outlet_code", "item_department", "week" and
            "total_sales_qty". It does not need to be pre-sorted and is not
            modified.

    Returns:
        target_variable: Pandas DataFrame
            The input columns plus "sales_next_week", ordered by
            "outlet_code", "item_department", "week", with a fresh 0..n-1
            index. Rows whose group key is missing are excluded.
    """
    group_keys = ["outlet_code", "item_department"]

    # groupby() skips null keys by default, so the former groupby-based
    # implementation silently dropped these rows; keep that behaviour explicit.
    target_variable = weekly_sales.dropna(subset=group_keys)

    # Order rows BEFORE shifting: shift(-1) takes the next row in the current
    # row order, so it only means "next week" when each group is week-sorted.
    target_variable = target_variable.sort_values(group_keys + ["week"])
    target_variable = target_variable.reset_index(drop=True)

    target_variable["sales_next_week"] = (
        target_variable.groupby(group_keys)["total_sales_qty"].shift(-1)
    )

    return target_variable
