In [None]:
import pandas as pd
from sklearn.svm import SVR

Different simple models
1. Weekly median from all time
2. Monthly median from all time
3. Monthly median from last year
4. Last year's value
5. Linear regression (Lin Reg) over all years for each store, dept, week

In [None]:
# Raw competition tables, read from the current working directory.
train = pd.read_csv(filepath_or_buffer="train.csv")        # labelled rows (used below as the `target` of add_feautres)
test = pd.read_csv(filepath_or_buffer="test.csv")          # rows to predict
features = pd.read_csv(filepath_or_buffer="features.csv")  # merged below on Store + Date
stores = pd.read_csv(filepath_or_buffer="stores.csv")      # merged below on Store

In [None]:
# feature engineering for all
def add_feautres(target, stores, features):
    """Merge store and feature data onto `target` and add calendar columns.

    `target` is merged with `stores` on "Store" and then with `features` on
    ["Store", "Date"]. The columns "IsHoliday_y" (the suffixed duplicate that
    the second merge produces when both sides carry "IsHoliday"), "Size",
    "Temperature" and "Type" are dropped, remaining missing values are
    replaced with 0, and "Date" is parsed to datetime.

    Three derived columns are appended: "year", "week" (ISO 8601 week number)
    and "month".

    Parameters
    ----------
    target : pandas.DataFrame
        Frame with at least "Store" and "Date" columns.
    stores : pandas.DataFrame
        Per-store table keyed by "Store".
    features : pandas.DataFrame
        Table keyed by "Store" and "Date".

    Returns
    -------
    pandas.DataFrame
        A new frame; none of the inputs is modified.

    Raises
    ------
    KeyError
        If any of the four dropped columns is absent after the merges.
    """
    merged = (
        target
        .merge(stores, on="Store")
        .merge(features, on=["Store", "Date"])
        .drop(columns=["IsHoliday_y", "Size", "Temperature", "Type"])
        .fillna(0)
    )
    merged["Date"] = pd.to_datetime(merged["Date"])
    merged["year"] = merged["Date"].dt.year
    # `Series.dt.week` was deprecated and then removed in pandas 2.0;
    # `isocalendar().week` yields the same ISO week number. It comes back as
    # a nullable UInt32, so cast to a plain integer for grouping and joining.
    merged["week"] = merged["Date"].dt.isocalendar().week.astype("int64")
    merged["month"] = merged["Date"].dt.month

    return merged

# Enrich both splits the same way. The names are rebound to the enriched
# frames, so this cell expects the freshly loaded CSV frames as input.
train = add_feautres(target=train, stores=stores, features=features)
test = add_feautres(target=test, stores=stores, features=features)

In [None]:
def make_dept_weekly_median(df):
    """Median of "Weekly_Sales" for every (Store, Dept, week) combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Store", "Dept", "week" and "Weekly_Sales" columns.

    Returns
    -------
    pandas.Series
        Median sales indexed by a (Store, Dept, week) MultiIndex. The Series
        keeps the name "Weekly_Sales"; predict_from_median's `rsuffix` is what
        disambiguates it when the target frame already has that column.
    """
    # Select the column before aggregating: only Weekly_Sales is needed, and
    # taking the median of every column fails on non-numeric data in
    # pandas >= 2.0.
    return df.groupby(["Store", "Dept", "week"])["Weekly_Sales"].median()

def make_dept_monthly_median(df):
    """Median of "Weekly_Sales" for every (Store, Dept, month) combination.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Store", "Dept", "month" and "Weekly_Sales" columns.

    Returns
    -------
    pandas.Series
        Median sales indexed by a (Store, Dept, month) MultiIndex. The Series
        keeps the name "Weekly_Sales"; predict_from_median's `rsuffix` is what
        disambiguates it when the target frame already has that column.
    """
    # Select the column before aggregating: only Weekly_Sales is needed, and
    # taking the median of every column fails on non-numeric data in
    # pandas >= 2.0.
    return df.groupby(["Store", "Dept", "month"])["Weekly_Sales"].median()

def make_last_record(df):
    """Keep, for each (Store, Dept, week) group, the rows from its latest year.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with "Store", "Dept", "week", "year", "Weekly_Sales" and
        "month" columns.

    Returns
    -------
    pandas.DataFrame
        The rows of `df` whose "year" equals the maximum "year" within their
        (Store, Dept, week) group, restricted to the columns "Store", "Dept",
        "week", "Weekly_Sales" and "month". The original row index is kept.
    """
    # Use the string alias "max": passing the builtin `max` to transform is
    # deprecated in recent pandas and emits a FutureWarning.
    latest_year = df.groupby(["Store", "Dept", "week"])["year"].transform("max")
    is_latest = latest_year == df["year"]
    # One .loc call selects rows and columns together, avoiding chained indexing.
    return df.loc[is_latest, ["Store", "Dept", "week", "Weekly_Sales", "month"]]

In [None]:
def predict_from_median(means_gb, target_df, time):
    """Attach grouped values from `means_gb` to the rows of `target_df`.

    Rows are matched through the "Store" and "Dept" columns plus the column
    named by `time`. When a joined column name collides with one already in
    `target_df`, the joined one gets the suffix "_<time>_means".
    """
    join_keys = ["Store", "Dept", time]
    collision_suffix = "_" + time + "_means"
    return target_df.join(means_gb, on=join_keys, rsuffix=collision_suffix)

def add_prev_year(prev_year_gb, target_df):
    """Attach previous-year values to `target_df` by (Store, Dept, week).

    Parameters
    ----------
    prev_year_gb : pandas.DataFrame or pandas.Series
        Either indexed by a three-level (Store, Dept, week) index, or a flat
        DataFrame that carries "Store", "Dept" and "week" as ordinary columns
        (the shape make_last_record returns). The flat form is indexed by
        those columns before joining.
    target_df : pandas.DataFrame
        Frame with "Store", "Dept" and "week" columns.

    Returns
    -------
    pandas.DataFrame
        `target_df` with the columns of `prev_year_gb` appended; colliding
        column names from `prev_year_gb` get the suffix "_pyear".
    """
    keys = ["Store", "Dept", "week"]
    columns = getattr(prev_year_gb, "columns", ())
    # Joining with `on=keys` needs one index level per key. A flat frame with
    # the keys as columns would make the join raise, so index it first. Input
    # that already has three index levels is passed through untouched.
    if prev_year_gb.index.nlevels != len(keys) and all(k in columns for k in keys):
        prev_year_gb = prev_year_gb.set_index(keys)
    return target_df.join(prev_year_gb, on=keys, rsuffix="_pyear")

In [None]:
# Lookup tables for the simple models, all derived from the training frame.
weekly_medians = make_dept_weekly_median(df=train)
prev_year = make_last_record(df=train)
# Monthly medians are computed on the latest-year rows only, not on all of train.
monthly_medians = make_dept_monthly_median(df=prev_year)

In [None]:
# Fit one SVR per (store, dept, week) combination, regressing Weekly_Sales on
# the year. `models` maps "<store>_<dept>_<week>" to the fitted estimator.
#
# groupby visits only the combinations that actually occur in `train`, in a
# single pass. Looping over the full Store x Dept x week product would
# re-filter the whole frame for every combination and try to fit on empty
# selections.
models = {}
for (store, dept, week), group in train.groupby(["Store", "Dept", "week"]):
    key = f"{store}_{dept}_{week}"
    years = group["year"]
    values = group["Weekly_Sales"]

    try:
        svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
        # The estimator expects a 2-D feature matrix, hence the reshape to one column.
        svr_rbf.fit(years.values.reshape(-1, 1), values)
    except ValueError:
        # Best effort: report the group that could not be fitted and carry on.
        # Catching ValueError only (rather than a bare `except:`) lets
        # KeyboardInterrupt and unrelated errors surface.
        print(key)
        print("\t", years)
        print("\t", values)
        print()
    else:
        models[key] = svr_rbf
    