In [1]:
import pandas as pd
import numpy as np

def get_raw_data(train_path="train_2.csv", key_path="key_2.csv"):
    """Read the training history and the submission key from CSV.

    Parameters
    ----------
    train_path : str or path-like, default "train_2.csv"
        CSV holding the training table.
    key_path : str or path-like, default "key_2.csv"
        CSV holding the submission key.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(train, test)`` exactly as parsed by ``pd.read_csv``.
    """
    train = pd.read_csv(train_path)
    test = pd.read_csv(key_path)
    return train, test

def transform_data(train, test, periods=-49):
    """Reshape the raw tables into long, feature-enriched frames.

    Parameters
    ----------
    train : pandas.DataFrame
        Wide table with a ``Page`` column plus one column per date.
    test : pandas.DataFrame
        Key table whose ``Page`` values end in ``_YYYY-MM-DD``.
        It is NOT modified; a new frame is returned instead.
    periods : int, default -49
        Slice start applied to ``train.columns``; the default keeps the last
        49 columns.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``train_flattened`` with columns ``Page``, ``date``, ``Visits`` plus the
        columns added by ``get_features``, and the transformed ``test`` with
        ``Page`` stripped of its date suffix, a ``date`` column, and the same
        feature columns.
    """
    selected_columns = list(train.columns[periods:])
    # Guard against selecting 'Page' twice when the slice already covers it
    # (e.g. periods=0); duplicated id columns break pd.melt.
    if 'Page' not in selected_columns:
        selected_columns.append('Page')
    train_flattened = pd.melt(
        train[selected_columns],
        id_vars='Page',
        var_name='date',
        value_name='Visits',
    )
    train_flattened = get_features(train_flattened)

    # Both new columns are derived from the ORIGINAL Page strings, and
    # `assign` returns a new frame so the caller's `test` stays untouched
    # (re-running this function on the same input is therefore safe).
    page_with_date = test['Page']
    test = test.assign(
        date=page_with_date.str[-10:],   # trailing 'YYYY-MM-DD'
        Page=page_with_date.str[:-11],   # everything before '_YYYY-MM-DD'
    )
    test = get_features(test)
    return train_flattened, test

def get_features(df):
    """Add calendar features derived from ``df['date']``.

    The frame is modified in place and also returned. ``date`` is cast to
    ``datetime64[ns]``; ``every2_weekdays`` is the day of week (Monday=0)
    integer-divided by 2; ``weekend`` is 1.0 for Saturday/Sunday, else 0.0.
    """
    df['date'] = df['date'].astype('datetime64[ns]')
    day_of_week = df['date'].dt.dayofweek
    df['every2_weekdays'] = day_of_week // 2
    # dayofweek is 5 or 6 on Saturday/Sunday, so floor-dividing by 5 yields 1.
    weekend_flag = day_of_week // 5
    df['weekend'] = weekend_flag.astype(float)
    return df

def predict_using_median_weekend(train, test):
    """Predict visits as the per-page median for weekdays vs. weekends.

    Parameters
    ----------
    train : pandas.DataFrame
        Long training frame with at least ``Page``, ``weekend`` and ``Visits``.
    test : pandas.DataFrame
        Frame with at least ``Page`` and ``weekend``; it is not modified.

    Returns
    -------
    numpy.ndarray
        One prediction per row of ``test``, in the same order. Rows whose
        ``(Page, weekend)`` pair never occurs in ``train`` get NaN.
    """
    keys = ['Page', 'weekend']
    # Aggregate only the target column. Taking the median of every remaining
    # column (e.g. the datetime `date`) can add extra columns to the result,
    # which an implicit merge would then silently use as join keys.
    medians = train.groupby(keys, as_index=False)['Visits'].median()
    # Join on explicit keys, using only the key columns of `test`, so unrelated
    # columns (or an existing 'Visits' column) can never affect the match.
    merged = test[keys].merge(medians, on=keys, how='left')
    return merged['Visits'].values

def wiggle_preds(df, cutoff='2017-10-13', early_bump=0.01, late_bump=0.01):
    """Scale predictions up by a small fraction, zero out NaNs and round.

    ``df`` is modified in place (its ``Visits`` column is overwritten) and
    also returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs a ``date`` column comparable with ``cutoff`` and a numeric
        ``Visits`` column.
    cutoff : str, default '2017-10-13'
        Rows with ``date < cutoff`` use ``early_bump``; all others use
        ``late_bump``.
    early_bump, late_bump : float, default 0.01
        Fractional uplift for each period. With the defaults both periods get
        the same 1% uplift, so ``cutoff`` has no effect unless the two rates
        are set to different values.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with ``Visits`` replaced by
        ``round(Visits + Visits * bump)`` and NaN replaced by 0.
    """
    visits = df['Visits'].values
    is_early = (df['date'] < cutoff).values
    bump = np.where(is_early, early_bump, late_bump)
    # Arithmetic creates a fresh array, so the in-place NaN fix below does not
    # touch the frame's underlying data before the final assignment.
    adjusted = visits + visits * bump
    adjusted[pd.isnull(adjusted)] = 0
    df['Visits'] = np.round(adjusted)
    return df

def _median_of_window_medians(visits, windows):
    """Summarise one page's history as the median of its trailing-window medians.

    ``visits`` is a 1-D float array ordered oldest to newest with missing days
    already set to 0; ``windows`` is a non-empty sequence of window lengths.
    """
    nonzero_days = np.flatnonzero(visits)
    if len(nonzero_days) == 0:
        # The page never had a non-zero count.
        return 0.0
    first_active = nonzero_days[0]
    active_len = len(visits) - first_active
    if active_len < windows[0]:
        # History is shorter than the smallest window: use all of it,
        # starting at the first non-zero day.
        return np.median(visits[first_active:])
    window_medians = []
    for width in windows:
        # Stop at the first window that reaches back before the first
        # non-zero day.
        if width > active_len:
            break
        window_medians.append(np.median(visits[-width:]))
    return np.median(window_medians)


def get_fibonachi_window_preds(train_path="train_2.csv", key_path="key_2.csv",
                               windows=(7, 14, 28, 56, 112, 224, 336, 448, 560, 784)):
    """Predict one visit level per page from medians over trailing windows.

    Adapted from:
    https://www.kaggle.com/safavieh/median-estimation-by-fibonacci-et-al-lb-44-9

    For every page, the median of the last ``W`` days is taken for each window
    ``W`` that fits inside the page's history (counted from its first non-zero
    day), and the prediction is the median of those medians. Predictions
    below 1 are set to 0.

    Parameters
    ----------
    train_path : str or path-like, default "train_2.csv"
        Wide training CSV with a ``Page`` column plus one column per day.
    key_path : str or path-like, default "key_2.csv"
        Submission key CSV with ``Page`` (ending in ``_YYYY-MM-DD``) and ``Id``.
    windows : sequence of int
        Trailing window lengths in days, smallest first.

    Returns
    -------
    pandas.DataFrame
        The key table with ``Page`` stripped of its date suffix and a
        ``fibo_visits`` column (NaN for pages absent from the training file).

    Raises
    ------
    ValueError
        If ``windows`` is empty.
    """
    if len(windows) == 0:
        raise ValueError("windows must contain at least one window length")

    train = pd.read_csv(train_path)
    train = train.fillna(0.)

    # Work on a plain float matrix: much faster than iterrows() over object
    # rows, and independent of the frame's index labels.
    history = train.drop(columns=['Page']).to_numpy(dtype=float)
    visits = np.array(
        [_median_of_window_medians(page_history, windows) for page_history in history],
        dtype=float,
    )
    visits[visits < 1] = 0.
    train['Visits'] = visits

    test = pd.read_csv(key_path)
    # Drop the trailing '_YYYY-MM-DD' so keys match the training page names.
    test['Page'] = test['Page'].str[:-11]

    test = test.merge(train[['Page', 'Visits']], on='Page', how='left')
    test = test.rename(columns={'Visits': 'fibo_visits'})
    return test


   

In [2]:
# Load the raw tables, then build the long training frame and the
# feature-enriched key frame from the last 49 training columns.
train, test = get_raw_data()
train, test = transform_data(train, test, periods=-49)

# Model 1: per-page weekday/weekend medians, post-processed by wiggle_preds.
preds_weekend = predict_using_median_weekend(train, test)
test['Visits'] = preds_weekend
test = wiggle_preds(test)

# Model 2: window-median predictions, joined to model 1 on the submission Id.
fibo_test = get_fibonachi_window_preds()
test = test.merge(fibo_test, on='Id')

# Blend: `weight` goes to model 1 and the remainder to model 2; blended
# values below 1 are set to 0.
weight = 0.28
preds_ensemble = weight * test['Visits'].values + (1 - weight) * test['fibo_visits'].values
preds_ensemble[preds_ensemble < 1] = 0.
test['Visits'] = preds_ensemble

# Write the submission file and preview both ends of it.
test[['Id','Visits']].to_csv('sub_mads_windows_{}.csv'.format(weight), index=False)
for preview in (test[['Id', 'Visits']].head(10), test[['Id', 'Visits']].tail(10)):
    print(preview)




             Id  Visits
0  0b293039387a  522.80
1  7114389dd824  522.80
2  057b02ff1f09  522.80
3  bd2aca21caa3  546.88
4  c0effb42cdd5  546.88
5  4ccd369adefc  522.80
6  67d5c2ebc330  522.80
7  1c51606e416b  522.80
8  0f27b91481db  522.80
9  07f5f08a49fa  522.80
                   Id  Visits
8993896  2b1ae259f55a  225.92
8993897  75deaba16778  225.92
8993898  a4d97ae6a195  221.72
8993899  4e0c1e64f738  221.72
8993900  9a735df742e2  221.72
8993901  f78fbaa305ad  221.72
8993902  33aecaf259d1  221.72
8993903  a36228b64466  225.92
8993904  26887d3e5c99  225.92
8993905  06ad06979f80  221.72


In [4]:
# Display the last 20 rows of `test` for a visual sanity check.
test.tail(20)

Unnamed: 0,Page_x,Id,date,every2_weekdays,weekend,Visits,Page_y,fibo_visits
8993886,龙生九子_zh.wikipedia.org_mobile-web_all-agents,aeb4cd159568,2017-10-25,1,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993887,龙生九子_zh.wikipedia.org_mobile-web_all-agents,61f9ad4a134b,2017-10-26,1,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993888,龙生九子_zh.wikipedia.org_mobile-web_all-agents,65a1df224420,2017-10-27,2,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993889,龙生九子_zh.wikipedia.org_mobile-web_all-agents,f218e2a344f0,2017-10-28,2,1.0,225.92,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993890,龙生九子_zh.wikipedia.org_mobile-web_all-agents,55a0f4f60bcf,2017-10-29,3,1.0,225.92,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993891,龙生九子_zh.wikipedia.org_mobile-web_all-agents,f1f430469c68,2017-10-30,0,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993892,龙生九子_zh.wikipedia.org_mobile-web_all-agents,f560ef9fa473,2017-10-31,0,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993893,龙生九子_zh.wikipedia.org_mobile-web_all-agents,ec2d0d713c50,2017-11-01,1,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993894,龙生九子_zh.wikipedia.org_mobile-web_all-agents,4f13b0cd46f9,2017-11-02,1,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
8993895,龙生九子_zh.wikipedia.org_mobile-web_all-agents,3464386275bc,2017-11-03,2,0.0,221.72,龙生九子_zh.wikipedia.org_mobile-web_all-agents,222.0
