In [None]:
import numpy as np
import pandas as pd

import os

from fastai.vision.all import *
from fastai.collab import *
from fastai.tabular.all import *

# Fashion recommendations

In [None]:
# Relative location of the competition input files; every CSV in this
# notebook is read from this directory.
path = Path('../input/h-and-m-personalized-fashion-recommendations')

In [None]:
# Read the articles, customers and training-transactions tables in one pass;
# the tuple order fixes which frame each file ends up in.
ar_df, cu_df, tr_df = (
    pd.read_csv(path/csv_name)
    for csv_name in ('articles.csv', 'customers.csv', 'transactions_train.csv')
)

In [None]:
# Preview the first rows of the transactions table.
tr_df.head()

In [None]:
# Print the column summary of the transactions frame as loaded.
tr_df.info()

In [None]:
# Add a constant target of 1 to every transaction row; this column is passed
# as `rating_name` when the collaborative-filtering DataLoaders are built.
# NOTE(review): the target is 1 for every row, so the frame contains positive
# examples only — confirm that training without negatives is intended.
tr_df['bought'] = 1

In [None]:
# Print the column summary again now that 'bought' has been added.
tr_df.info()

In [None]:
# Drop the columns that are not used for collaborative filtering. Reassigning
# with errors='ignore' (instead of an in-place drop) keeps this cell
# re-runnable: a second execution no longer raises KeyError because the
# columns are already gone.
tr_df = tr_df.drop(columns=['sales_channel_id', 'price'], errors='ignore')

# Keep transactions from 1 September 2020 onwards. read_csv is called without
# date parsing, so this is a lexicographic string comparison (assumes t_dat
# holds zero-padded 'YYYY-MM-DD' strings — TODO confirm). The cutoff must be
# zero-padded too: the previous '2020-09-1' sorts after '2020-09-01' ...
# '2020-09-09' and silently discarded those nine days.
tr_df = tr_df.loc[tr_df["t_dat"] >= '2020-09-01']
tr_df.shape

In [None]:
# Build the collaborative-filtering DataLoaders: customers play the user
# role, articles the item role, and the 'bought' column is the rating.
dls = CollabDataLoaders.from_df(
    tr_df,
    user_name='customer_id',
    item_name='article_id',
    rating_name="bought",
    bs=64,
)
dls.show_batch()

In [None]:
# Number of entries in the DataLoaders' class lists for each ID column.
n_customers = len(dls.classes['customer_id'])
n_articles = len(dls.classes['article_id'])
n_factors = 5  # width of the latent-factor vectors created below

# Random latent-factor matrices with one row per customer / article class.
# Customers are drawn first, then articles, matching the RNG draw order.
c_factors, a_factors = (
    torch.randn(n_rows, n_factors) for n_rows in (n_customers, n_articles)
)

In [None]:
# Display a freshly drawn (n_customers, n_factors) random tensor; the value is
# shown as cell output only and is not stored.
torch.randn(n_customers, n_factors)

## Maskinlæringsmodell

In [None]:
class DotProduct(Module):
    """Dot-product collaborative-filtering model with per-entity biases.

    Each customer and each article gets an `n_factors`-wide embedding plus a
    scalar bias embedding. The prediction is the dot product of the two factor
    vectors plus both biases, passed through `sigmoid_range` with `y_range`.

    Args:
        n_customers: number of rows in the customer embeddings.
        n_articles: number of rows in the article embeddings.
        n_factors: width of the factor embeddings.
        y_range: (low, high) pair unpacked into `sigmoid_range`.
    """

    def __init__(self, n_customers, n_articles, n_factors, y_range=(0,1)):
        # Creation order is kept (customer factors, customer bias, article
        # factors, article bias) so parameter registration is unchanged.
        self.c_factors = Embedding(n_customers, n_factors)
        self.c_bias = Embedding(n_customers, 1)
        self.a_factors = Embedding(n_articles, n_factors)
        self.a_bias = Embedding(n_articles, 1)
        self.y_range = y_range

    def forward(self, x):
        """Score a batch; column 0 of `x` indexes customers, column 1 articles."""
        customer_idx = x[:,0]
        article_idx = x[:,1]
        interaction = self.c_factors(customer_idx) * self.a_factors(article_idx)
        dot = interaction.sum(dim=1, keepdim=True)
        # Biases are summed with each other first, then added to the dot product.
        bias = self.c_bias(customer_idx) + self.a_bias(article_idx)
        return sigmoid_range(dot + bias, *self.y_range)

In [None]:
# Pull a single batch from the DataLoaders and display the shape of its
# input tensor.
x,y = dls.one_batch()
x.shape

## Deep learning-modell

In [None]:
# Embedding sizes returned by get_emb_sz for the DataLoaders; displayed here
# and later unpacked into the CollabNN constructor.
embs = get_emb_sz(dls)
embs

In [None]:
class CollabNN(Module):
    """Neural collaborative-filtering model.

    The customer and article embeddings are concatenated and fed through a
    small MLP (Linear -> ReLU -> Linear) that produces one output per row,
    which is then passed through `sigmoid_range` with `y_range`.

    Args:
        user_sz: arguments unpacked into the customer `Embedding`; element 1
            is used as its output width.
        item_sz: arguments unpacked into the article `Embedding`; element 1
            is used as its output width.
        y_range: (low, high) pair unpacked into `sigmoid_range`.
        n_act: number of units in the hidden layer.
    """

    def __init__(self, user_sz, item_sz, y_range=(0,1.5), n_act=100):
        self.c_factors = Embedding(*user_sz)
        self.a_factors = Embedding(*item_sz)
        # The MLP input is the two embedding vectors placed side by side.
        concat_width = user_sz[1] + item_sz[1]
        self.layers = nn.Sequential(
            nn.Linear(concat_width, n_act),
            nn.ReLU(),
            nn.Linear(n_act, 1),
        )
        self.y_range = y_range

    def forward(self, x):
        """Score a batch; column 0 of `x` indexes customers, column 1 articles."""
        customer_vec = self.c_factors(x[:,0])
        article_vec = self.a_factors(x[:,1])
        joined = torch.cat((customer_vec, article_vec), dim=1)
        return sigmoid_range(self.layers(joined), *self.y_range)

In [None]:
# Instantiate the neural model; the entries of `embs` are unpacked into
# CollabNN's positional parameters (user_sz, item_sz, ...).
model = CollabNN(*embs)

In [None]:
# Train the neural model with mean-squared-error loss for five one-cycle
# epochs (max lr 5e-3, weight decay 0.01).
# RMSE replaces `accuracy` as the reported metric: `accuracy` takes an argmax
# over the last dimension, which is always index 0 for this single-output
# regression model, so it says nothing about how close predictions are to
# the target.
learn = Learner(dls, model, loss_func=MSELossFlat(), metrics=rmse)
learn.fit_one_cycle(5, 5e-3, wd=0.01)

In [None]:
# Dot-product model with 50 latent factors, wrapped in a Learner with
# mean-squared-error loss.
# RMSE replaces `accuracy` as the reported metric: `accuracy` takes an argmax
# over the last dimension, which is always index 0 for a model with a single
# output column, so it cannot reflect regression quality.
model1 = DotProduct(n_customers, n_articles, 50)
learn1 = Learner(dls, model1, loss_func=MSELossFlat(), metrics=rmse)

In [None]:
# Train the dot-product model for one one-cycle epoch (max lr 5e-3,
# weight decay 0.1).
learn1.fit_one_cycle(1, 5e-3, wd=0.1)

In [None]:
# Reference learner built with fastai's collab_learner: 50 factors and the
# same (0, 1.5) output range that CollabNN uses by default.
learn2 = collab_learner(dls, n_factors=50, y_range=(0, 1.5))

In [None]:
# Train the built-in learner with the same schedule as learn1 (one epoch,
# max lr 5e-3, weight decay 0.1).
learn2.fit_one_cycle(1, 5e-3, wd=0.1)

In [None]:
# Load the sample submission file; `rec` is not used again in the visible cells.
rec = pd.read_csv(path/'sample_submission.csv')
# Build a test DataLoader from the filtered transactions frame and score it
# with the CollabNN learner; the second value returned by get_preds is
# discarded.
test_dl = learn.dls.test_dl(tr_df)
preds, _ = learn.get_preds(dl=test_dl)


In [None]:
# The original call could not run: `testdf` is not defined in any visible cell
# and DataLoaders objects expose no `predict` method. Define an explicit sample
# frame and score it through the trained learner instead.
testdf = tr_df.head(1000)  # small sample of the filtered transactions
predi, _ = learn.get_preds(dl=learn.dls.test_dl(testdf))

In [None]:
# Display the predictions obtained from the test DataLoader.
preds

In [None]:
# get_preds called without a DataLoader argument; the returned value is shown
# as the cell output and not stored.
# NOTE(review): with no arguments fastai presumably scores the validation
# split (default ds_idx=1) — confirm against the installed fastai version.
learn.get_preds()

In [None]:
# Count the rows for every (customer, article) pair.
# The previous grouping keys 'int_c_id' / 'int_a_id' are not created in any
# visible cell, so the groupby raised KeyError; the ID columns that actually
# exist in tr_df are used instead.
user_item = tr_df.groupby(['customer_id', 'article_id']).count()

In [None]:
# Preview the first rows of the per-pair counts.
user_item.head()

# Kolonne-informasjon

product_code / prod_name

product_type_no(131) / product_type_name(130)

graphical_appearance_no(29) / graphical_appearance_name(29)

product_group_name

colour_group_code(49) / colour_group_name(49)

perceived_colour_value_id(7) / perceived_colour_value_name(7)

perceived_colour_master_id(19) / perceived_colour_master_name(19)

department_no(298) / department_name(249)

index_code(4) / index_name(4)

index_group_no(9) / index_group_name(9)

section_no(56) / section_name(55)

garment_group_no(55) / garment_group_name(20)

detail_desc

In [None]:
# Look at the individual columns to see how many distinct values each one
# contains, and from that how important it is
# (except 'product_code', 'prod_name', 'product_group_name' and 'detail_desc').
antall_sections = ar_df.index_name.value_counts()
# Print each distinct index_name together with its position in the
# value_counts ordering (the number printed is the position, not a frequency).
for position, label in enumerate(antall_sections.index.values):
    print(position, label)