In [23]:
import pandas as pd
import numpy as np

from scipy.sparse import csr_matrix
from pandas.api.types import CategoricalDtype

from sklearn.decomposition import NMF

from tqdm import tqdm

import warnings
warnings.filterwarnings('ignore')

In [24]:
# Number of NMF components used for the city embedding (passed to NMF as
# n_components and used to name the output columns). This global is
# reassigned further down for the state embedding.
N_COMP = 50

In [25]:
# Training set, with first_active_month parsed as a datetime column.
raw_train = pd.read_csv(
    "../input/train.csv",
    parse_dates=["first_active_month"],
)

# Historical transactions: only the card and location columns are loaded.
h_trs = pd.read_csv(
    "../input/historical_transactions.csv",
    usecols=["card_id", "city_id", "state_id"],
)

# City: NMF embedding of per-card city transaction counts

In [26]:
# Number of historical transactions per (card_id, city_id) pair, as a long
# table with columns card_id, city_id, count_.
card_city = (
    h_trs.groupby(["card_id", "city_id"])
    .size()
    .reset_index(name="count_")
)

In [27]:
# Ordered categorical dtypes give every card a fixed row index and every city
# a fixed column index (position within the sorted unique values).
person_c = CategoricalDtype(categories=sorted(card_city["card_id"].unique()), ordered=True)
thing_c = CategoricalDtype(categories=sorted(card_city["city_id"].unique()), ordered=True)

# Integer codes of each (card, city) pair under the dtypes above.
row = card_city["card_id"].astype(person_c).cat.codes
col = card_city["city_id"].astype(thing_c).cat.codes

# Card x city count matrix: one row per card, one column per city.
sparse_matrix = csr_matrix(
    (card_city["count_"], (row, col)),
    shape=(person_c.categories.size, thing_c.categories.size),
)

In [28]:
# NMF: factorise the card x city count matrix; each row of `embedded` is the
# N_COMP-dimensional representation of one card.
model = NMF(n_components=N_COMP, init='random', random_state=0)
embedded = model.fit_transform(sparse_matrix)

# Component columns are numbered from 1; card_id is appended as the last
# column, in the same order as the matrix rows (the sorted card categories).
df = pd.DataFrame(
    embedded,
    columns=list(map("city_NMF_comp_{}".format, range(1, N_COMP + 1))),
).assign(card_id=person_c.categories.values)
df.to_csv("../input/city_nmf.csv", index=False)

# State: NMF embedding of per-card state transaction counts

In [32]:
# Number of NMF components used for the state embedding; this overwrites the
# value of N_COMP that was set earlier for the city embedding.
N_COMP = 10

In [33]:
# Number of historical transactions per (card_id, state_id) pair, as a long
# table with columns card_id, state_id, count_.
card_state = (
    h_trs.groupby(["card_id", "state_id"])
    .size()
    .reset_index(name="count_")
)

In [34]:
# Ordered categorical dtypes give every card a fixed row index and every state
# a fixed column index (position within the sorted unique values).
person_c = CategoricalDtype(categories=sorted(card_state["card_id"].unique()), ordered=True)
thing_c = CategoricalDtype(categories=sorted(card_state["state_id"].unique()), ordered=True)

# Integer codes of each (card, state) pair under the dtypes above.
row = card_state["card_id"].astype(person_c).cat.codes
col = card_state["state_id"].astype(thing_c).cat.codes

# Card x state count matrix: one row per card, one column per state.
sparse_matrix = csr_matrix(
    (card_state["count_"], (row, col)),
    shape=(person_c.categories.size, thing_c.categories.size),
)

In [35]:
# NMF: factorise the card x state count matrix; each row of `embedded` is the
# N_COMP-dimensional representation of one card.
model = NMF(n_components=N_COMP, init='random', random_state=0)
embedded = model.fit_transform(sparse_matrix)

# Component columns are numbered from 1; card_id is appended as the last
# column, in the same order as the matrix rows (the sorted card categories).
df = pd.DataFrame(
    embedded,
    columns=list(map("state_NMF_comp_{}".format, range(1, N_COMP + 1))),
).assign(card_id=person_c.categories.values)
df.to_csv("../input/state_nmf.csv", index=False)