In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import fastFM
from fastFM import als
from sklearn.preprocessing import OneHotEncoder
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler, OneHotEncoder
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import Ridge
from scipy.sparse import hstack

In [2]:
# Load the Boston housing example data and assemble a single frame holding the
# feature columns followed by the target column.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell needs an older scikit-learn release to run.
boston = datasets.load_boston()
df = pd.DataFrame(
    data=np.column_stack((boston.data, boston.target)),
    columns=boston.feature_names.tolist() + ['target'],
)

# Separate the predictors from the regression target.
X = df.drop(columns='target')
y = df.target

In [3]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split
# identical across runs.
X_train, X_valid, y_train, y_valid = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=20180418,
)

In [4]:
# fastFM requires sparse input, so treat the low-cardinality features as
# categorical and one-hot encode them. The cut-off (at most 30 distinct values
# in the training split) is a heuristic, measured on the training data only.
mask = X_train.nunique() <= 30
sparse_cols = X_train.columns[mask]
dense_cols = X_train.columns[~mask]

# Encode the "categorical" features. The encoder only learns the levels present
# in the training split, so the validation split may contain levels it has never
# seen; handle_unknown="ignore" encodes those as all zeros for that feature
# instead of raising at transform time.
onehot_encoder = OneHotEncoder(handle_unknown="ignore")
X_train_sparse = onehot_encoder.fit_transform(X_train[sparse_cols])
X_valid_sparse = onehot_encoder.transform(X_valid[sparse_cols])

# Rescale the remaining (high-cardinality) features for the FM design matrix.
# A dedicated scaler is used so its fit is not overwritten below and can still
# be applied to new FM inputs later.
robust_scaler_fm = RobustScaler()
X_train_dense_fm = robust_scaler_fm.fit_transform(X_train[dense_cols])
X_valid_dense_fm = robust_scaler_fm.transform(X_valid[dense_cols])

# Rescale every raw feature for the plain Ridge baseline.
robust_scaler = RobustScaler()
X_train_dense = robust_scaler.fit_transform(X_train)
X_valid_dense = robust_scaler.transform(X_valid)

In [5]:
# Build the FM design matrices column-wise: the one-hot encoded block first,
# followed by the rescaled dense block. Train and validation use the same order.
X_train_fm = hstack((X_train_sparse, X_train_dense_fm))
X_valid_fm = hstack((X_valid_sparse, X_valid_dense_fm))

In [6]:
# Adapted from http://ibayer.github.io/fastFM/tutorial.html
# Fit a rank-3 factorization machine with ALS on the sparse + dense features
# and report its RMSE on the validation split.
fm = als.FMRegression(n_iter=1000, init_stdev=0.1,
                      rank=3, l2_reg_w=0.5, l2_reg_V=1)
fm.fit(X_train_fm, y_train)
pred_valid_fm = fm.predict(X_valid_fm)
# mean_squared_error expects (y_true, y_pred); MSE is symmetric, so the value
# is the same either way, but the documented order is used here.
rmse_fm = np.sqrt(mean_squared_error(y_valid, pred_valid_fm))
print("RMSE (FM): {:.5f}".format(rmse_fm))

RMSE (FM): 3.48930


In [7]:
# Baseline: ridge regression on all raw features after robust scaling,
# evaluated by RMSE on the validation split.
ridge = Ridge(alpha=1)
ridge.fit(X_train_dense, y_train)
pred_valid_r = ridge.predict(X_valid_dense)
# mean_squared_error expects (y_true, y_pred); MSE is symmetric, so the value
# is the same either way, but the documented order is used here.
rmse_r = np.sqrt(mean_squared_error(y_valid, pred_valid_r))
print("RMSE (Ridge): {:.5f}".format(rmse_r))

RMSE (Ridge): 4.49382


In [8]:
# Control: ridge regression on the same one-hot + scaled design matrix the FM
# uses, to separate the effect of the encoding from the effect of the model.
ridge_fm = Ridge(alpha=1)
ridge_fm.fit(X_train_fm, y_train)
pred_valid_rfm = ridge_fm.predict(X_valid_fm)
# mean_squared_error expects (y_true, y_pred); MSE is symmetric, so the value
# is the same either way, but the documented order is used here.
rmse_rfm = np.sqrt(mean_squared_error(y_valid, pred_valid_rfm))
print("RMSE (Ridge with FM features): {:.5f}".format(rmse_rfm))

RMSE (Ridge with FM features): 4.41519
