In [20]:
import numpy as np
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.dummy import DummyRegressor
from sklearn.ensemble import RandomForestRegressor
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import cross_validate
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from surprise import KNNBaseline

In [6]:
# Load the two CSV inputs (paths are relative to the notebook's working directory).
df_users = pd.read_csv("../data/users_fixed.csv")
df_ratings = pd.read_csv("../data/new_df.csv")

In [7]:
# Force userID to a numeric dtype; values that cannot be parsed become NaN
# because of errors="coerce".
user_id_numeric = pd.to_numeric(df_users["userID"], errors="coerce")
df_users["userID"] = user_id_numeric

In [8]:
# Attach user attributes to every rating row via the shared userID key
# (pandas' default join type, i.e. an inner join).
df = df_ratings.merge(df_users, on="userID")

In [9]:
# KNNBaseline with MSD similarity; user_based=False requests item-item
# similarities (per the Surprise sim_options convention).
model = KNNBaseline(
    k=100,
    min_k=5,
    sim_options={'name': 'msd', 'user_based': False},
    verbose=False,
)


In [30]:
# Feature groups: "age" is standardized, every other feature is one-hot encoded.
numerical_columns = ["age"]
categorical_columns = ["gender", "mood", "anxiety", "libido", "cognition", "motivation", "focus"]

# Design matrix (numeric column first, then the categorical ones) and 1-D target.
X = df[numerical_columns + categorical_columns]
y = df["rating"].to_numpy()

# Per-column preprocessing; categories unseen during fit are ignored
# (handle_unknown="ignore") instead of raising at transform time.
ct = ColumnTransformer(
    transformers=[
        ("scaler", StandardScaler(), numerical_columns),
        ("encoder", OneHotEncoder(handle_unknown="ignore"), categorical_columns),
    ]
)

# Preprocessing followed by ordinary least squares. The pipeline is left
# unfitted in this cell.
pipe = Pipeline(steps=[('encoder', ct), ('reg', LinearRegression())])

In [31]:
# Cross-validate the linear-regression pipeline. Scores are *negated* RMSE,
# so values closer to 0 are better.
cross_validate(
    estimator=pipe,
    X=X,
    y=y,
    scoring="neg_root_mean_squared_error",
)

{'fit_time': array([0.13904524, 0.10308599, 0.11220312, 0.10118914, 0.10502315]),
 'score_time': array([0.00386262, 0.00265312, 0.00276494, 0.00270796, 0.00256586]),
 'test_score': array([-2.86087617, -2.96851162, -3.04360486, -3.0201797 , -3.05101399])}

In [32]:
# Baseline for comparison: same preprocessing, but the regressor is a
# DummyRegressor with default settings (per the scikit-learn docs it predicts
# the mean of the training targets and ignores the features).
# DummyRegressor is imported in the notebook's top import cell so that all
# dependencies are declared in one place.
pipe_dumb = Pipeline([('encoder', ct), ('reg', DummyRegressor())])

# Same scoring as the real model: negated RMSE, closer to 0 is better.
cross_validate(pipe_dumb, X, y, scoring="neg_root_mean_squared_error")


{'fit_time': array([0.01813912, 0.01352882, 0.01020098, 0.00820208, 0.00683475]),
 'score_time': array([0.00503778, 0.00477219, 0.00345707, 0.00276518, 0.00232601]),
 'test_score': array([-3.12178076, -3.181965  , -3.20067359, -3.18234548, -3.21047638])}