# NEURAL COLLABORATIVE FILTERING

In [1]:
import os
import sys

import pandas as pd

# Make the project root importable so that the `src` package resolves.
# The root is derived from the user's home directory (the same "~/recsys"
# convention the data paths in this notebook use) instead of a hardcoded
# absolute path, and is only appended once so re-running the cell does not
# pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.expanduser("~/recsys")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.data.cncf_preprocess import PreProcessDataNCFContextual

In [3]:
# --- Input files -----------------------------------------------------------
# NOTE(review): this relative location differs from the "~/recsys/data/raw/frappe/"
# used by the later configuration cells — confirm it resolves from this notebook.
path = "../../data/raw/frappe/"
data_file = "frappe.csv"
meta_file = "meta.csv"

# --- Column roles ----------------------------------------------------------
user_column = "user"
item_column = "item"
key_column = "item"
rating_column = "rating"

# --- Filtering -------------------------------------------------------------
# Passed to the preprocessor as `min_interactions`.
min_interaction = 5

In [None]:
# Build the preprocessed dataset from the configuration defined in the
# previous cell (no contextual columns are passed here).
data = PreProcessDataNCFContextual(
    # location of the raw files
    path=path,
    data_file=data_file,
    meta_file=meta_file,
    # column roles
    user_column=user_column,
    item_column=item_column,
    key_column=key_column,
    rating_column=rating_column,
    # minimum-interaction threshold
    min_interactions=min_interaction,
)

In [5]:
# Persist the processed dataset under this name.
# NOTE(review): the recorded output for this cell reports a
# ".../frappe_ncf/frappe_ncf" location, which does not match this name — the
# output looks stale; re-run to confirm where the data actually lands.
data.save_data("frapppe5")

Saved in:  /home/alexabades/recsys/src/data/processed/frappe_ncf/frappe_ncf


# CONTEXT AWARE NEURAL COLLABORATIVE FILTERING

In [12]:
# --- Input files -----------------------------------------------------------
path = "~/recsys/data/raw/frappe/"
data_file = "frappe.csv"
meta_file = "meta.csv"

# --- Column roles ----------------------------------------------------------
user_column = "user"
item_column = "item"
key_column = "item"
rating_column = "rating"

# --- Context features ------------------------------------------------------
ctx_categorical_columns = ["daytime", "weather", "isweekend", "homework"]
ctx_numerical_columns = ["cnt"]
# Maps a transformation name to the column it is applied to.
columns_to_transform = {"log": "cnt"}

# --- Filtering -------------------------------------------------------------
min_interactions = 5

In [13]:
# Build the context-aware dataset from the configuration in the previous cell.
data = PreProcessDataNCFContextual(
    # location of the raw files
    path=path,
    data_file=data_file,
    meta_file=meta_file,
    # column roles
    user_column=user_column,
    item_column=item_column,
    key_column=key_column,
    rating_column=rating_column,
    # context features and their transformations
    ctx_categorical_columns=ctx_categorical_columns,
    ctx_numerical_columns=ctx_numerical_columns,
    columns_to_transform=columns_to_transform,
    # minimum-interaction threshold (the run log reports k-core cleaning with k: 5)
    min_interactions=min_interactions,
)

log transformation performed on ['cnt'] 
Iteration 0
Iteration 1
K-core cleaning performed with k: 5


No Columns specified to normalize: Normalizing all Numerical Columns


In [14]:
# Persist the processed dataset. Per the recorded output, this name is used
# verbatim for the output directory, so the spelling must match whatever
# later loads it.
data.save_data("frapppe5Context")

Saved in:  /home/alexabades/recsys/src/data/processed/frapppe5Context/frapppe5Context


# Preprocess 2

## Data Investigation

In [5]:
# Read the raw, tab-separated Frappe interaction file into a DataFrame.
path = "../../../data/raw/frappe/frappe.csv"
df = pd.read_csv(filepath_or_buffer=path, sep="\t")

In [8]:
# Fraction of rows whose weather context is "unknown".
# The mean of a boolean Series is the share of True values; it is computed
# vectorised instead of iterating the Series with the builtin sum().
(df["weather"] == "unknown").mean()

0.1302350238558049

In [8]:
df.columns

Index(['user', 'item', 'cnt', 'daytime', 'weekday', 'isweekend', 'homework',
       'cost', 'weather', 'country', 'city'],
      dtype='object')

In [27]:
# Representative hour of the day for each nominal `daytime` label.
time_to_hour = {
    "sunrise": 6,
    "morning": 9,
    "noon": 12,
    "afternoon": 15,
    "evening": 18,
    # NOTE(review): same hour as "evening", so the two labels become
    # indistinguishable after the mapping — confirm this is intended.
    "sunset": 18,
    "night": 21,
}

# Series.map() silently yields NaN for any label missing from the mapping.
# Check up front (before mutating df) so an unexpected label fails loudly
# instead of writing NaN hours into the derived column.
unmapped_labels = sorted(set(df["daytime"].dropna().unique()) - set(time_to_hour), key=str)
if unmapped_labels:
    raise ValueError(f"daytime labels without an hour mapping: {unmapped_labels}")

# Apply the mapping
df["time_of_day"] = df["daytime"].map(time_to_hour)

# Show the resulting columns (the new `time_of_day` should be last).
df.columns




Index(['user', 'item', 'cnt', 'daytime', 'weekday', 'isweekend', 'homework',
       'cost', 'weather', 'country', 'city', 'time_of_day'],
      dtype='object')

In [28]:
# Write the augmented frame next to the raw file, tab-separated and without
# the index column.
save_path = "../../../data/raw/frappe/frappe_v2.csv"
df.to_csv(path_or_buf=save_path, sep="\t", index=False)

In [23]:
# Count rows per `homework` label in one vectorised pass and look each count
# up by its label, rather than by the label's position in unique() (which
# depends on row order and can silently swap the printed names).
# NOTE(review): assumes the labels are spelled exactly "unknown", "home" and
# "work" — confirm against df.homework.unique(); a different spelling raises
# KeyError here instead of printing a mislabelled count.
homework_counts = df["homework"].value_counts()
unknown = homework_counts.loc["unknown"]
home = homework_counts.loc["home"]
work = homework_counts.loc["work"]

# Absolute counts
print(f'Unkown: {unknown}')
print(f'Home: {home}')
print(f'Work: {work}')

# Shares of the full dataset
print(f'Unkown: {unknown/len(df):.2f}')
print(f'Home & Work: {(home+work)/len(df):.2f}')

Unkown: 75670
Home: 15771
Work: 4762
Unkown: 0.79
Home & Work: 0.21


Decided not to use `homework`: roughly 80% of the instances are unknown, so the feature would add little extra information.

## Preprocess the data

In [1]:
import os
import sys

import pandas as pd

# Make the project root importable so that the `src` package resolves.
# The root is derived from the user's home directory (the same "~/recsys"
# convention the data paths in this notebook use) instead of a hardcoded
# absolute path, and is only appended once so re-running the cell does not
# pile up duplicate sys.path entries.
PROJECT_ROOT = os.path.expanduser("~/recsys")
if PROJECT_ROOT not in sys.path:
    sys.path.append(PROJECT_ROOT)

from src.data.cncf_preprocess import PreProcessDataNCFContextual

In [2]:
# --- Input files -----------------------------------------------------------
path = "~/recsys/data/raw/frappe/"
data_file = "frappe_v2.csv"
meta_file = "meta.csv"

# --- Column roles ----------------------------------------------------------
user_column = "user"
item_column = "item"
key_column = "item"
rating_column = "rating"

# --- Context features ------------------------------------------------------
ctx_categorical_columns = ["weather"]
ctx_numerical_columns = ["cnt"]
# Maps a transformation name to the column it is applied to; insertion order
# is kept as in the original definition.
columns_to_transform = {
    "log": "cnt",
    "binary": "isweekend",
    "cyclical": "time_of_day",
}

# --- Filtering -------------------------------------------------------------
min_interactions = 5


In [3]:
# Build the V2 context-aware dataset from the configuration in the previous cell.
data = PreProcessDataNCFContextual(
    # location of the raw files
    path=path,
    data_file=data_file,
    meta_file=meta_file,
    # column roles
    user_column=user_column,
    item_column=item_column,
    key_column=key_column,
    rating_column=rating_column,
    # context features and their transformations
    ctx_categorical_columns=ctx_categorical_columns,
    ctx_numerical_columns=ctx_numerical_columns,
    columns_to_transform=columns_to_transform,
    # minimum-interaction threshold (the run log reports k-core cleaning with k: 5)
    min_interactions=min_interactions,
)

Processed columns: user, item, rating, weather, cnt, isweekend, time_of_day
log transformation performed on ['cnt'] 
binary transformation performed on ['isweekend'] 
cyclical transformation performed on ['time_of_day'] 
Iteration 0
Iteration 1
K-core cleaning performed with k: 5


No Columns specified to normalize: Normalizing all Numerical Columns


In [4]:
# Persist the processed dataset. Per the recorded output, this name is used
# verbatim for the output directory, so the spelling must match whatever
# later loads it.
data.save_data("frapppe5ContextV2")

Saved in:  /home/alexabades/recsys/src/data/processed/frapppe5ContextV2/frapppe5ContextV2
