# Final Project: Restaurant Recommender System

## Imports

In [11]:
import pandas as pd
import numpy as np
import yaml
import os
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.model_selection import train_test_split
from scipy.sparse import coo_matrix
from scipy.sparse.linalg import svds

In [3]:
# Load the notebook configuration from config.yml. The context manager closes
# the file handle as soon as parsing is done instead of leaving it open for the
# lifetime of the kernel.
# NOTE(review): yaml.FullLoader can build more than plain scalars/lists/dicts;
# if config.yml only holds plain data, yaml.safe_load would be the safer choice.
with open("config.yml", "r") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Directory that contains the CSV files read below.
DATA_DIRECTORY = config["DATA_DIRECTORY"]

## Chargement des données

In [4]:
# Orders placed by customers with vendors
order_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "orders.csv"))

# Vendor data (location, identifier)
vendors_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "vendors.csv"))

# Customer data (date of birth, ID, gender)
train_customer_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "train_customers.csv"))

# Customer locations
train_locations_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "train_locations.csv"))

# Test set of customer data
test_customer_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "test_customers.csv"))

# Test set of customer locations
test_locations_df = pd.read_csv(os.path.join(DATA_DIRECTORY, "test_locations.csv"))


## Définition d'une super-classe de modèle

In [5]:
class RecommanderModel:
    """Common base class for the recommender models of this notebook.

    The `train`, `predict` and `save_predictions` hooks do nothing here and
    return None; they accept arbitrary keyword arguments.
    """

    # Label printed by `print_model`; "undefined" at this level.
    model_type = "undefined"
    # Output schema of the model, read from the notebook configuration.
    output_schema = config["output_schema"]

    def __init__(self, model_name: str) -> None:
        """Store `model_name` on the instance."""
        self.model_name = model_name

    def train(self, **kwargs) -> None:
        """Training hook; does nothing in the base class."""

    def predict(self, **kwargs) -> None:
        """Prediction hook; does nothing in the base class."""

    def save_predictions(self, **kwargs) -> None:
        """Prediction-saving hook; does nothing in the base class."""

    def print_model(self):
        """Print the model type and the model name on two lines."""
        description = f"{self.model_type} recommander model\nmodel name: '{self.model_name}'"
        print(description)

In [34]:
# Attach to every order the location row it refers to. Orders name the key
# "LOCATION_NUMBER" while the locations table names it "location_number", so
# both key columns are present in the merged frame.
order_with_loc_df = pd.merge(
    order_df[["customer_id", "LOCATION_NUMBER", "vendor_id", "vendor_rating", "is_rated"]],
    train_locations_df,
    left_on=["customer_id", "LOCATION_NUMBER"],
    right_on=["customer_id", "location_number"],
)
# Encode the flag as 1 for "Yes" and 0 for anything else (missing values
# included) with a vectorised comparison instead of a per-row Python lambda.
order_with_loc_df["is_rated"] = order_with_loc_df["is_rated"].eq("Yes").astype("int64")
order_with_loc_df

Unnamed: 0,customer_id,LOCATION_NUMBER,vendor_id,vendor_rating,is_rated,location_number,location_type,latitude,longitude
0,92PEE24,0,105,,0,0,,-0.132100,-78.575297
1,92PEE24,0,105,,0,0,,-0.132100,-78.575297
2,92PEE24,0,105,,0,0,,-0.132100,-78.575297
3,QS68UD8,0,294,,0,0,Work,-0.393396,-78.544417
4,MB7VY5F,0,83,,0,0,,-1.072823,-78.464121
...,...,...,...,...,...,...,...,...,...
135298,3S6VG6R,1,199,5.0,1,1,,2.284875,0.717124
135299,ND4PIJL,0,907,,0,0,,13.380083,-1.387421
135300,1NRK5HF,0,105,,0,0,,-0.772600,0.231851
135301,QDXLWM7,1,28,,0,1,,1.751487,0.375234


In [35]:
# Two-stage split with a fixed seed: first hold out 20% of the orders as the
# test set, then hold out 20% of what remains as the validation set.
_train_val_order_df, test_order_df = train_test_split(
    order_with_loc_df,
    random_state=42,
    test_size=0.2,
)
train_order_df, val_order_df = train_test_split(
    _train_val_order_df,
    random_state=42,
    test_size=0.2,
)


In [9]:
def f1_score():
    """Placeholder with no implementation yet; takes no arguments and returns None.

    NOTE(review): presumably meant to compute an F1 metric for the
    recommendations; confirm the intended signature before implementing.
    """
    return None


In [32]:



# Build a pivot table from the training orders: one row per
# ('customer_id', 'LOCATION_NUMBER') pair, one column per 'vendor_id', cells
# holding 'vendor_rating' and 0 where the pivot has no value.
Y_matrix = train_order_df.pivot_table(
    values="vendor_rating",
    index=["customer_id", "LOCATION_NUMBER"],
    columns="vendor_id",
    fill_value=0,
)

# Display the user-vendor rating matrix
Y_matrix

Unnamed: 0_level_0,vendor_id,4,13,20,23,28,33,43,44,55,66,...,681,841,843,845,846,849,855,856,858,907
customer_id,LOCATION_NUMBER,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
000THBA,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00GV4J4,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00HWUU3,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00HWUU3,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
00OT8JX,3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
ZZP5BHU,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZRJABJ,2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZVGIVK,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ZZVLIB5,1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Same pivot as the one stored in Y_matrix; the result is only displayed here,
# it is not assigned to any variable.
train_order_df.pivot_table(
    values="vendor_rating",
    index=["customer_id", "LOCATION_NUMBER"],
    columns="vendor_id",
    fill_value=0,
)

In [29]:

# Run the singular value decomposition on the raw array behind Y_matrix
# (reduced form, since full_matrices is switched off).
matrix = Y_matrix.to_numpy()
u, s, vh = np.linalg.svd(matrix, full_matrices=False)

In [None]:
def cosine_similarity(v: np.ndarray, u: np.ndarray) -> float:
    """Return the cosine similarity of two vectors.

    Args:
        v: First vector.
        u: Second vector, same length as `v`.

    Returns:
        dot(v, u) / (||v|| * ||u||). When either vector has zero norm the
        ratio is undefined (0/0), so 0.0 is returned instead of NaN.
    """
    norm_product = np.linalg.norm(v) * np.linalg.norm(u)
    # An all-zero vector would otherwise produce NaN plus a RuntimeWarning.
    if norm_product == 0:
        return 0.0
    return np.dot(v, u) / norm_product


## Funk SVD

### Initialize Matrices

TypeError: type not understood