In [1]:
import pandas as pd 
import numpy as np
from sklearn.cluster import KMeans
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import PCA
from scipy.sparse import csr_matrix
from sklearn.decomposition import TruncatedSVD
from sklearn.metrics import precision_score, recall_score, f1_score

In [2]:
df = pd.read_excel("content_rec_df.xlsx" , engine= "openpyxl")

In [3]:
df.shape

(38299, 6)

In [4]:
df.head()

Unnamed: 0,Montant,Ville de l'affilié,Nom de l'affilié,Adresse PV,Famille Aff.,Groupe Aff.
0,39.0,BARDO,ECHEMI,"AV. HEDI NOUIRA ,ENNASR II",FAST FOOD,RESTAURATION
1,13.0,LA SOUKRA,WOOD'S PIZZA,"3, RES. EL BOSTENE E4",PIZZERIA,RESTAURATION
2,22.31,CITE EL GHAZELA,DI NAPOLI,"45B, AV. HABIB BOURGUIBA",RESTAURANT A LA CARTE,RESTAURATION
3,8.5,CITE EL GHAZELA,EL MAGICO,"95, AV. FETHI ZOUHIR",PIZZERIA,RESTAURATION
4,24.0,DEN-DEN,PIZZERIA KAPARI,"10,RUE IBN DHIEF",RESTAURANT A LA CARTE,RESTAURATION


In [5]:
df["Nom de l'affilié"].unique()

array(['ECHEMI', "WOOD'S PIZZA", 'DI NAPOLI', 'EL MAGICO',
       'PIZZERIA KAPARI', ' LE COIN', 'RESTAURANT KFC', 'PLAN B',
       'BAGUETTE ET BAGUETTE', 'BROWN SUGAR', 'RESTAURANT LE MAGICIEN',
       'PARADICE', 'HOBO', 'HA FOOD', 'PAPA JOHN S', 'PARAD ICE',
       'GOURMANDISE', "RESTAURANT L'OLIVIER", 'RESTAURANT ERRAYEN',
       'THE FOOD COURT \t', 'PIZZERIA BLACK HOUSE\t',
       'CAFE RESTO LES JASMINS', 'PÂTISSERIE TAKACIM', 'LATINA BISTROT\t',
       'MALIBU', 'GOURMANDISE ', 'LE GOURMET', 'BOMBAY', 'JAMROCK',
       'RESTAURANT LES DAUPHINS', 'ELY S COFFE SHOP', 'CHILI S',
       'CANTINE NEWREST', 'CAFETRIA AMIGO', 'PRET A MANGER', ' MALIBU ',
       'PATISSERIE FANY', 'YUMMY', 'BUVETTE ACTIA', 'RESTAURANT WOW',
       'CHOUPRALINE', 'RESTAURANT BREAKTIME', 'RESTAURANT MARHABA',
       'DEJA VU', 'RESTO NEGOCE', 'LE BON COIN', 'SKIFA ARBI',
       'RESTAURANT BRONX', 'RESTAURANT COOL TOUNSI', 'SCHNELL',
       '   MALIBU ', 'THE GARDEN BISTROT', 'SALON DE THE COMME CHEZ T

In [6]:
categorical_cols = ["Ville de l'affilié", "Nom de l'affilié","Adresse PV", "Famille Aff.","Groupe Aff."]  
data_encoded = pd.get_dummies(df, columns=categorical_cols)

data_sparse = csr_matrix(data_encoded.values)

n_components = 2  # Choose the number of components to reduce to
svd = TruncatedSVD(n_components=n_components)
data_svd = svd.fit_transform(data_sparse)

data_svd = np.c_[data_svd, df["Montant"].values]

cosine_similarities = cosine_similarity(data_svd)

#print(cosine_similarities)

def recommend_restaurants(Name, cosine_similarities, df):
    indices = pd.Series(df.index, index=df["Nom de l'affilié"]).drop_duplicates()

    idx = indices[Name]

    sim_scores = list(enumerate(cosine_similarities[idx]))

    sim_scores = sorted(sim_scores, reverse=True)
 
    sim_scores = sim_scores[1:11]

    restaurant_indices = [i[0] for i in sim_scores]

    return df["Nom de l'affilié"].iloc[restaurant_indices]

recommend_restaurants("BAGUETTE ET BAGUETTE", cosine_similarities, df)

2961             PLAN B
2960      BUVETTE ACTIA
2959            SCHNELL
2958    CANTINE NEWREST
2957             BOMBAY
2956             PLAN B
2955             BOMBAY
2954             PLAN B
2953      BUVETTE ACTIA
2952            SCHNELL
Name: Nom de l'affilié, dtype: object

In [7]:
recommend_restaurants("PLAN B", cosine_similarities, df).drop_duplicates()

1661                    LE GOURMET
1660                 PRET A MANGER
1659    BUVETTE RESTO SALHI FAKHER
1658                       CHILI S
1657            PÂTISSERIE TAKACIM
1656                       HA FOOD
1655                        PLAN B
1653            RESTAURANT MARHABA
1652          BAGUETTE ET BAGUETTE
Name: Nom de l'affilié, dtype: object

In [8]:
recommend_restaurants("GOURMANDISE", cosine_similarities, df).drop_duplicates()

987                    PLAN B
986    CAFE RESTO LES JASMINS
984      BAGUETTE ET BAGUETTE
981      RESTAURANT L'OLIVIER
980            RESTAURANT KFC
978              WOOD'S PIZZA
Name: Nom de l'affilié, dtype: object

In [9]:
recommend_restaurants("BOMBAY", cosine_similarities, df).drop_duplicates()

243              PRET A MANGER
241            CANTINE NEWREST
239    RESTAURANT LES DAUPHINS
235               WOOD'S PIZZA
Name: Nom de l'affilié, dtype: object

In [10]:
recommend_restaurants("PARADICE", cosine_similarities, df).drop_duplicates() 

372    BUVETTE RESTO SALHI FAKHER
371               CANTINE NEWREST
370        PIZZERIA BLACK HOUSE\t
369                 PRET A MANGER
368                   GOURMANDISE
365        RESTAURANT COOL TOUNSI
364         RESTAURANT CRISTALUIM
363                   BROWN SUGAR
Name: Nom de l'affilié, dtype: object

In [11]:
#recommend_restaurants("BOMBAY", cosine_similarities, df).drop_duplicates() 

In [12]:
recommend_restaurants("PATISSERIES LA FRIANDISE", cosine_similarities, df).drop_duplicates() 

73    BAGUETTE ET BAGUETTE
72         PIZZERIA KAPARI
71              LE GOURMET
70    RESTAURANT L'OLIVIER
69      PÂTISSERIE TAKACIM
68      RESTAURANT ERRAYEN
67        LATINA BISTROT\t
64                  PLAN B
Name: Nom de l'affilié, dtype: object

In [13]:
recommend_restaurants("PRET A MANGER", cosine_similarities, df).drop_duplicates() 

1880    BAGUETTE ET BAGUETTE
1879                 HA FOOD
1874            WOOD'S PIZZA
1872      PÂTISSERIE TAKACIM
1871             PAPA JOHN S
Name: Nom de l'affilié, dtype: object

In [14]:
recommend_restaurants("CAFETRIA AMIGO", cosine_similarities, df).drop_duplicates() 

151                 YUMMY
150         PRET A MANGER
149    PÂTISSERIE TAKACIM
148       PATISSERIE FANY
147      LATINA BISTROT\t
146           GOURMANDISE
144                BOMBAY
143               MALIBU 
Name: Nom de l'affilié, dtype: object

In [15]:
recommend_restaurants("CHILI S", cosine_similarities, df).drop_duplicates() 

258        LATINA BISTROT\t
257               DI NAPOLI
256         CANTINE NEWREST
255      PÂTISSERIE TAKACIM
253    RESTAURANT BREAKTIME
252             CHOUPRALINE
Name: Nom de l'affilié, dtype: object

In [16]:
recommend_restaurants("ECHEMI", cosine_similarities, df).drop_duplicates() 

616    CANTINE NEWREST
615          EL MAGICO
614      BUVETTE ACTIA
613       RESTO NEGOCE
610        SPOON FOODS
607       WOOD'S PIZZA
Name: Nom de l'affilié, dtype: object