In [1]:
from sklearn.model_selection import train_test_split       # for splitting dataset
from tensorflow.keras.preprocessing.text import Tokenizer  # to encode text to int
from tensorflow.keras.preprocessing.sequence import pad_sequences   # to do padding or truncating
from tensorflow.keras.models import Sequential     # the model
from tensorflow.keras.layers import Embedding, LSTM, Dense # layers of the architecture
from tensorflow.keras.callbacks import ModelCheckpoint   # save model
from tensorflow.keras.models import load_model   # load saved model
import re
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.naive_bayes import  MultinomialNB
from sklearn.metrics import classification_report
from sklearn.linear_model import LogisticRegression
from math import cos, asin, sqrt, pi

### Finding the cosine similarities based on cuisines column 

In [2]:
def csrc(df):
    list2 = []
    for i in df.cuisines:
        list2.append(list(set(i)))
    
    vect = CountVectorizer(analyzer = lambda x:x)
    matrix2=vect.fit_transform(list2).toarray()
    
    cosine_similarities_cui = cosine_similarity(matrix2, matrix2)
    return cosine_similarities_cui,vect    

### Finding the cosine similarities based on dish_liked column 

In [3]:
def csrd(df):
    list1 = []
    for i in df.dish_liked:
        list1.append(list(set(i)))
    vect = CountVectorizer(analyzer = lambda x:x)
    matrix1=vect.fit_transform(list1).toarray()
    
    cosine_similarities_dish = cosine_similarity(matrix1, matrix1)
    return cosine_similarities_dish,vect

### Finding the cosine similarities based on reviews column 

In [4]:
def csrr(df):
    
    tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 2), min_df=0, stop_words='english')
    tfidf_matrix_reviews = tfidf.fit_transform(df['reviews'])
    
    cosine_similarities_rewiew = cosine_similarity(tfidf_matrix_reviews, tfidf_matrix_reviews)
    return cosine_similarities_rewiew,tfidf

### Sentiment Analysis of the reviews using Linear Regression Model and Multinomial Naive bayes Model

In [5]:
def sentiment_lr_mnb(rating):
    vectorizer = CountVectorizer(token_pattern=r'\b\w+\b',ngram_range=(1,2))
    train_matrix= vectorizer.fit_transform(rating['Lemma'])
    lr = LogisticRegression()
    X_train = train_matrix
    #X_test = test_matrix
    y_train = rating['rate_pred1']
    lr.fit(X_train,y_train)
    clf = MultinomialNB(alpha=1)
    clf.fit(X_train,y_train)
    return lr,clf,vectorizer 


### Sentiment Analysis of the reviews using LSTM Model

In [6]:
def get_max_length(x):
    review_length = []
    for review in x:
        review_length.append(len(review))
    return int(np.ceil(np.mean(review_length)))
def sentiment_lstm(rating):
    x=rating['Lemma']
    y=rating['rate_pred1']
    token = Tokenizer(lower=False)    # no need lower, because already lowered the data in load_data()
    token.fit_on_texts(x)
    x= token.texts_to_sequences(x)

    max_length = get_max_length(x)

    x = pad_sequences(x, maxlen=max_length, padding='post', truncating='post')

    total_words = len(token.word_index) + 1
    EMBED_DIM = 32
    LSTM_OUT = 64

    model = Sequential()
    model.add(Embedding(total_words, EMBED_DIM, input_length = max_length))
    model.add(LSTM(LSTM_OUT))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(optimizer = 'adam', loss = 'binary_crossentropy', metrics = ['accuracy'])
    checkpoint = ModelCheckpoint(
    'models/LSTM.h5',
    monitor='accuracy',
    save_best_only=True,
    verbose=1
    )
    model.fit(x, y, batch_size = 128, epochs = 5, callbacks=[checkpoint])
    return model,token,max_length

### Recommending top restaurants based on cosine similarity

In [7]:
def recommend(index,cosine_similarities,df):
    indices=pd.Series(df.index)
    # Create a list to put top restaurants
    restaurant_recom = []
    for i in index:
        # Find the index of the hotel entered
        idx = indices[indices == i].index[0]
        top_indexes = []
        # Find the restaurants with a similar cosine-sim value and order them from bigges number
        score_series = pd.Series(cosine_similarities[idx]).sort_values(ascending=False)
        # Extract top 30 restaurant indexes with a similar cosine-sim value
        #print(score_series.iloc[0:30])
        for i in score_series.index:
            if(score_series[i]>0.85):
                top_indexes.append(i)
        #Name of top restaurants
        for each in top_indexes:
            restaurant_recom.append(list(df.index)[each])

    df_new = pd.DataFrame(columns=df.columns)

    for item in  restaurant_recom:
        df_new = df_new.append(pd.DataFrame(df[df.columns][df.index == item].sample()))
    return df_new
    

### Function to recommend restaurants based on cost and location

In [8]:
def chooseloc(df,la,lo,cost):
    df['man_dist']=0
    df['loc_dist_km']=0
    lm=[]
    lk=[]
    for i,j in (zip(df.iloc[:,16],df.iloc[:,17])):
        p = pi/180
        a = 0.5 - cos((la-float(i))*p)/2 + cos(la*p) * cos(float(i)*p) * (1-cos((lo-float(j))*p))/2
        d=12742 * asin(sqrt(a))
        m=abs(float(i)-la)+abs(float(j)-lo)
        lm.append(m)
        lk.append(d)
    df["man_dist"]=lm
    df["loc_dist_km"]=lk
    df=df[(df['approx_cost(for two people)']<cost)]
    df=df[df['loc_dist_km']<5]
    df=df[df['final_rate']>3]
    df=df.sort_values(by=["final_rate","loc_dist_km"],ascending=(False,True))
    return df

In [9]:
def choosetime(t,user):
    if(t<dt.time(11,30) and t>=dt.time(5,30)):
        mor=user[(user['Time'].dt.time < dt.time(11,30)) & (user['Time'].dt.time >= dt.time(5,30))]
        return mor
    elif(t>=dt.time(11,30) and t<dt.time(16,0)):
        noon=user[(user['Time'].dt.time >= dt.time(11,30)) & (user['Time'].dt.time < dt.time(16,0))]
        return noon
    else:
        night=user[(user['Time'].dt.time >= dt.time(16,0)) | (user['Time'].dt.time < dt.time(5,30))]
        return night

# call once


In [10]:
rating = pd.read_pickle("Ratings2.pkl")
df=pd.read_pickle("final_final.pkl")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

In [11]:
cosine_similarities_cui,vectc=csrc(df)
cosine_similarities_dish,vectd=csrd(df)
cosine_similarities_final = (cosine_similarities_cui + cosine_similarities_dish)/2.0
#lr,mnb,sentivectlr = sentiment_lr_mnb(rating)
#lstm,lstmtoken,max_length = sentiment_lstm(rating)

In [12]:
lr,mnb,sentivectlr = sentiment_lr_mnb(rating)
lstm,lstmtoken,max_length = sentiment_lstm(rating)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Epoch 1/5

Epoch 00001: accuracy improved from -inf to 0.84284, saving model to models\LSTM.h5
Epoch 2/5

Epoch 00002: accuracy improved from 0.84284 to 0.87840, saving model to models\LSTM.h5
Epoch 3/5

Epoch 00003: accuracy improved from 0.87840 to 0.89378, saving model to models\LSTM.h5
Epoch 4/5

Epoch 00004: accuracy improved from 0.89378 to 0.90695, saving model to models\LSTM.h5
Epoch 5/5

Epoch 00005: accuracy improved from 0.90695 to 0.91918, saving model to models\LSTM.h5


##### Restaurant-Restaurant Recommendation (Content-Based filtering and Knowledge-based filtering)

In [13]:
def rec_user(user,t,lat,long,df):
    timing=choosetime(t,user)
    c=0
    while(len(timing)==0 and c<2):
        c+=1
        if(dt.time(5,30)<=t and t<dt.time(11,30)):
            t=dt.time(12,0)
            timing=choosetime(t,user)
        elif(dt.time(16,0)>t and t>=dt.time(11,30)):
            t=dt.time(17,0)
            timing=choosetime(t,user)
        else:
            t=dt.time(12,0)
            timing=choosetime(t,user)
    #------no timing
    if(len(timing)==0):
        final_rec=chooseloc(df,lat,long,costt)
        return final_rec
    else:
        test_matrix = sentivectlr.transform(timing['Review'])
        predlr = lr.predict(test_matrix)
        prednb=mnb.predict(test_matrix)
        x_test=timing['Review']
        x_test = lstmtoken.texts_to_sequences(x_test)
        x_test = pad_sequences(x_test, maxlen=max_length, padding='post', truncating='post')
        pred = list(lstm.predict(x_test))
        for i in range(len(pred)):
            pred[i]=int(round(pred[i][0],0))
        predlstm=np.array(pred)
        majority=list(predlr+prednb+predlstm)
        l=[]
        n=[]
        for i in range(len(majority)):
            if majority[i] >=2:
                l.append(user['Restaurant_Id'][i])
            else:
                n.append(user['Restaurant_Id'][i])
                continue
        if(len(l)>0):
            rec=recommend(l,cosine_similarities_final,df)
            k = l
        else:
            rec=recommend(n,cosine_similarity_dc,df)
            k = n
        rec= rec.drop_duplicates(['name','address'])
        costt=max(timing['Price'])+100
        final_rec=chooseloc(rec,lat,long,costt)
        return final_rec,k

In [28]:
import datetime as dt
user=pd.read_csv("user1_final.csv")
rest_id = list(user.Restaurant_Id)
user["Time"]=pd.to_datetime((user['Time']))
t=dt.time(10,30)
lat=12.9292731
long=77.5824229
final_rec,timing = rec_user(user,t,lat,long,df)
for i in final_rec.index:
    if i in rest_id:
        final_rec = final_rec.drop(i)
final_rec

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Unnamed: 0,address,name,online_order,book_table,rate,votes,location,rest_type,dish_liked,cuisines,approx_cost(for two people),list_review_rate,reviews,no_of_cuisines,dish_liked_org,final_rate,lat,long,man_dist,loc_dist_km
4019,"16th Main Road, BTM, Bangalore",So-Fat's Dosas,0,0,3.8,43,BTM,Quick Bites,[dosa],[south indian],100,"[3.0, 4.0, 4.0, 5.0, 3.0, 4.0, 5.0, 5.0, 4.0, ...",amazing collection dosas wonder see much varie...,1,[Bhaji Dosa],4.6,12.91127585,77.60456543431182,0.04014,3.124718
175,"Next To BMTC Bus Stand, Banashankari, Bangalore",Srikrishna Bhavan,1,0,3.8,42,Banashankari,Quick Bites,"[roti, dosa]","[south indian, north indian, chinese]",150,[5.0],sri krishna bhavan best hotel bangalore situat...,3,"[Roti, Masala Dosa]",4.4,12.96528325,77.59445195,0.048039,4.210993
3468,"77, Near Ayyappa Temple, Hosur Main Road, Madi...",Krishna Sagar,1,0,3.9,52,BTM,Quick Bites,[dosa],"[south indian, north indian, chinese]",150,"[5.0, 4.0]",good taste many times like poori kurna idly sa...,3,[Masala Dosa],4.2,12.91127585,77.60456543431182,0.04014,3.124718
273,"Near Reliance Trends, 1st Cross, 3rd Phase, Ba...",Sri Sai 99 Variety Dosa,1,0,3.8,25,Banashankari,Quick Bites,"[dosa, dosa, dosa, dosa, dosa]",[south indian],150,"[4.0, 5.0, 5.0, 4.0]",sweet corn schezwan dosa golmaal dosa yesterda...,1,"[Sweet Corn Mushroom Schezwan Paneer Dosa, Pan...",4.2,12.96528325,77.59445195,0.048039,4.210993
4110,"148/2, 20th Main Road, 2nd Cross, 1st Stage",Sagar Fast Food,1,0,3.7,66,BTM,Quick Bites,"[idli, dosa]","[south indian, chinese, north indian]",150,"[5.0, 4.0]",one best place eat breakfast masala dosa idli ...,3,"[Idli, Masala Dosa]",4.1,12.91127585,77.60456543431182,0.04014,3.124718
5679,"140, Dayanand Sagar College Road, Colony, 99 S...",Sri Devi Sagar,0,0,4.5,0,Kumaraswamy Layout,Quick Bites,[dosa],[south indian],50,[4.0],idli wada sambhar must try placen parcel thick...,1,[Masala Dosa],4.0,12.9081487,77.5553179,0.048229,3.761282
3447,"314/B, 20th Main, 80 Feet Road, 8th Block, Opp...",Sagar Fast Food,1,0,4.0,137,Koramangala 8th Block,Quick Bites,[dosa],"[south indian, north indian, fast food, chinese]",125,[4.0],sagar fast food one best mangalorean restauran...,4,[Neer Dosa],4.0,12.9417812,77.6160146,0.0461,3.897076
6262,"6th Block, Sony World Signal, Koramangala 6th ...",99 Variety Dosa & Pav Bhaji,1,0,3.8,16,Koramangala 6th Block,Quick Bites,"[dosa, dosa, dosa, dosa, dosa]","[south indian, north indian, fast food]",125,"[4.0, 4.0, 4.0]",love customised dosas place offers lot variety...,3,"[Green Peas Cheese Masala Dosa, Gobi Masala Do...",3.9,12.93075555,77.62265777504285,0.041717,4.363589
1157,"Big Bazar Compound, Opposite Central Mall, 9th...",Sri Sai 99 Variety Dosa,1,0,3.6,37,Jayanagar,Quick Bites,"[dosa, dosa, dosa, dosa, dosa]",[south indian],150,"[5.0, 3.0]",small thela opposite central mall jayanagar au...,1,"[Onion Masala Dosa, Paneer Sada Dosa, Sweet Co...",3.8,12.9292731,77.5824229,0.0,0.0
3474,"Makana Tower, Tavarekere Main Road",Ashirvaad Grand,1,0,4.0,113,BTM,Quick Bites,"[dosa, paneer butter masala]","[chinese, north indian, south indian]",200,"[5.0, 1.0, 4.0, 4.0]",quick delivery food qualitatively done food t...,3,"[Masala Dosa, Panneer Butter Masala]",3.8,12.91127585,77.60456543431182,0.04014,3.124718


### Evaluation Metrics

In [29]:

df1=df.loc[timing]
rdf=vectd.transform(df1['dish_liked']).toarray()
udf=vectd.transform(final_rec['dish_liked']).toarray()
dish_acc = cosine_similarity(rdf,udf)
rdf=vectc.transform(df1['cuisines']).toarray()
udf=vectc.transform(final_rec['cuisines']).toarray()
cui_acc = cosine_similarity(rdf,udf)
print(dish_acc)
print(cui_acc)


[[1.         0.70710678 1.         1.         0.70710678 1.
  1.         1.         1.         0.70710678 1.         0.5547002
  0.9701425  1.         1.         0.70710678 0.9701425 ]
 [0.83205029 0.58834841 0.83205029 0.83205029 0.58834841 0.83205029
  0.83205029 0.83205029 0.83205029 0.58834841 0.83205029 0.46153846
  0.80720735 0.83205029 0.83205029 0.78446454 0.80720735]
 [0.66666667 0.47140452 0.66666667 0.66666667 0.70710678 0.66666667
  0.66666667 0.66666667 0.66666667 0.47140452 0.66666667 0.36980013
  0.72760688 0.66666667 0.66666667 0.70710678 0.72760688]
 [1.         0.70710678 1.         1.         0.70710678 1.
  1.         1.         1.         0.70710678 1.         0.5547002
  0.9701425  1.         1.         0.70710678 0.9701425 ]
 [0.5        0.35355339 0.5        0.5        0.70710678 0.5
  0.5        0.5        0.5        0.35355339 0.5        0.2773501
  0.60633906 0.5        0.5        0.70710678 0.60633906]]
[[0.70710678 0.81649658 0.81649658 0.70710678 0.8164965

In [37]:
final_acc=0
for i in dish_acc:
    print(sum(i)/len(final_rec))
    final_acc+=sum(i)/len(final_rec)
final_acc=final_acc/len(timing)
    
    

0.9013771953683579
0.7615274039590848
0.6381590089209789
0.9013771953683579
0.5065499156972895


In [38]:
final_acc

0.7417981438628137