In [53]:
import pandas as pd
import urllib.parse
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
ds = pd.read_csv("search_24.csv")
tf = TfidfVectorizer(analyzer='word', ngram_range=(1, 20), min_df=0, stop_words='english')

term = ds['term'].astype(str).str.replace('+',' ',regex=True)
term = term.map(lambda x:urllib.parse.unquote(x))
term = term.apply(lambda x: x.strip('HTTP/1.1'))
user = ds['user_id']
user_term = pd.DataFrame(user)
user_term.insert(1,"term",term,True)
user_term.style.hide_index()
# print(user_term[:5].to_string(index=False))

tfidf_matrix = tf.fit_transform(user_term['term'])
cosine_similarities = linear_kernel(tfidf_matrix, tfidf_matrix)
results = {}

for idx, row in ds.iterrows():
    similar_indices = cosine_similarities[idx].argsort()[:-10:-1]
    similar_items = [(cosine_similarities[idx][i], user_term['user_id'][i]) for i in similar_indices]
    results[row['user_id']] = similar_items[1:]

def clean_list(list_item):
    if isinstance(list_item, list):
        for index in range(len(list_item)):
            if isinstance(list_item[index], list):
                list_item[index] = clean_list(list_item[index])
            if not isinstance(list_item[index], (int, tuple, float, list)):
                list_item[index] = list_item[index].strip()
    return list_item

def item(id):
    return clean_list(user_term.loc[user_term['user_id'] == id]['term'].tolist()[0])

def users_id(id):
    return clean_list(user_term.loc[user_term['user_id'] == id]['term'].to_string(index=False).split("\n"))

def recommend(id, num):
    recommend_list = []
    if (num == 0):
        print("Unable to recommend any search word as you have not chosen the number of search word to be recommended")
    elif (num==1):
        print("Recommending "+ id +", " + str(num) + " words similar to " + str(users_id(id)))
    else :
         print("Recommending "+ id +", " + str(num) + " words similar to " + str(users_id(id)))
    print("----------------------------------------------------------")
    recs = results[id][:num]
    for rec in recs:
            # print("You may also like to read: " + str(rec[1]) + " (score:" + str(rec[0]) + ")")
            if str(rec[0])!= '0.0'and str(rec[0])!='1.0':
                recommend_list.append(item(rec[1])+" "+ str(rec[0]))
   
    if set(recommend_list):
        for item_list in set(recommend_list):
            print(item_list)
        print()
    else:
        print("Please search more words to get recommendedation")
        print()

for id in set(user):
    recommend(id,10)
    

Recommending etsyben, 10 words similar to ['negative rates']
----------------------------------------------------------
brexit fx gbpusd  0.2119756197342947
arsley Energy / Jagged Peak Energy: Merger Agreement Continues Trend of Permian Consolidation and Negative Investor Response  0.048488539499497676

Recommending dawsonge, 10 words similar to ['European Pharmaceuticals']
----------------------------------------------------------
supply update  0.18931857613875905
european high yield  0.13067195007220003
european banks  0.20585480330169495
asset managers  0.20585480330169495
U.S. Airlines  0.34292346899211756
european banks preview  0.13347141360410367
european credit alpha  0.13637791756615064

Recommending hsuben, 10 words similar to ['nickel']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending seideada, 10 words similar to ['aecom upgrade']
----------------------------------------------------------
Please searc

Recommending bignolib, 10 words similar to ['lebanon']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending wusiyao, 10 words similar to ['berkeley']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending bossueti, 10 words similar to ['ECB']
----------------------------------------------------------
ecb qe  0.5424031076904392

Recommending parlepra, 10 words similar to ['nvidia']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending casperts, 10 words similar to ['ross smotrich', 'ross smotrich']
----------------------------------------------------------
smotrich  0.560173460893011
ross smotrich  0.9999999999999998

Recommending morossja, 10 words similar to ['datadog', 'datadog']
----------------------------------------------------------
Please search more words to get recommendedati

Recommending cowlingh, 10 words similar to ['fabege']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending vanwout1, 10 words similar to ['tomtom']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending garradvi, 10 words similar to ['EEMEA Instant Insights - Turkey (EM Macro & So...']
----------------------------------------------------------
ashish  0.09323950901539729
Daily Macro & Strategy Research Summary  0.038852322364809844
usd cds  0.10437852480319963
supply update  0.08651739809871098
Global Daily Macro  0.06256931823377457
em flows  0.09077522029694683
Daily Macro & Strategy Research Summary (London Open)  0.027601868356006493

Recommending chooadri1, 10 words similar to ['repo', 'repo', 'repo', 'easing', 'quantitative easing', 'qe']
----------------------------------------------------------
ecb qe  0.5623176048323902

Recommending hoamy1, 1

Recommending g09636302, 10 words similar to ['halma plc']
----------------------------------------------------------
atlassian  0.5734027849349115
NMC Health PLC: A clean beat in H1, new buyback & reports of bidders for NMC stake  0.055668840303494956
qiagen  0.3083556050974016

Recommending reillepa, 10 words similar to ['rogers communication']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending philleli, 10 words similar to ['Julien Roch', 'Julien Roch']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending dolceern, 10 words similar to ['ecuador']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending grzesies, 10 words similar to ['peloton']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending makishij, 10 words 

Global Outlook  0.12147239901615722
ludovico sapio  0.12147239901615722
Kai Chang  0.1424038223279636
weekly recap  0.1391995985185401
usmca  0.7446569189807201
iPhone  0.2993077018972554
ashish  0.14298717152358223

Recommending griffsta, 10 words similar to ['it is all about the catalysts']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending lferrara, 10 words similar to ['ijb']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending cartenat, 10 words similar to ['china inflation', 'ecb']
----------------------------------------------------------
ecb qe  0.5424031076904392

Recommending daiqihon, 10 words similar to ['covered bond primer']
----------------------------------------------------------
singapore bond  0.19903257290750462
01  0.3242018066018127
reits handbook  0.16027398703252388
01  0.16801120635798283
iboxx bond  0.18810408567840892
pri

food places in Clinton  0.16209492007265594
supply update  0.12075985557454555
food places in Clinton  0.33150455179459126
food places in Clinton  0.16510358116279134
food places in Clinton  0.16151268796598273
food places in Clinton  0.15601053120490582

Recommending saitabdu, 10 words similar to ['US credit alpha']
----------------------------------------------------------
US credit alpha  1.0000000000000002
IG Credit  0.2260304168763358
Addressing the potential pitfalls of cds  0.2208691705929248
STATISTICS  0.15289114908671428
daily credit  0.24116485201500898
european credit alpha  0.6808410414935867
supply update  1.0000000000000002
shale initiation  0.2208691705929248

Recommending ardurag, 10 words similar to ['prosus', 'prosus']
----------------------------------------------------------
Please search more words to get recommendedation

Recommending howellge1, 10 words similar to ['european credit alpha']
----------------------------------------------------------
european banks