In [32]:
import pandas as pd
import gensim
import spacy
nlp = spacy.load("en_core_web_lg")

In [33]:
data = pd.read_csv('yoga.csv')

In [34]:
data.head()

Unnamed: 0,Asana,Benefits
0,PADOTTHANASANA,This asana strengthens\nthe abdominal muscles ...
1,PARVATASANA,This pose strengthens the nerves and muscles i...
2,ARDHA TITALI ASANA,This is an excellent \npreparatory practice fo...
3,GATYATMAK MERU \nVAKRASANA,This asana removes stiffness \nof the back and...
4,SIDEWAYS VIEWING,Sideways viewing relaxes the \ntension of the ...


In [35]:
data.shape

(326, 2)

In [36]:
# from gensim.parsing.preprocessing import remove_stopwords
# from gensim.parsing.preprocessing import strip_non_alphanum
# from gensim.parsing.preprocessing import strip_numeric
# from gensim.parsing.preprocessing import strip_multiple_whitespaces


In [37]:
def preprocess(text):
    doc = nlp(text.lower())
    tokens = [token.lemma_ for token in doc if token.is_alpha and not token.is_stop]
    return tokens

In [38]:
sentences = [preprocess(benefit) for benefit in data['Benefits']]
word_embedding_model = gensim.models.Word2Vec(sentences, vector_size=100, window=5, min_count=1, sg=1)

In [39]:
def recommend_yoga(user_problem, word_embedding_model):
    user_tokens = preprocess(user_problem)
    similar_yoga = []
    for yoga_name, benefit in zip(data['Asana'],  data['Benefits']):
        benefit_tokens = preprocess(benefit)
        similarity = word_embedding_model.wv.n_similarity(user_tokens, benefit_tokens)
        similar_yoga.append((yoga_name,similarity, benefit))
    similar_yoga.sort(key=lambda x: x[1], reverse=True)
    return similar_yoga

In [40]:
user_problem = "back pain"
recommended_yoga = recommend_yoga(user_problem, word_embedding_model)
print(recommended_yoga[0:6])

[('SARAL DHANURASANA', 0.9987373, 'The same benefits as for dhanurasana, but at decreased levels. This\nposture is useful for lower back pain due to slipped disc or cervical\nspondylitis when it can be performed without discomfort. It tones the heart\nand lungs, and is beneficial for respiratory disorders. It helps to improve the\nposture.'), ('NAUKA SANCHALANASANA', 0.9986896, 'This asana has a positive effect on the pelvis and abdomen and\nreleases energy blockages in these areas. It is especially useful for\ngynaecological disorders and postnatal recovery. It also removes\nconstipation.'), ('Upavistha Titli Asana', 0.99868256, '1. Titli Asana is the Best exercise for relaxing and stretching the aching thighs.\n2. Butterfly Pose helps to open up the hips and thighs and improves flexibility.\n3. Titli Asana is a nice stretch for relieving stress and tiredness.\n4. It stimulates the reproductive and digestive organs.\n5. Helpful for girls throughout menstruation because it helps to all

In [41]:
# df = pd.DataFrame(recommended_yoga, columns=['asana', 'sims_score'])
# print(df)

In [42]:
# df.head(10)

In [43]:
import joblib

from joblib import dump

In [44]:
joblib.dump(word_embedding_model, 'word_embedding_model')

['word_embedding_model']