In [None]:
!pip install matplotlib

In [None]:
!pip install spacy

In [None]:
import pandas as pd
import re
import string
import matplotlib.pyplot as plt
import numpy as np
import spacy
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from spacy.tokenizer import Tokenizer
from sklearn.neighbors import NearestNeighbors
import en_core_web_lg

In [None]:
# Read the dataset from 'medical.csv' (path is relative to the notebook's
# working directory) into a DataFrame.
df = pd.read_csv('medical.csv')
# Preview the first rows.
df.head()

In [None]:
def make_into_list(words):
    """Split `words` on single space characters and return the pieces as a list.

    Consecutive spaces produce empty-string entries, and an empty input
    yields ``['']``, because the split is on the literal ``" "`` separator.
    """
    return words.split(" ")

In [None]:
# Treat the literal string 'None' as a missing value, then drop every row
# that has at least one missing field.
df = df.replace('None', np.nan)
# drop=True discards the old index instead of storing it as an extra 'index'
# column, which also keeps this cell safe to re-run. The fresh 0..n-1 index
# matters later: recommend() feeds neighbour positions to df.loc.
df = df.dropna().reset_index(drop=True)

In [None]:
# Build a space-separated 'ailments' text column from the comma-separated
# 'alments' column; 'ailments' is what the tokenizer and TF-IDF cells read.
# NOTE(review): 'alments' looks like a misspelling -- confirm it matches the
# actual column header in medical.csv before changing it.
df['ailments'] = df['alments'].str.replace(',', ' ')


df.head()

In [None]:
# Show which spaCy version is installed in this kernel.
print(spacy.__version__)  

In [None]:
# Load the large English spaCy pipeline. This cell only uses its vocabulary;
# the full pipeline is what the tokenize() helper calls.
nlp = spacy.load("en_core_web_lg")

# The Tokenizer, constructed from the vocabulary alone (no extra rules passed)
tokenizer = Tokenizer(nlp.vocab)

# Make the tokens for each row of the 'ailments' column: one list of token
# texts per document, streamed through the tokenizer in batches of 500.
combined_tokens = [
    [token.text for token in doc]
    for doc in tokenizer.pipe(df['ailments'], batch_size=500)
]
df['combined_tokens'] = combined_tokens

# Last expression of the cell, so the notebook displays it.
df['combined_tokens'].head()

In [None]:
def tokenize(document):
    """Return the whitespace-stripped lemmas of `document`.

    The text is run through the module-level `nlp` pipeline; tokens flagged
    as stop words or as punctuation are skipped.
    """
    lemmas = []
    for tok in nlp(document):
        if tok.is_stop or tok.is_punct:
            continue
        lemmas.append(tok.lemma_.strip())
    return lemmas

In [None]:
# Instantiate vectorizer object: English stop words removed, unigrams and
# bigrams, vocabulary capped at 2000 features.
tfidf = TfidfVectorizer(stop_words='english',
                        ngram_range=(1, 2),
                        max_features=2000)

# Create a vocabulary and tf-idf score per document
dtm = tfidf.fit_transform(df['ailments'])

# Get feature names to use as dataframe column headers.
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# prefer get_feature_names_out() and fall back for older installations.
if hasattr(tfidf, 'get_feature_names_out'):
    feature_names = tfidf.get_feature_names_out()
else:
    feature_names = tfidf.get_feature_names()

# toarray() gives a plain ndarray (todense() would give the legacy np.matrix).
dtm = pd.DataFrame(dtm.toarray(), columns=feature_names)

# Fit on the raw values so later queries, which pass bare arrays, do not
# trigger feature-name mismatch warnings.
nn = NearestNeighbors(n_neighbors=4, algorithm='kd_tree')
nn.fit(dtm.values)

# View Feature Matrix as DataFrame
print(dtm.shape)
dtm.head()

In [None]:
# (rows, columns) of the tf-idf DataFrame built in the previous cell.
dtm.shape

In [None]:
# Example query: one document (a comma-separated list of ailments) wrapped in
# a list, because transform() takes an iterable of documents.
ideal = ["""
lack-of-appetite,bipolar-disorder,hypertension
"""]

# Query the ideal description
new = tfidf.transform(ideal)
new

In [None]:
# The query must be densified for the kd_tree model. toarray() returns a
# plain ndarray; todense() returns np.matrix, which newer scikit-learn
# releases reject as input.
nn.kneighbors(new.toarray())

In [None]:
import pickle
# Dump the fitted NearestNeighbors model (nn) with Pickle
pickle_filename = 'ailments_model.pkl2'
# The with-block closes the file even if pickle.dump raises.
with open(pickle_filename, 'wb') as pickled_model:
    pickle.dump(nn, pickled_model)

In [None]:
# Loading the saved model
# NOTE: pickle.load can execute arbitrary code; only load files you created.
# The with-block makes sure the file handle is closed after reading.
with open(pickle_filename, 'rb') as ailments_model_pkl2:
    ailments_nn_model2 = pickle.load(ailments_model_pkl2)
print("Loaded model :: ", ailments_nn_model2)  # print to verify

In [None]:
# Dump the fitted TF-IDF vectorizer (tfidf) with Pickle
pickle_filename_1 = 'ailments_tfidf.pkl2'
# The with-block closes the file even if pickle.dump raises.
with open(pickle_filename_1, 'wb') as pickled_model_1:
    pickle.dump(tfidf, pickled_model_1)

In [None]:
# Loading the saved vectorizer
# NOTE: pickle.load can execute arbitrary code; only load files you created.
# The with-block makes sure the file handle is closed after reading.
with open(pickle_filename_1, 'rb') as ailments_model_pkl_1:
    ailments_tfidf_model2 = pickle.load(ailments_model_pkl_1)
print("Loaded model :: ", ailments_tfidf_model2)  # print to verify

In [None]:
# Spot-check ten random rows of the token lists. No random_state is given,
# so a different sample is shown on every run.
df['combined_tokens'].sample(10)

In [None]:
# Second example query, used to exercise the unpickled vectorizer and model.
ideal2 = ['anorexia,muscular-dystropy,insomnia,add-adhd']

In [None]:
# Vectorize the second query with the vectorizer loaded back from disk.
new2 = ailments_tfidf_model2.transform(ideal2)
new2

In [None]:
# Query the unpickled model. toarray() returns a plain ndarray; todense()
# returns np.matrix, which newer scikit-learn releases reject as input.
ailments_nn_model2.kneighbors(new2.toarray())

In [None]:
# Strain name stored at row label 162.
# NOTE(review): 162 is hard-coded -- presumably one of the indices returned
# by the kneighbors call above; confirm after re-running.
df.loc[162, 'Strain']

In [None]:
# Ailments text stored at row label 162 (same hard-coded row as the Strain
# lookup in the previous cell).
df.loc[162, 'ailments']

In [None]:
import json


def _json_default(value):
    """Convert numpy scalars to plain Python values for json.dumps."""
    if isinstance(value, np.generic):
        return value.item()
    raise TypeError(
        "Object of type %s is not JSON serializable" % type(value).__name__
    )


def recommend(user_input):
    """Print the nearest-neighbour recommendations for a free-text query.

    The query is vectorized with the unpickled TF-IDF vectorizer
    (``ailments_tfidf_model2``) and passed to the unpickled nearest-neighbours
    model (``ailments_nn_model2``). For every neighbour index returned, the
    matching row of ``df`` is looked up and six fields are printed as JSON,
    one per line, in this order: Strain, ailments, Effects, Flavor,
    Description, Rating.

    Parameters
    ----------
    user_input : str
        Text describing the ailments to match, e.g. ``'for arthritis'``.

    Returns
    -------
    None
        Results are printed rather than returned; a ``return`` inside the
        loop would exit after the first match.
    """
    # toarray() gives a plain ndarray; todense() gives np.matrix, which newer
    # scikit-learn releases reject as input.
    query_vector = ailments_tfidf_model2.transform([user_input]).toarray()

    # kneighbors returns (distances, indices); keep the indices for the
    # single query row.
    neighbor_rows = ailments_nn_model2.kneighbors(query_vector)[1][0]

    # Iterate over however many neighbours the model returns instead of
    # assuming exactly four. The indices are used as df labels, which relies
    # on df having a 0..n-1 index aligned with the training matrix.
    for row_label in neighbor_rows:
        row = df.loc[row_label]
        for column in ('Strain', 'ailments', 'Effects',
                       'Flavor', 'Description', 'Rating'):
            print(json.dumps(row[column], default=_json_default))


In [None]:
# Demo call: prints the recommendations for a sample free-text query.
recommend('for arthritis')