In [None]:
import numpy as np
import pandas as pd
import re
import nltk
from nltk.corpus import stopwords
from nltk.stem.porter import PorterStemmer
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import json

In [None]:
!pip install nltk
!pip install -U scikit-learn scipy matplotlib



In [None]:
# Load the drug recommendation table from the CSV file next to the notebook.
df = pd.read_csv(
    'drug_recom.csv'
)

In [None]:
# Quick overview of the loaded table: summary statistics, schema, first rows.
print(df.describe())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# must not be wrapped in print() (that would emit a stray "None" line).
df.info()
print(df.head())

       index   Drug_Name   Reason  \
count    234         234      234   
unique   234          78      230   
top     A001  Imiquimod   fissure   
freq       1          23        2   

                                              Description  \
count                                                 234   
unique                                                234   
top     Actinic cheilitis is a precancerous condition ...   
freq                                                    1   

                                Class_Type  
count                                  234  
unique                                  14  
top     Nail Fungus and other Nail Disease  
freq                                    37  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   index        234 non-null    object
 1   Drug_Name    234 non-null    object
 2   Reason    

In [None]:
# Report the number of missing values in each column.
print(df.isna().sum())

# Remove, in place, every row that has at least one missing value.
df.dropna(inplace=True)

# Report how many rows are exact duplicates of an earlier row.
print(df.duplicated().sum())

index          0
Drug_Name      0
Reason         0
Description    0
Class_Type     0
dtype: int64
0


In [None]:
def print_description(index, data=None):
    """Print the description, drug, reason and class of one row.

    Args:
        index: Positional (``iloc``) index of the row to show.
        data: Optional DataFrame to read from. Defaults to the notebook-level
            ``df``.

    The frame may carry ``Class_Type`` / ``Reason`` either as ordinary columns
    or as named index levels (the notebook later moves them into the index
    with ``set_index``); both layouts are supported.

    Raises:
        IndexError: If ``index`` is out of bounds.
    """
    table = df if data is None else data

    # Named index levels are not reachable through row['...'], so turn them
    # back into columns before selecting the row.
    if any(level_name is not None for level_name in table.index.names):
        table = table.reset_index()

    example = table.iloc[index]
    if example.empty:
        return

    for field in ('Description', 'Drug_Name', 'Reason', 'Class_Type'):
        print(f"{field}: {example[field]}")

In [None]:
# Fetch the NLTK stop-word corpus (a no-op when it is already up to date).
nltk.download('stopwords')

# English stop words as a set for fast membership tests during cleaning.
stop_words = set(stopwords.words('english'))

# Porter stemmer instance shared by the text-cleaning step.
ps = PorterStemmer()

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [None]:
def clean_text(text, stemmer=None, stopword_set=None):
    """Normalise a free-text description for vectorisation.

    Steps: lower-case the text, replace separator punctuation with spaces,
    drop every remaining character that is not ``0-9``, ``a-z``, space,
    ``#``, ``+`` or ``_``, remove stop words, and stem what is left.

    Args:
        text: The string to clean.
        stemmer: Object with a ``stem(word)`` method. Defaults to the
            notebook-level ``ps``.
        stopword_set: Collection of words to discard. Defaults to the
            notebook-level ``stop_words``.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    stemmer = ps if stemmer is None else stemmer
    stopword_set = stop_words if stopword_set is None else stopword_set

    text = text.lower()
    # Raw strings: the previous non-raw literal relied on invalid escape
    # sequences (\[ \] \|), which newer Python versions warn about.
    text = re.sub(r'[/(){}\[\]\|@,;]', ' ', text)
    text = re.sub(r'[^0-9a-z #+_]', '', text)

    return ' '.join(
        stemmer.stem(word) for word in text.split() if word not in stopword_set
    )

In [None]:
# Add a cleaned (lower-cased, de-punctuated, stemmed) copy of each description.
df['Desc_Clean'] = df['Description'].map(clean_text)

In [None]:
# Vectorise the cleaned descriptions with word-level 1- to 3-gram TF-IDF and
# compute the pairwise cosine similarity between all descriptions.
#
# min_df=1 keeps every term (a term always occurs in at least one document),
# which is what the former integer min_df=0 selected; recent scikit-learn
# releases validate the parameter and reject an integer min_df below 1.
tfidf = TfidfVectorizer(analyzer='word', ngram_range=(1, 3), min_df=1, stop_words='english')
tfidf_matrix = tfidf.fit_transform(df['Desc_Clean'])
cos_sim = cosine_similarity(tfidf_matrix)

In [None]:
# Persist the dense TF-IDF matrix and the cosine-similarity matrix as JSON
# (each written as a nested list of rows).
for filename, matrix in (
    ("tfidf_matrix.json", tfidf_matrix.toarray()),
    ("cosine_similarity.json", cos_sim),
):
    with open(filename, "w") as f_json:
        json.dump(matrix.tolist(), f_json)

In [None]:
# Key the table by (Class_Type, Reason) for the lookups below.
# Guarded so that re-running this cell is a no-op: once the columns have been
# moved into the index, a second set_index call would raise KeyError.
if list(df.index.names) != ['Class_Type', 'Reason']:
    df.set_index(['Class_Type', 'Reason'], inplace=True)

In [None]:
def get_disease_info(class_type, reason, data=None):
    """Build a text summary of a disease and the medicines listed for it.

    Args:
        class_type: Value to match in the ``Class_Type`` index level.
        reason: Value to match in the ``Reason`` index level.
        data: Optional DataFrame indexed by ``Class_Type`` and ``Reason`` with
            ``Description`` and ``Drug_Name`` columns. Defaults to the
            notebook-level ``df``.

    Returns:
        A multi-line string with the class type, disease name, the first
        matching description and the sorted, de-duplicated medicine names;
        or a "does not exist" message when nothing matches.
    """
    not_found = "The specified class type and reason combination does not exist in the dataset."
    table = df if data is None else data

    try:
        class_values = table.index.get_level_values('Class_Type')
        reason_values = table.index.get_level_values('Reason')
    except KeyError:
        # The frame is not indexed by the expected levels; report it the same
        # way a failed lookup has always been reported.
        return not_found

    # A boolean mask always yields a DataFrame (a tuple .loc lookup returns a
    # Series when the index is unique) and avoids the lexsort-depth
    # performance warning on the unsorted MultiIndex.
    info = table[(class_values == class_type) & (reason_values == reason)]
    if info.empty:
        return not_found

    output = f"Class Type: {class_type}\n"
    output += f"Disease Name: {reason}\n"
    output += f"Disease Description: {info['Description'].iloc[0]}\n\n"

    # Strip stray whitespace before de-duplicating so that "Name" and "Name "
    # are treated as the same medicine.
    unique_medicines = {str(name).strip() for name in info['Drug_Name']}
    medicine_list = "\n".join(sorted(unique_medicines))
    output += "Recommended Medicines for " + reason + ":\n" + medicine_list

    return output

In [None]:
# Example lookup: summary and medicines for one class type / reason pair.
class_type_query = 'Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions'
reason_query = 'actinic-cheilitis'

result = get_disease_info(class_type_query, reason_query)
print(result)

Class Type: Actinic Keratosis Basal Cell Carcinoma and other Malignant Lesions
Disease Name: actinic-cheilitis
Disease Description: Actinic cheilitis is a precancerous condition caused by chronic sun exposure, leading to rough, scaly patches on the lips. Symptoms include dry, cracking, or peeling lips, often with persistent rough areas.

Recommended Medicines for actinic-cheilitis:
Fluorouracil 


  info = df.loc[(class_type, reason)]


In [None]:
# with open("medicine_model.bin", "wb") as f_bin:
#     f_bin.write(json.dumps(df.to_dict()).encode('utf-8'))

In [None]:
# Export the table as a list of row records.
# to_dict(orient='records') leaves the index out, so any named index levels
# (Class_Type / Reason after the set_index step) are first restored as
# ordinary columns; otherwise they would be missing from the exported file.
export_df = df.reset_index() if any(name is not None for name in df.index.names) else df

with open("drug_model.json", "w") as json_file:
    data_dict = export_df.to_dict(orient='records')
    json.dump(data_dict, json_file, indent=4)