# Import necessary libraries and read data

In [28]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import warnings
warnings.filterwarnings('ignore')
# Load the cleaned drug dataset; the path is relative to the notebook's working directory.
df=pd.read_csv('clean.csv')
# Print column names, non-null counts and dtypes as a quick sanity check of the load.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2931 entries, 0 to 2930
Data columns (total 13 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   drug_name                      2931 non-null   object 
 1   medical_condition              2931 non-null   object 
 2   side_effects                   2931 non-null   object 
 3   generic_name                   2931 non-null   object 
 4   drug_classes                   2931 non-null   object 
 5   brand_names                    2931 non-null   object 
 6   activity                       2931 non-null   object 
 7   rx_otc                         2931 non-null   object 
 8   pregnancy_category             2931 non-null   object 
 9   csa                            2931 non-null   object 
 10  alcohol                        2931 non-null   object 
 11  medical_condition_description  2931 non-null   object 
 12  rating                         2931 non-null   f

# Build a drug recommendation system

In [41]:
# 1. Clean the text column: missing descriptions become '' and all text is lowercased
df['medical_condition_description'] = (
    df['medical_condition_description']
    .fillna('')
    .str.lower()
)

# 2. Fit a TF-IDF model (English stop words removed) on the cleaned descriptions
vectorizer = TfidfVectorizer(stop_words='english')
tfidf_matrix = vectorizer.fit_transform(df['medical_condition_description'])

# 3. Function to summarize descriptions
def summarize_description(description, max_words=20):
    """
    Summarize the medical condition description by truncating it to a maximum number of words.

    Words are whitespace-separated tokens. A description that already fits within
    ``max_words`` is returned unchanged (original spacing preserved); a longer one is
    rebuilt from its first ``max_words`` words joined by single spaces, followed by '...'.

    :param description: Full description of the medical condition. Non-string values
        (e.g. None or NaN from a missing cell) are treated as an empty description.
    :param max_words: Maximum number of words to retain in the summary. Negative values
        are treated as 0.
    :return: Truncated description.
    """
    # Missing cells arrive as None/NaN, which have no .split(); summarize them as empty text.
    if not isinstance(description, str):
        return ''

    # A negative limit would otherwise slice from the end (words[:-1]) instead of truncating.
    limit = max(max_words, 0)

    words = description.split()
    if len(words) <= limit:
        return description
    return ' '.join(words[:limit]) + '...'

# 4. Function to search for drugs based on symptoms
def get_drug_recommendations(symptom, df, tfidf_matrix, top_n=5, fitted_vectorizer=None):
    """
    Search for drugs based on symptoms.

    The symptom text is vectorized and compared, by cosine similarity, against every
    row of ``tfidf_matrix``; rows with a similarity greater than zero are ranked from
    most to least similar and the best ``top_n`` are returned.

    :param symptom: User input symptom description.
    :param df: DataFrame containing drug data. The 'drug_name' and
        'medical_condition_description' columns are read by row position.
    :param tfidf_matrix: TF-IDF matrix for the 'medical_condition_description' column;
        row ``i`` must correspond to row ``i`` of ``df``.
    :param top_n: Maximum number of recommendations to return. ``None`` returns every
        match; zero or a negative value returns an empty list.
    :param fitted_vectorizer: Vectorizer that produced ``tfidf_matrix``. When omitted,
        the notebook-level ``vectorizer`` variable is used.
    :return: List of dicts with keys 'drug_name', 'medical_condition_description'
        (summarized) and 'similarity' (float rounded to 2 decimals), best match first.
    """
    # A negative limit would make the slice below drop items from the END of the
    # ranking (returning almost the whole dataset), so reject non-positive limits early.
    if top_n is not None and top_n <= 0:
        return []

    # Normalize user input to lowercase; a blank query cannot match anything
    query = symptom.lower().strip()
    if not query:
        return []

    # The query must be vectorized with the same fitted model as tfidf_matrix
    text_vectorizer = vectorizer if fitted_vectorizer is None else fitted_vectorizer
    query_vector = text_vectorizer.transform([query])

    # Similarity of the query against every description (one score per row)
    scores = cosine_similarity(query_vector, tfidf_matrix)[0]

    # Keep only real matches before sorting; the sort is stable, so rows with equal
    # scores stay in dataset order.
    matches = [(row, score) for row, score in enumerate(scores) if score > 0]
    matches.sort(key=lambda pair: pair[1], reverse=True)
    matches = matches[:top_n]

    names = df['drug_name']
    descriptions = df['medical_condition_description']

    # Return drug name, summarized description, and similarity score
    return [
        {
            'drug_name': names.iloc[row],
            'medical_condition_description': summarize_description(descriptions.iloc[row]),
            # Plain Python float rather than a numpy scalar
            'similarity': round(float(score), 2)
        }
        for row, score in matches
    ]

# 5. User search interface
def search_interface(df, tfidf_matrix):
    """
    User-friendly interface for searching drugs based on symptoms.

    Runs an interactive loop on standard input/output: each round asks for a symptom
    and for how many recommendations to show, then prints the matches. The loop ends
    when the user types 'exit' (case-insensitive, surrounding whitespace ignored).

    :param df: DataFrame containing drug data, passed through to
        get_drug_recommendations.
    :param tfidf_matrix: TF-IDF matrix for the 'medical_condition_description' column,
        passed through to get_drug_recommendations.
    :return: None.
    """
    print("=== Drug Recommendation System ===")
    print("Enter a symptom or phrase to search.")
    print("Example: 'headache', 'fever', 'skin rash'.")

    while True:
        # Prompt user for input; strip so that ' exit ' still quits and
        # whitespace-only input is reported as empty.
        symptom_input = input("\nEnter your symptom (or type 'exit' to quit): ").strip()
        if symptom_input.lower() == 'exit':
            print("\nThank you for using the system!")
            break

        if not symptom_input:
            print("\nNo input provided. Please try again!")
            continue

        # Ask user for the number of recommendations (empty answer -> default)
        count_text = input("Enter the number of recommendations to display (default: 5): ").strip()
        try:
            top_n = int(count_text) if count_text else 5
        except ValueError:
            top_n = 0  # routed to the invalid-number branch below

        # Zero or negative counts are as invalid as non-numeric text: they would
        # show nothing, or be misread as a slice offset by the search.
        if top_n < 1:
            print("\nInvalid number. Using the default value: 5.")
            top_n = 5

        # Get drug recommendations
        recommendations = get_drug_recommendations(symptom_input, df, tfidf_matrix, top_n=top_n)

        # Display results
        if recommendations:
            print(f"\nThe following drugs may help with the symptom '{symptom_input}':")
            for idx, rec in enumerate(recommendations, start=1):
                print(f"{idx}. Drug: {rec['drug_name']}")
                print(f"   Symptom Description: {rec['medical_condition_description']}")
                print(f"   Similarity Score: {rec['similarity']}\n")
        else:
            print(f"\nNo drugs found for the symptom '{symptom_input}'. Try a different keyword!")

# 6. Launch the search interface
# NOTE: this call blocks on input() until the user types 'exit', so the cell
# does not finish on its own during an unattended "Run All".
search_interface(df, tfidf_matrix)


=== Hệ thống gợi ý thuốc ===
Nhập triệu chứng hoặc cụm từ bạn muốn tìm kiếm.
Ví dụ: 'headache', 'fever', 'skin rash'.



Nhập triệu chứng của bạn (hoặc gõ 'exit' để thoát):  Stay out of the sun
Nhập số lượng gợi ý tối đa (mặc định: 5):  3



Những thuốc có thể giúp điều trị triệu chứng 'Stay out of the sun' là:
1. Thuốc: doxycycline
   Mô tả triệu chứng: acne other names: acne vulgaris; blackheads; breakouts; cystic acne; pimples; whiteheads; zits acne is a skin condition caused by dead...
   Độ tương đồng: 0.03

2. Thuốc: spironolactone
   Mô tả triệu chứng: acne other names: acne vulgaris; blackheads; breakouts; cystic acne; pimples; whiteheads; zits acne is a skin condition caused by dead...
   Độ tương đồng: 0.03

3. Thuốc: minocycline
   Mô tả triệu chứng: acne other names: acne vulgaris; blackheads; breakouts; cystic acne; pimples; whiteheads; zits acne is a skin condition caused by dead...
   Độ tương đồng: 0.03




Nhập triệu chứng của bạn (hoặc gõ 'exit' để thoát):  exit



Cảm ơn bạn đã sử dụng hệ thống!
