In [1]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import joblib

In [2]:
# Step 1: Read the dataset from CSV into a DataFrame
file_path = 'Medicine_Details.csv'  # Point this at your copy of the CSV
medicine_data = pd.read_csv(filepath_or_buffer=file_path)

In [3]:
# Step 2: Data Preprocessing - Combine text fields for recommendation
# Missing values are replaced with empty strings before concatenating:
# `str + NaN` evaluates to NaN, so a single missing field would turn the whole
# combined document into NaN, which TfidfVectorizer refuses to process.
# `astype(str)` keeps the concatenation valid even if a column is not text.
medicine_data['combined_features'] = (
    medicine_data['Composition'].fillna('').astype(str)
    + " " + medicine_data['Uses'].fillna('').astype(str)
    + " " + medicine_data['Side_effects'].fillna('').astype(str)
)


In [4]:
# Step 3: Vectorization - create a TF-IDF vectorizer that ignores English stop words
tfidf_vectorizer = TfidfVectorizer(
    stop_words='english',
)

In [6]:
# Fit and transform the combined features into a TF-IDF matrix
# The matrix rows are later looked up by the row position of a medicine in medicine_data
tfidf_matrix = tfidf_vectorizer.fit_transform(
    medicine_data['combined_features']
)

In [7]:
# Step 4: Similarity Calculation - pairwise cosine similarity between all TF-IDF rows
# (with a single argument, cosine_similarity compares the matrix against itself)
cosine_sim = cosine_similarity(tfidf_matrix)

In [8]:
# Step 5: Persist the model components to disk
# 5a. The fitted TF-IDF vectorizer
tfidf_vectorizer_filename = 'tfidf_vectorizer.pkl'
joblib.dump(value=tfidf_vectorizer, filename=tfidf_vectorizer_filename)

['tfidf_vectorizer.pkl']

In [9]:
# Persist the pairwise cosine similarity matrix
cosine_similarity_filename = 'cosine_similarity.pkl'
joblib.dump(value=cosine_sim, filename=cosine_similarity_filename)

['cosine_similarity.pkl']

In [10]:
# Persist the medicine DataFrame so recommendations can be produced later
# without re-reading the CSV
medicine_data_filename = 'medicine_data.pkl'
joblib.dump(value=medicine_data, filename=medicine_data_filename)

['medicine_data.pkl']

In [11]:
# Function to get recommendations
def get_recommendations(medicine_name, cosine_sim=cosine_sim, top_n=5):
    """Return the names of the medicines most similar to ``medicine_name``.

    Parameters
    ----------
    medicine_name : str
        Exact value to look up in the 'Medicine Name' column of
        ``medicine_data``. If several rows carry this name, the first one
        is used as the query row.
    cosine_sim : array-like, optional
        Pairwise similarity matrix indexed by row position: ``cosine_sim[i]``
        holds the scores of row ``i`` of ``medicine_data`` against every row.
        The default is bound once, when this function is defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.Series
        'Medicine Name' values of the ``top_n`` highest-scoring rows other
        than the query row, ordered from most to least similar.

    Raises
    ------
    IndexError
        If no row of ``medicine_data`` has the given name.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n!r}")

    # Look the medicine up by row *position* rather than by index label:
    # both cosine_sim and .iloc below are positional, so this stays correct
    # even if medicine_data does not have a default 0..n-1 index.
    name_matches = (medicine_data['Medicine Name'] == medicine_name).to_numpy().nonzero()[0]
    if len(name_matches) == 0:
        raise IndexError(f"Medicine {medicine_name!r} not found in medicine_data")
    idx = int(name_matches[0])

    # Drop the query row explicitly instead of assuming it sorts first; with
    # tied scores (e.g. another row with identical text) or an all-zero
    # vector, it is not guaranteed to be at the top of the ranking.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    medicine_indices = [position for position, _ in sim_scores[:top_n]]
    return medicine_data['Medicine Name'].iloc[medicine_indices]

In [12]:
# Example: look up the medicines most similar to one specific product
recommended_medicines = get_recommendations(
    medicine_name='Avastin 400mg Injection',
)

In [13]:
# Show a header line followed by the recommended medicine names
print(
    "Recommended medicines similar to 'Avastin 400mg Injection':",
    recommended_medicines,
    sep="\n",
)

Recommended medicines similar to 'Avastin 400mg Injection':
3289     Gemtaz 1gm Injection
1740       Erlocip 100 Tablet
1739       Erlocip 150 Tablet
11749     Zyceva 150mg Tablet
11303     Xeloda 500mg Tablet
Name: Medicine Name, dtype: object


In [14]:
# -------------------
# Step 6: Reload the saved model components (for future use)
# joblib files are pickle-based, so only load files you created or trust.
# 6a. The TF-IDF vectorizer
loaded_tfidf_vectorizer = joblib.load(filename='tfidf_vectorizer.pkl')

In [15]:
# Reload the pairwise cosine similarity matrix written earlier
loaded_cosine_sim = joblib.load(filename='cosine_similarity.pkl')

In [16]:
# Reload the medicine DataFrame written earlier
loaded_medicine_data = joblib.load(filename='medicine_data.pkl')

In [17]:
# Use the loaded components to make recommendations
def get_loaded_recommendations(medicine_name, cosine_sim=loaded_cosine_sim, top_n=5):
    """Return the medicines most similar to ``medicine_name`` using the reloaded artifacts.

    Parameters
    ----------
    medicine_name : str
        Exact value to look up in the 'Medicine Name' column of
        ``loaded_medicine_data``. If several rows carry this name, the first
        one is used as the query row.
    cosine_sim : array-like, optional
        Pairwise similarity matrix indexed by row position: ``cosine_sim[i]``
        holds the scores of row ``i`` of ``loaded_medicine_data`` against
        every row. The default is bound once, when this function is defined.
    top_n : int, optional
        Maximum number of recommendations to return (default 5).

    Returns
    -------
    pandas.Series
        'Medicine Name' values of the ``top_n`` highest-scoring rows other
        than the query row, ordered from most to least similar.

    Raises
    ------
    IndexError
        If no row of ``loaded_medicine_data`` has the given name.
    ValueError
        If ``top_n`` is negative.
    """
    if top_n < 0:
        raise ValueError(f"top_n must be non-negative, got {top_n!r}")

    # Positional lookup: cosine_sim and .iloc are both positional, so using
    # the row position (not the index label) keeps them in agreement.
    name_matches = (
        loaded_medicine_data['Medicine Name'] == medicine_name
    ).to_numpy().nonzero()[0]
    if len(name_matches) == 0:
        raise IndexError(f"Medicine {medicine_name!r} not found in loaded_medicine_data")
    idx = int(name_matches[0])

    # Exclude the query row explicitly; it is not guaranteed to rank first
    # when other rows tie with it or when its own vector is all zeros.
    sim_scores = [
        (position, score)
        for position, score in enumerate(cosine_sim[idx])
        if position != idx
    ]

    # Highest similarity first; the sort is stable, so ties keep row order.
    sim_scores.sort(key=lambda pair: pair[1], reverse=True)

    medicine_indices = [position for position, _ in sim_scores[:top_n]]
    return loaded_medicine_data['Medicine Name'].iloc[medicine_indices]

In [19]:
# Example with loaded model components
# Same query as before, answered from the artifacts reloaded from disk
loaded_recommended_medicines = get_loaded_recommendations(
    medicine_name='Avastin 400mg Injection',
)

In [20]:
# Show a header line followed by the recommendations from the reloaded components
print(
    "Recommended medicines (using loaded components) similar to 'Avastin 400mg Injection':",
    loaded_recommended_medicines,
    sep="\n",
)

Recommended medicines (using loaded components) similar to 'Avastin 400mg Injection':
3289     Gemtaz 1gm Injection
1740       Erlocip 100 Tablet
1739       Erlocip 150 Tablet
11749     Zyceva 150mg Tablet
11303     Xeloda 500mg Tablet
Name: Medicine Name, dtype: object
