# Medicine Recommendation System  

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load drugsComTest_raw.csv (path relative to the working directory) into the
# DataFrame `df` that the following cells read from.
df=pd.read_csv(r'drugsComTest_raw.csv')

In [None]:
# Display the loaded DataFrame (notebook echo of the cell's last expression).
df

### Exploratory Data Analysis

In [None]:
# (rows, columns) of the loaded data.
df.shape

In [None]:
# Count missing values per column.
df.isnull().sum()

In [None]:
# Print a per-column summary (dtype and non-null count) plus memory usage.
df.info()

In [None]:
# Same shape check as the earlier cell; `df` has not been modified in between.
df.shape

In [None]:
# Number of distinct values in each column.
df.nunique()

In [None]:
# Histogram of the 'rating' column using 10 bins; the title is set directly on
# the Axes object that seaborn returns.
sns.histplot(x='rating', data=df, bins=10).set_title('Distribution of ratings')
plt.show()

In [None]:
# value_counts() orders drug names by how many rows they appear in, so the
# first ten entries are the ten most frequent drugs.
top_drugs = df['drugName'].value_counts().iloc[:10]

# Horizontal bars: counts on the x axis, drug names on the y axis.
sns.barplot(x=top_drugs.to_numpy(), y=top_drugs.index).set(
    xlabel='Review count',
    ylabel='Drug name',
    title='Top 10 drugs by review count',
)
plt.show()

In [None]:
# Show the ten most frequent drugName values together with their row counts.
top_drugs

In [None]:
# Scatter of 'usefulCount' against 'rating', one point per row of df.
sns.scatterplot(x='rating', y='usefulCount', data=df).set(
    title='Rating v/s Useful Count',
    xlabel='Rating',
    ylabel='Useful Count',
)
plt.show()

In [None]:
# value_counts() orders conditions by how many rows mention them (missing
# values are not counted), so the first ten entries are the most frequent.
top_conditions = df['condition'].value_counts().iloc[:10]

# Horizontal bars: frequency on the x axis, condition names on the y axis.
sns.barplot(x=top_conditions.to_numpy(), y=top_conditions.index).set(
    title='Top 10 Frequent conditions',
    xlabel='Frequency',
    ylabel='Condition',
)
plt.show()

In [None]:
# Show the ten most frequent condition values together with their row counts.
top_conditions

In [None]:
def get_medications_for_condition(condition, data=None):
    """Print and return the distinct drug names recorded for a condition.

    Args:
        condition: Value compared for exact equality (case-sensitive) against
            the 'condition' column.
        data: DataFrame with 'condition' and 'drugName' columns to search.
            Defaults to the notebook-level ``df`` so existing calls keep
            working unchanged.

    Returns:
        Array of the unique 'drugName' values of the matching rows, in order
        of first appearance; empty when no row matches.
    """
    frame = df if data is None else data
    # Select rows and column in a single .loc call instead of chained indexing.
    medications = frame.loc[frame['condition'] == condition, 'drugName'].unique()
    # Still printed for backward compatibility; the value is now also returned
    # so callers can use it programmatically.
    print(medications)
    return medications

In [None]:
# Example: list the distinct drug names of rows whose condition is exactly
# 'Birth Control'.
get_medications_for_condition('Birth Control')

### Model Training

In [None]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Content-based lookup: rank dataset rows by the TF-IDF cosine similarity
# between their 'condition' text and a condition typed by the user, then list
# the drug names of the matching rows.

# Keep only the two columns used below and drop rows without a condition.
# Written as one expression that returns a new frame instead of calling
# dropna(inplace=True) on a column subset of the original frame.
df = df[['drugName', 'condition']].dropna(subset=['condition'])

# Learn the vocabulary from the condition strings; one TF-IDF row per df row.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['condition'])

user_condition = input("Enter your health condition: ")

# Project the query into the same TF-IDF space as the dataset.
user_condition_tfidf = tfidf_vectorizer.transform([user_condition])

# One row of scores: similarity of the query to every row's condition.
similarity_scores = cosine_similarity(user_condition_tfidf, tfidf_matrix)

# Sort best-first. Negating and using a stable sort keeps rows with equal
# scores in dataset order, so the output is reproducible.
row_scores = similarity_scores[0]
ranked_rows = (-row_scores).argsort(kind='stable')

# A score of 0 means the row shares no vocabulary term with the query; such
# rows are not recommendations, so only positive scores are kept.
top_indices = ranked_rows[row_scores[ranked_rows] > 0]

# Several rows can carry the same drug name; list each drug once, best first.
top_medicines = df['drugName'].iloc[top_indices].drop_duplicates()

print("Top recommended medicines for", user_condition, ":")
if top_medicines.empty:
    # Nothing in the dataset shares a word with the input.
    print("No matching condition found in the dataset.")
for medicine in top_medicines:
    print(medicine)


In [None]:
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Same TF-IDF lookup as a ranking of dataset rows by cosine similarity to the
# user's condition, limited to the ten best distinct drug names.

# Keep only the two columns used below and drop rows without a condition.
# Written as one expression that returns a new frame instead of calling
# dropna(inplace=True) on a column subset of the original frame.
df = df[['drugName', 'condition']].dropna(subset=['condition'])

# Learn the vocabulary from the condition strings; one TF-IDF row per df row.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['condition'])

user_condition = input("Enter your health condition: ")

# Project the query into the same TF-IDF space as the dataset.
user_condition_tfidf = tfidf_vectorizer.transform([user_condition])

# One row of scores: similarity of the query to every row's condition.
similarity_scores = cosine_similarity(user_condition_tfidf, tfidf_matrix)

# Sort best-first. Negating and using a stable sort keeps rows with equal
# scores in dataset order, so the output is reproducible.
row_scores = similarity_scores[0]
ranked_rows = (-row_scores).argsort(kind='stable')

# Rows scoring 0 share no vocabulary term with the query and are dropped.
top_indices = ranked_rows[row_scores[ranked_rows] > 0]

# De-duplicate BEFORE truncating: taking the first ten rows and then listing
# their drug names could print the same drug up to ten times.
top_medicines = df['drugName'].iloc[top_indices].drop_duplicates().head(10)

print("Top 10 recommended medicines for", user_condition, ":")
if top_medicines.empty:
    # Nothing in the dataset shares a word with the input.
    print("No matching condition found in the dataset.")
for medicine in top_medicines:
    print(medicine)


In [None]:
import numpy as np
import pandas as pd
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Train an LSTM classifier that maps a condition string to a drug name.
#
# Expects `df` to have the columns 'condition' and 'drugName'.  The frame is
# rebuilt as an independent copy (column subset, rows without a condition
# dropped) so that adding the encoded-label column below writes to a frame we
# own rather than to a slice of the original (avoids chained-assignment
# warnings).
df = df[['drugName', 'condition']].dropna(subset=['condition']).copy()

# Encode the drug names as integer class labels.
label_encoder = LabelEncoder()
df['drugName_encoded'] = label_encoder.fit_transform(df['drugName'])

# Tokenize the condition strings into sequences of integer word indices.
tokenizer = Tokenizer()
tokenizer.fit_on_texts(df['condition'])
X = tokenizer.texts_to_sequences(df['condition'])

# Pad (at the end) or truncate every sequence to a fixed length for the LSTM.
# NOTE(review): 100 tokens looks generous if conditions are short phrases;
# confirm against the actual token-length distribution.
max_sequence_length = 100
X = pad_sequences(X, padding='post', maxlen=max_sequence_length)

# Labels: the encoded drug names, aligned row-for-row with X.
y = df['drugName_encoded'].to_numpy()

# Hold out 20% of the rows; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the LSTM model.
model = Sequential()

# Embedding layer for the token indices.  Tokenizer indices start at 1 and 0
# is the padding value, hence the "+ 1" on the vocabulary size.
model.add(Embedding(input_dim=len(tokenizer.word_index) + 1,
                    output_dim=128,
                    input_length=max_sequence_length))

# LSTM layer with input and recurrent dropout for regularization.
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))

# Hidden dense layer.
model.add(Dense(64, activation='relu'))

# Output layer: one softmax unit per class the encoder actually produced, so
# the layer width always matches the label range.
model.add(Dense(len(label_encoder.classes_), activation='softmax'))

# Integer labels -> sparse categorical cross-entropy.
model.compile(loss='sparse_categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Train the model.
# NOTE(review): the held-out split doubles as validation data here, so no
# untouched test set remains for a final evaluation.
model.fit(X_train, y_train, epochs=50, batch_size=64, validation_data=(X_test, y_test))

def recommend_medicine(user_condition):
    """Return the drug name the trained model predicts for a condition string.

    The text is tokenized with the fitted ``tokenizer``, padded at the end to
    ``max_sequence_length``, passed through ``model`` and the highest-scoring
    class is mapped back to a drug name with ``label_encoder``.
    """
    sequences = tokenizer.texts_to_sequences([user_condition])
    padded = pad_sequences(sequences, maxlen=max_sequence_length, padding='post')
    class_scores = model.predict(padded)
    # Single input row, so take the best class of the first (only) prediction.
    best_class = np.argmax(class_scores, axis=1)[0]
    return label_encoder.inverse_transform([best_class])[0]

# Example usage: read a condition from standard input, run it through
# recommend_medicine() and print the predicted drug name.
user_condition = input("Enter your health condition: ")
recommended_medicine = recommend_medicine(user_condition)
print(f"Recommended medicine: {recommended_medicine}")
