In [None]:
# Mount Google Drive at /content/drive; the dataset is read from a path under
# /content/drive/MyDrive further down.
from google.colab import drive
drive.mount('/content/drive')


In [None]:
import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
from sklearn.model_selection import train_test_split
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestRegressor 

In [None]:
# Load the medicine dataset from the mounted Drive into `df`.
df=pd.read_csv('/content/drive/MyDrive/medicin_csv/medicine.csv')

In [None]:
# Print a summary of df (columns, non-null counts, dtypes) to spot missing values.
df.info()


In [None]:
# Columns that are not used by the rest of the notebook.
columns_to_drop = ['drug_interactions']
# Rebind df to a copy without those columns.
df = df.drop(columns=columns_to_drop)

In [None]:
import pandas as pd
import tensorflow as tf
from scipy.sparse import csr_matrix
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.feature_extraction.text import TfidfVectorizer
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from sklearn.metrics.pairwise import cosine_similarity

In [None]:
# Build one text document per medicine from its description and salt composition.
# Missing values are replaced with '' first: NaN + str stays NaN, and
# TfidfVectorizer refuses NaN documents.
df['combined_features'] = (
    df['medicine_desc'].fillna('') + ' ' + df['salt_composition'].fillna('')
)

# Fit the TF-IDF vocabulary on every row; tfidf_matrix stays row-aligned with df
# and is what the similarity search compares against.
tfidf_vectorizer = TfidfVectorizer()
tfidf_matrix = tfidf_vectorizer.fit_transform(df['combined_features'])

# 80/20 train/test split, then 10% of the training part is held out for validation.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)
train_df, val_df = train_test_split(train_df, test_size=0.1, random_state=42, shuffle=True)

# Dense copies of the TF-IDF rows for Keras.
# NOTE: densifying allocates rows x vocabulary floats, which can be large.
train_matrix = tfidf_vectorizer.transform(train_df['combined_features']).toarray()
val_matrix = tfidf_vectorizer.transform(val_df['combined_features']).toarray()

# Autoencoder: TF-IDF row -> 64-unit bottleneck -> reconstructed TF-IDF row.
input_layer = Input(shape=(tfidf_matrix.shape[1],))
embedding_layer = Dense(64, activation='relu')(input_layer)
# The model is fitted with its own input as the target, so the output width
# must equal the input width (a fixed 64 only matches a 64-term vocabulary).
output_layer = Dense(tfidf_matrix.shape[1])(embedding_layer)

model = Model(inputs=input_layer, outputs=output_layer)
model.compile(optimizer='adam', loss='mean_squared_error')

# Keep only the best checkpoint seen during training.
checkpoint = ModelCheckpoint('model_weights.h5', save_best_only=True)



In [None]:

# Train the autoencoder to reconstruct its own input (x and y are the same
# matrix), validating on the held-out rows and checkpointing via `checkpoint`.
model.fit(
    x=train_matrix,
    y=train_matrix,
    batch_size=16,
    epochs=10,
    validation_data=(val_matrix, val_matrix),
    callbacks=[checkpoint],
)


In [None]:
# Re-vectorise the training rows and continue fitting, this time letting Keras
# carve out 10% of the rows for validation.
# transform() returns a SciPy sparse matrix; convert it to a dense array
# because validation_split needs NumPy arrays/tensors to slice, and the MSE
# target must be dense.
train_matrix = tfidf_vectorizer.transform(train_df['combined_features']).toarray()
model.fit(train_matrix, train_matrix, epochs=10, batch_size=16, validation_split=0.1)

In [None]:
# Reconstruction loss on the validation rows.
# transform() returns a SciPy sparse matrix; densify it so the input and the
# MSE target have the same dense form the model was trained with.
val_matrix = tfidf_vectorizer.transform(val_df['combined_features']).toarray()
loss = model.evaluate(val_matrix, val_matrix)
print(f"Validation Loss: {loss}")

In [None]:
# Print the layer-by-layer architecture and parameter counts of the model.
model.summary()

In [None]:
def find_similar_medicines(query_name, top_n=10, similarity_threshold=0.9):
    """Find the medicines whose TF-IDF features are closest to a given medicine.

    Relies on the notebook-level `df`, `tfidf_vectorizer` and `tfidf_matrix`
    (whose rows are in the same order as `df`).

    Args:
        query_name: Exact `product_name` of the medicine to look up.
        top_n: Maximum number of similar medicines to return.
        similarity_threshold: Minimum cosine similarity for a medicine to
            count as similar.

    Returns:
        The string "Medicine not found in the dataset." when no row has that
        name. Otherwise a tuple `(similarity_scores, similar_medicines)`:
        `similarity_scores` is a 1-D array with one cosine similarity per row
        of `df` (rows carrying the queried name are forced to 0.0), and
        `similar_medicines` holds up to `top_n` rows of `df` at or above the
        threshold, ordered from most to least similar.
    """
    # Positional mask of the rows carrying the queried name.
    name_mask = (df['product_name'] == query_name).to_numpy()
    if not name_mask.any():
        return "Medicine not found in the dataset."

    # Use only the first matching row as the query: with several matches the
    # flattened score array would be k * len(df) long and no longer line up
    # with df.
    query_features = df.loc[name_mask, 'combined_features'].values[:1]

    # Vectorize the query features
    query_vector = tfidf_vectorizer.transform(query_features)

    # One similarity score per row of df
    similarity_scores = cosine_similarity(query_vector, tfidf_matrix).flatten()

    # Exclude the query medicine itself (every row with that name)
    similarity_scores[name_mask] = 0.0

    # Keep the positions of the medicines at or above the threshold
    similar_indices = [
        i for i, score in enumerate(similarity_scores) if score >= similarity_threshold
    ]

    # Rank by similarity, highest first, so that truncating to top_n keeps the
    # best matches rather than the alphabetically first ones. sorted() is
    # stable, so ties keep their original row order.
    ranked_indices = sorted(
        similar_indices, key=lambda i: similarity_scores[i], reverse=True
    )[:top_n]
    similar_medicines = df.iloc[ranked_indices]

    return similarity_scores, similar_medicines

In [None]:
query_medicine = "Lupisulin R 100IU/ml Cartridge"
result = find_similar_medicines(query_medicine)

# find_similar_medicines returns a plain message string when the name is not
# in df; unpacking that string into two names would raise ValueError.
if isinstance(result, str):
    print(result)
else:
    similarity_scores, similar_medicines = result

    # Display results
    print(f"Similarity Scores: {similarity_scores}")
    print("\nTop Similar Medicines:")
    for _, row in similar_medicines.iterrows():
        # similarity_scores is positional over df, so look the score up through
        # a mask on the product name (first match if the name is duplicated).
        similarity_percentage = similarity_scores[df['product_name'] == row['product_name']][0] * 100
        print(f"{row['product_name']} - Similarity: {similarity_percentage:.2f}%")
        print(f"   Description: {row['medicine_desc']}")
        print(f"   Composition: {row['salt_composition']}\n")

In [None]:
# Second lookup, printed without formatting.
# NOTE(review): unlike the previous cell, `similar_medicines` here receives the
# whole return value of find_similar_medicines -- the (scores, DataFrame) tuple,
# or the "not found" message string -- so print() shows all of it. Confirm
# whether only the DataFrame was meant to be shown.
query_medicine = "Human Insulatard 40IU/ml Suspension for Injection"
similar_medicines = find_similar_medicines(query_medicine)
print(similar_medicines)