<a href="https://colab.research.google.com/github/PallaviYadav1208/TASK1/blob/main/medicine_recommendation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import pandas as pd
import difflib
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# loading the data from the csv file to apandas dataframe
movies_data = pd.read_csv('/content/Medicine_details.csv')

In [4]:
# printing the first 5 rows of the dataframe
movies_data.head()

Unnamed: 0,index,MedicineName,Composition,Manufacturer,MRP,Best Price
0,1,Atrest 12.5mg Tablet 10'S,TETRABENAZINE 12.5MG,ADHD,154.04,123.23
1,2,Atrest 25mg Tablet 10'S,TETRABENAZINE 25MG,ADHD,261.49,209.19
2,3,Capnea Injection 1ml,Caffeine Citrate 20 MG,ADHD,277.38,249.64
3,4,COGNIX + Tablet 10's,GINKGO BILOBA 120 mg,ADHD,117.0,93.6
4,5,NOR 4mg Injection 2ml,NOREPINEPHRINE 4MG,ADHD,56.0,50.4


In [5]:
# number of rows and columns in the data frame

movies_data.shape

(27445, 6)

In [6]:
# selecting the relevant features for recommendation

selected_features = ['MedicineName','Composition','Manufacturer']
print(selected_features)

['MedicineName', 'Composition', 'Manufacturer']


In [7]:
# replacing the null valuess with null string

for feature in selected_features:
  movies_data[feature] = movies_data[feature].fillna('')

In [8]:
# combining all the 5 selected features

combined_features = movies_data['MedicineName']+' '+movies_data['Composition']+' '+movies_data['Manufacturer']

In [9]:
print(combined_features)

0        Atrest 12.5mg Tablet 10'S   TETRABENAZINE 12.5...
1        Atrest 25mg Tablet 10'S   TETRABENAZINE 25MG  ...
2        Capnea Injection 1ml   Caffeine Citrate 20 MG ...
3        COGNIX + Tablet 10's   GINKGO BILOBA 120 mg   ...
4        NOR 4mg Injection 2ml   NOREPINEPHRINE 4MG   ADHD
                               ...                        
27440    Clearvital Cream 30gm   SKIN CARE PREPARATION ...
27441    DS GOLD ANTI AGEING Cream 50gm   SKINCARE PREP...
27442    Nt Age Cream 50gm   ALLANTOIN+ALOE VERA+MAGNES...
27443    Versa Gel 30gm   DICLOFENAC 1.16 %+LINSEED OIL...
27444    Wunder Eye Advanced Cream 15gm   Cosmetics   W...
Length: 27445, dtype: object


In [10]:
# converting the text data to feature vectors

vectorizer = TfidfVectorizer()

In [11]:
feature_vectors = vectorizer.fit_transform(combined_features)

In [12]:
print(feature_vectors)

  (0, 1211)	0.4420181779980559
  (0, 15543)	0.4505658650925616
  (0, 71)	0.0807657912130116
  (0, 15332)	0.07977003197544164
  (0, 727)	0.3307898884074934
  (0, 146)	0.5041208866784507
  (0, 2240)	0.47451463817561473
  (1, 406)	0.483950624676473
  (1, 1211)	0.4848603699134928
  (1, 15543)	0.49423653345798174
  (1, 71)	0.08859393878608532
  (1, 15332)	0.08750166653054231
  (1, 2240)	0.520506518616996
  (2, 10930)	0.11935216299228481
  (2, 318)	0.23925063658384133
  (2, 4105)	0.29427659097860737
  (2, 3298)	0.3930655552015865
  (2, 310)	0.2923698047421842
  (2, 8949)	0.18356218522436346
  (2, 3478)	0.5515813819774021
  (2, 1211)	0.5138071154489208
  (3, 147)	0.3247662279235547
  (3, 2826)	0.4110724028586446
  (3, 7889)	0.40655447366570474
  (3, 4401)	0.525416845377065
  :	:
  (27442, 16502)	0.21301994374512012
  (27442, 1519)	0.21036166758534156
  (27443, 16519)	0.4204473409923962
  (27443, 14425)	0.24991326860932694
  (27443, 9977)	0.27261529757460035
  (27443, 16807)	0.3603736597366885

In [13]:
# getting the similarity scores using cosine similarity

similarity = cosine_similarity(feature_vectors)
print(similarity)

[[1.         0.69812654 0.22711209 ... 0.         0.00555723 0.        ]
 [0.69812654 1.         0.24912471 ... 0.         0.00609586 0.        ]
 [0.22711209 0.24912471 1.         ... 0.         0.         0.        ]
 ...
 [0.         0.         0.         ... 1.         0.28013436 0.33178947]
 [0.00555723 0.00609586 0.         ... 0.28013436 1.         0.36403255]
 [0.         0.         0.         ... 0.33178947 0.36403255 1.        ]]


In [14]:
print(similarity.shape)

(27445, 27445)


In [18]:
movie_name = input(' Enter your favourite movie name : ')

list_of_all_titles = movies_data['MedicineName'].tolist()

find_close_match = difflib.get_close_matches(movie_name, list_of_all_titles)

close_match = find_close_match[0]

index_of_the_movie = movies_data[movies_data.MedicineName==close_match]['index'].values[0]

similarity_score = list(enumerate(similarity[index_of_the_movie]))

sorted_similar_movies = sorted(similarity_score, key = lambda x:x[1], reverse = True) 

print('Movies suggested for you : \n')

i = 1

for movie in sorted_similar_movies:
  index = movie[0]
  title_from_index = movies_data[movies_data.index==index]['MedicineName'].values[0]
  if (i<30):
    print(i, '.',title_from_index)
    i+=1

 Enter your favourite movie name : JETTA 12mg Tablet 2's
Movies suggested for you : 

1 . KRIMFRE Syrup 10ml
2 . KRIMFRE Tablet 10's
3 . PINBAN Suspension 10ml
4 . ODIMECTIN Suspension 10ml
5 . KRIMFRE AL 400mg Tablet 10's
6 . WORMKO PLUS Oral Suspension 10ml
7 . AL Suspension 10ml
8 . Itin A Suspension 10ml
9 . ABLAZE IM Oral Suspension 10ml
10 . BANDY Suspension 10ml
11 . ALBESTAR I Tablet 10's
12 . EXYT Suspension 10ml
13 . ALBESTAR 200 Oral Suspension 10ml
14 . TROYZOLE Suspension 10ml
15 . Evimectina Suspension 10ml
16 . Bandy Plus Suspension 10ml
17 . ABD PLUS Tablet 1's
18 . WORMKO PLUS Tablet 1's
19 . ALGED PLUS Tablet 1's
20 . IVERHOPE PLUS 6 Tablet 1's
21 . XEROWORM Oral Suspension 10ml
22 . ALTEC 200mg Suspension 10ml
23 . VORMOUT 200mg Suspension 10ml
24 . EVIMECTIN A Tablet 1's
25 . PINBAN Chewable Tablet 6's
26 . Anthel UP Suspension 10ml
27 . Ivoral Tablet 1's
28 . IVERFAST 6A Tablet 1's
29 . VERMACT 6 Tablet 4's
