In [None]:
# importing libraries
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
from collections import Counter
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.tree import DecisionTreeClassifier
from sklearn. metrics import confusion_matrix, precision_score, recall_score, f1_score, accuracy_score, classification_report, hamming_loss
import pickle

In [None]:
# read the WebMD review workbook into a dataframe, giving every column an explicit dtype
data = pd.read_excel(
    'webmdexcel.xlsx',
    dtype={
        'Age': str,
        'Condition': str,
        'Date': str,
        'Drug': str,
        'DrugId': int,
        'EaseofUse': 'int8',
        'Effectiveness': 'int8',
        'Reviews': str,
        'Satisfaction': 'int8',
        'Sex': str,
        'Sides': str,
        'UsefulCount': 'int16',
    },
)

In [None]:
# print the dataframe summary produced by DataFrame.info()
data.info()

In [None]:
# (number of rows, number of columns) of the dataframe
data.shape

In [None]:
# preview the first 40 rows of the dataframe
data.iloc[:40]

## MISSING VALUES

In [None]:
# number of missing values in each column
data.isna().sum()

In [None]:
# share of missing values per column, expressed as a percentage of all rows
data.isna().sum().mul(100).div(len(data))

~ The columns 'Age', 'Condition', 'Reviews', 'Sex' and 'Sides' contain null values.

**About this file**
*******
Drug (categorical): name of drug

DrugId (numerical): drug id

Condition (categorical): name of condition

Reviews (text): patient review

Sides (text): side effects associated with drug (if any)

EaseofUse (numerical): 5 star rating 

Effectiveness (numerical): 5 star rating 

Satisfaction (numerical): 5 star rating 

Date (date): date of review entry

UsefulCount (numerical): number of users who found review useful.

Age (categorical): age group range of user (for example '45-54')

Sex (categorical): gender of user

In [None]:
# distinct raw values found in the 'Date' column
pd.unique(data['Date'])

In [None]:
# dtype of every column (inspected before 'Date' is converted below)
data.dtypes

In [None]:
# 'Date' was read as text (object dtype); parse it so that every
# entry shares one datetime representation
data['Date'] = data['Date'].pipe(pd.to_datetime)

In [None]:
# dtype of every column (re-checked after the 'Date' conversion)
data.dtypes

In [None]:
# create a new column 'Year' holding the calendar year of each review.
# The vectorised `.dt.year` accessor replaces a per-row Python lambda;
# it requires 'Date' to already be of datetime dtype.
data['Year'] = data['Date'].dt.year

In [None]:
# preview the first 40 rows, now including the 'Year' column
data.iloc[:40]

In [None]:
# keep the column names of the dataframe as a plain list
col = data.columns.tolist()

# for every column, report how many distinct values it has and what they are
for i in col:
    nuni = data[i].nunique()
    uni = data[i].unique()
    print(f"{i} column \n")
    print(f"Count of unique values = {nuni}\n")
    print(f"Unique values = {uni}\n\n\n")

In [None]:
# rows whose 'EaseofUse' rating is one of the out-of-scale values 6 or 10
data[data['EaseofUse'].isin([6, 10])]

~ Since 'EaseofUse' column is a 5 star rating, replacing 6 with 3 (which is the equivalent number out of 5) and 10 with 5

In [None]:
# map the out-of-scale 'EaseofUse' ratings onto the 5-star scale (10 -> 5, 6 -> 3)
data['EaseofUse'] = data['EaseofUse'].replace({10: 5, 6: 3})

In [None]:
# sanity check: no row should still carry an 'EaseofUse' of 6 or 10
data[data['EaseofUse'].isin([6, 10])]

In [None]:
# rows whose 'Effectiveness' rating is one of the out-of-scale values 6 or 10
data[data['Effectiveness'].isin([6, 10])]

~ Since 'Effectiveness' column is a 5 star rating, replacing 6 with 3 (which is the equivalent number out of 5) and 10 with 5

In [None]:
# map the out-of-scale 'Effectiveness' ratings onto the 5-star scale (10 -> 5, 6 -> 3)
data['Effectiveness'] = data['Effectiveness'].replace({10: 5, 6: 3})

In [None]:
# sanity check: no row should still carry an 'Effectiveness' of 6 or 10
data[data['Effectiveness'].isin([6, 10])]

In [None]:
# rows whose 'Satisfaction' rating is one of the out-of-scale values 6 or 10
data[data['Satisfaction'].isin([6, 10])]

~ Since 'Satisfaction' column is a 5 star rating, replacing 6 with 3 (which is the equivalent number out of 5) and 10 with 5

In [None]:
# map the out-of-scale 'Satisfaction' ratings onto the 5-star scale (10 -> 5, 6 -> 3)
data['Satisfaction'] = data['Satisfaction'].replace({10: 5, 6: 3})

In [None]:
# sanity check: no row should still carry a 'Satisfaction' of 6 or 10
data[data['Satisfaction'].isin([6, 10])]

In [None]:
# cast the 'Age' column to str
# NOTE(review): astype(str) presumably turns missing entries into the literal
# string 'nan'; later cells in this notebook compare 'Age' against 'nan', so
# confirm those comparisons before changing this cast.
data['Age'] = data['Age'].astype(str)

In [None]:
# dtype of every column (re-checked after casting 'Age' to str)
data.dtypes

In [None]:
# distinct values found in the 'Age' column
pd.unique(data['Age'])

~ The unique values of the 'Age' column above include '2023-12-07 00:00:00' and '2023-06-03 00:00:00'. These are data-entry errors: they should read '7-12' and '3-6', the age group ranges of the users.

In [None]:
# rows whose 'Age' holds one of the two date-like erroneous values
data[data['Age'].isin(['2023-12-07 00:00:00', '2023-06-03 00:00:00'])]

In [None]:
# how many rows carry one of the two date-like erroneous 'Age' values
int(data['Age'].isin(['2023-12-07 00:00:00', '2023-06-03 00:00:00']).sum())

~ 2482 rows have errors in the 'Age' column.

In [None]:
# replacing '2023-12-07 00:00:00' with '7-12', '2023-06-03 00:00:00' with '3-6' under 'Age' column
# One vectorised replace handles both values, so no loop over the unique values is needed.
# The result is assigned back to the column: calling replace(..., inplace=True) on the
# `data['Age']` intermediate is chained assignment, which pandas warns about and which
# does not update `data` when Copy-on-Write is enabled.
data['Age'] = data['Age'].replace({
    '2023-12-07 00:00:00': '7-12',
    '2023-06-03 00:00:00': '3-6',
})

In [None]:
# distinct 'Age' values after correcting the date-like entries
pd.unique(data['Age'])

## CHECKING FOR DUPLICATES

In [None]:
# rows that repeat an earlier row in full
data.loc[data.duplicated()]

~ 943 rows are duplicates of other rows.

## EDA

**1) DRUG**

In [None]:
# display top 20 drugs
# The default figure size is read when the figure is created, so it has to be
# set before plotting; assigning it afterwards cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Drug'].value_counts().head(20).plot(kind='bar')
plt.title(label='Top 20 Drugs', fontsize=20, color='red');

In [None]:
# 'Drug' / 'Condition' pairs recorded for the drug 'cymbalta', grouped by drug
list(data.loc[data['Drug'] == 'cymbalta', ['Drug', 'Condition']].groupby(['Drug']))

~ 'cymbalta' was the most widely used drug, used for curing Anxiousness associated with Depression, Chronic Muscle or Bone Pain, Neuropathic Pain etc.

**2) CONDITION**

In [None]:
# first 20 rows of the drug / condition pairing
data[['Drug', 'Condition']].iloc[:20]

In [None]:
# Top 20 conditions
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Condition'].value_counts().head(20).plot(kind='bar')
plt.title(label='Top 20 conditions', fontsize=20, color='red');
plt.show()

~ 'Other' was the most frequently recorded condition, i.e. the majority of the medicines were used for conditions outside the named categories.

In [None]:
# rows where 'Condition' is missing
data.loc[data['Condition'].isna()]

~ When the condition for which the medicine was taken is missing, very few users found the review useful.

**3) EFFECTIVENESS**

In [None]:
# display 'effectiveness' rating of drugs
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Effectiveness'].value_counts().head(20).plot(kind='barh')
plt.title(label='Effectiveness rating', fontsize=20, color='red');
plt.ylabel('Effectiveness rating');
plt.show()

**4) EASE OF USE**

In [None]:
# display 'ease of use' rating of drugs
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['EaseofUse'].value_counts().head(20).plot(kind='barh')
plt.title(label='Ease of Use rating', fontsize=20, color='red');
plt.ylabel('Ease of Use rating');
plt.show()

**5) SATISFACTION**

In [None]:
# pie chart showing the share of each 'Satisfaction' rating
data['Satisfaction'].value_counts().plot.pie(autopct='%1.2f%%');
plt.title(label='Satisfaction rating', fontsize=20, color='red');
plt.show()

~ Majority of the users had given 5 as rating for Satisfaction, Ease of use and Effectiveness of the drug.

**6) SIDES**

In [None]:
# display top 20 side effects of drug
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Sides'].value_counts().head(20).plot(kind='bar')
plt.title(label='Top 20 side effects of drug', fontsize=20, color='red');
plt.show()

~ Most commonly found side effects associated with the drug were Nausea, vomiting, constipation, lightheadedness, dizziness, or drowsiness.

In [None]:
# rows whose 'Sides' text occurs exactly once in the whole column
data[data['Sides'].isin(data['Sides'].value_counts().loc[lambda counts: counts < 2].index)]

In [None]:
# display the 110 least frequent side-effect texts together with their counts
# to_string() returns one long str; printing it renders the table line by line,
# whereas leaving it as the cell's last expression shows a quoted repr with literal '\n'.
print(data['Sides'].value_counts().tail(110).to_string())

In [None]:
# rows where 'Sides' is missing
data.loc[data['Sides'].isna()]

In [None]:
# rows where 'Sides' is missing and 'Condition' equals 'Other'
data.loc[data['Sides'].isna() & data['Condition'].eq('Other')]

~ 17460 rows have missing side effects, of which 6468 have 'Condition' recorded as 'Other'.

**7) AGE**

In [None]:
# display distribution of Age
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Age'].value_counts().plot(kind='bar')
plt.title(label='Age distribution', fontsize=20, color='red');
plt.show()

~ The 45-54 age group contributed the most reviews and the 0-2 age group the fewest.

In [None]:
# rows where 'Age' holds the string 'nan', i.e. the age is missing
data.loc[data['Age'].eq('nan')]

In [None]:
# rows where 'Age' is the string 'nan' and 'Sex' is missing as well
data.loc[data['Age'].eq('nan') & data['Sex'].isna()]

~ 12202 rows have a missing 'Age', of which 7510 are also missing 'Sex'.

**8) GENDER**

In [None]:
# display distribution of Gender
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Sex'].value_counts().head(20).plot(kind='pie', autopct = '%1.2f%%')
plt.title(label='Gender distribution', fontsize=20, color='red');
plt.show()

~ Majority of the user reviews were given by females when compared to males.

In [None]:
# rows where 'Sex' (gender) is missing
data.loc[data['Sex'].isna()]

**9) USEFUL COUNT**

In [None]:
# order the reviews from most to least useful, then show the key columns of the top 20
highest_usefulcount = data.sort_values('UsefulCount', ascending=False)
highest_usefulcount.loc[
    :,
    ['UsefulCount', 'Drug', 'Condition', 'Reviews', 'Sides', 'EaseofUse', 'Effectiveness', 'Satisfaction'],
].head(20)

~ Rows are displayed with highest number of users who found the review useful for a particular drug. 'reclast bottle, infusion' drug had got the highest 'UsefulCount' indicating that review regarding this particular drug was useful to many users. 

**10) YEAR**

In [None]:
# display distribution of Year
# Set the default figure size before plotting; it is read when the figure is
# created, so assigning it after the plot call cannot resize this chart.
plt.rcParams['figure.figsize'] = (10, 7)
data['Year'].value_counts().plot(kind='bar')
plt.title(label='Year wise user reviews', fontsize=20, color='red');
plt.show()

~ User reviews on specific drugs were collected over the 13-year span from 2007 to 2020; 2009 had the most reviews and 2020 the fewest.

**11) REVIEWS**

In [None]:
# the 20 most frequent review texts and how often each occurs
data['Reviews'].value_counts().iloc[:20]

### DATA UNDERSTANDING

* The dataset has 362806 rows and 12 columns.

* User reviews on specific drugs were collected over the 13-year span from 2007 to 2020; 2009 had the most reviews and 2020 the fewest.

* The 45-54 age group contributed the most reviews and the 0-2 age group the fewest.

* 'Other' was the most frequently recorded condition, i.e. the majority of the medicines were used for conditions outside the named categories. When the condition for which the medicine was taken is missing, very few users found the review useful.

* 'cymbalta' was the most widely used drug, used for curing Anxiousness associated with Depression, Chronic Muscle or Bone Pain, Neuropathic Pain etc.

* Majority of the users had given 5 as rating for Satisfaction, Ease of use and Effectiveness of the drug.

* Majority of the user reviews were given by females when compared to males.

* The most commonly reported side effects were nausea, vomiting, constipation, lightheadedness, dizziness, or drowsiness. 17460 rows have missing side effects, of which 6468 have 'Condition' recorded as 'Other'.

* 'reclast bottle, infusion' drug had got the highest 'UsefulCount' indicating that review regarding this particular drug was useful to many users. 

## FILLING MISSING VALUES

In [None]:
# print the dataframe summary again before the missing values are filled
data.info()

In [None]:
# number of missing values in each column before filling
data.isna().sum()

~ Missing values are present in the 'Age', 'Condition', 'Reviews', 'Sex' and 'Sides' columns, all of which are of object type, so each is filled with the mode of its column.

### a) AGE COLUMN

In [None]:
# distinct 'Age' values before the missing entries are filled
pd.unique(data['Age'])

~ Missing values in the 'Age' column appear as the string 'nan'.

In [None]:
# most frequent value of the 'Age' column
data['Age'].mode().iloc[0]

In [None]:
# replacing missing value in 'Age' column with its mode
# Missing ages are stored as the string 'nan' in this column, so that sentinel is
# excluded when the mode is computed. Deriving the mode from the data (instead of
# hard-coding it) keeps the fill correct if the dataset changes.
age_is_missing = data['Age'] == 'nan'
age_mode = data.loc[~age_is_missing, 'Age'].mode()[0]
data.loc[age_is_missing, 'Age'] = age_mode

In [None]:
# distinct 'Age' values after the missing entries were filled
pd.unique(data['Age'])

### b) CONDITION COLUMN

In [None]:
# most frequent value of the 'Condition' column
data['Condition'].mode().iloc[0]

In [None]:
# rows where 'Condition' is still missing (before filling)
data.loc[data['Condition'].isna()]

In [None]:
# filling missing values in 'Condition' column with its mode
# mode() ignores missing entries; computing it here avoids a hard-coded copy of
# the value that would go stale if the dataset changes.
data['Condition'] = data['Condition'].fillna(data['Condition'].mode()[0])

In [None]:
# sanity check: no row should have a missing 'Condition' any more
data.loc[data['Condition'].isna()]

### c) REVIEWS COLUMN

In [None]:
# most frequent value of the 'Reviews' column
data['Reviews'].mode().iloc[0]

In [None]:
# rows where 'Reviews' is still missing (before filling)
data.loc[data['Reviews'].isna()]

In [None]:
# replace every missing 'Reviews' entry with the text 'good'
# NOTE(review): 'good' is presumably the column mode shown by the earlier mode cell — confirm.
data['Reviews'] = data['Reviews'].fillna('good')

In [None]:
# sanity check: no row should have a missing 'Reviews' any more
data.loc[data['Reviews'].isna()]

### d) SEX COLUMN

In [None]:
# most frequent value of the 'Sex' column
data['Sex'].mode().iloc[0]

In [None]:
# rows where 'Sex' is still missing (before filling)
data.loc[data['Sex'].isna()]

In [None]:
# filling missing values in 'Sex' column with its mode
# mode() ignores missing entries; computing it here avoids a hard-coded copy of
# the value that would go stale if the dataset changes.
data['Sex'] = data['Sex'].fillna(data['Sex'].mode()[0])

In [None]:
# sanity check: no row should have a missing 'Sex' any more
data.loc[data['Sex'].isna()]

### e) SIDES COLUMN

In [None]:
# most frequent value of the 'Sides' column
data['Sides'].mode().iloc[0]

In [None]:
# rows where 'Sides' is still missing (before filling)
data.loc[data['Sides'].isna()]

In [None]:
# filling missing values in 'Sides' column with its mode
# The mode is taken from the data rather than retyped as a literal: the side-effect
# texts contain irregular spacing, so a hand-copied string that differs by a single
# character would create a new category instead of matching the existing one.
data['Sides'] = data['Sides'].fillna(data['Sides'].mode()[0])

In [None]:
# sanity check: no row should have a missing 'Sides' any more
data.loc[data['Sides'].isna()]

In [None]:
# keep, as a separate dataframe, the rows whose 'Sides' text occurs exactly once
data3 = data[data['Sides'].isin(data['Sides'].value_counts().loc[lambda counts: counts < 2].index)]

In [None]:
# number of rows whose side-effect text occurs exactly once
data3.shape[0]

In [None]:
# the 'Sides' column of data3 as a plain Python list
data3['Sides'].tolist()

In [None]:
# store the once-only side-effect texts as a plain Python list for the filtering below
sides_1list = data3['Sides'].tolist()

In [None]:
# display the list of side-effect texts built from data3['Sides']
sides_1list

In [None]:
# among the once-only side-effect texts, print and collect (in `a`) every text
# that mentions "Nausea" or "nausea"
a = []
for i in sides_1list:
    if "Nausea" in i or "nausea" in i:
        print(i)
        a.append(i)

In [None]:
# the once-only side-effect texts that were not collected in `a` (no mention of nausea)
other_sides = list(filter(lambda feature: feature not in a, sides_1list))
other_sides

In [None]:
# side effects of medicine which  is mentioned only once, grouping into various classifications
for side in data['Sides'].unique():
    if side=='Nausea ,  vomiting , loss of appetite, or mild muscle/ joint pain  may occur.': 
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite, or mild muscle/ joint pain  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  constipation ,  lightheadedness ,  dizziness , drowsiness, fever, or  headache  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  constipation ,  lightheadedness ,  dizziness , drowsiness, fever, or  headache  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea , nervousness, or  trouble sleeping  may occur.':
        data['Sides'].replace({'Nausea , nervousness, or  trouble sleeping  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Infusion reactions (such as flushing,  itching ),  nausea , or  headache  may occur.':
        data['Sides'].replace({'Infusion reactions (such as flushing,  itching ),  nausea , or  headache  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea , gas, or  diarrhea  may occur.':
        data['Sides'].replace({'Nausea , gas, or  diarrhea  may occur.':'Nausea etc.'},inplace=True) 
    elif side=='Nausea ,  vomiting ,  weakness , loss of appetite,  diarrhea , or pain/swelling/redness at injection site may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  weakness , loss of appetite,  diarrhea , or pain/swelling/redness at injection site may occur.':'Nausea etc.'},inplace=True) 
    elif side=='Tiredness, abnormal taste,  hair loss ,  headache ,  weight loss ,  nausea / vomiting ,  diarrhea ,  stomach / abdominal pain , mild  itching , or decreased appetite may occur.':
        data['Sides'].replace({'Tiredness, abnormal taste,  hair loss ,  headache ,  weight loss ,  nausea / vomiting ,  diarrhea ,  stomach / abdominal pain , mild  itching , or decreased appetite may occur.':'Nausea etc.'},inplace=True)
    elif side=='Headache , flushing,  dizziness , drowsiness,  nausea ,  vomiting , loss of appetite,  diarrhea ,  constipation , sensitive  skin , or  flu -like symptoms (such as  sore throat ,  cough , body aches) may occur.':
        data['Sides'].replace({'Headache , flushing,  dizziness , drowsiness,  nausea ,  vomiting , loss of appetite,  diarrhea ,  constipation , sensitive  skin , or  flu -like symptoms (such as  sore throat ,  cough , body aches) may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea , lip/ mouth  sores, or loss of appetite may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea , lip/ mouth  sores, or loss of appetite may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea , fever, chills,  abdominal pain , flushing, and  dizziness  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea , fever, chills,  abdominal pain , flushing, and  dizziness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , loss of appetite,  constipation ,  dry mouth , difficulty swallowing, drowsiness,  dizziness ,  headache ,  trouble sleeping , muscle/ joint pain , or darkening of the  skin  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite,  constipation ,  dry mouth , difficulty swallowing, drowsiness,  dizziness ,  headache ,  trouble sleeping , muscle/ joint pain , or darkening of the  skin  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  dizziness ,  stomach  upset, and abdominal  cramps  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  dizziness ,  stomach  upset, and abdominal  cramps  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Stomach  upset,  nausea ,  vomiting , loss of appetite, or  headache  may occur.':
        data['Sides'].replace({'Stomach  upset,  nausea ,  vomiting , loss of appetite, or  headache  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , loss of appetite, unusual taste/dryness in the  mouth ,  dizziness ,  diarrhea , or redness/pain/leakage at the injection site may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite, unusual taste/dryness in the  mouth ,  dizziness ,  diarrhea , or redness/pain/leakage at the injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Headache ,  nausea ,  vomiting ,  diarrhea ,  dizziness , drowsiness, numbness/tingling, or  trouble sleeping  may occur.':
        data['Sides'].replace({'Headache ,  nausea ,  vomiting ,  diarrhea ,  dizziness , drowsiness, numbness/tingling, or  trouble sleeping  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea / vomiting ,  diarrhea ,  stomach / abdominal pain ,  mouth  sores,  constipation ,  hemorrhoids ,  headache ,  dizziness , tiredness, muscle/ joint pain , or  trouble sleeping  may occur.':
        data['Sides'].replace({'Nausea / vomiting ,  diarrhea ,  stomach / abdominal pain ,  mouth  sores,  constipation ,  hemorrhoids ,  headache ,  dizziness , tiredness, muscle/ joint pain , or  trouble sleeping  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , loss of appetite, abdominal  cramps ,  diarrhea , and  heartburn  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite, abdominal  cramps ,  diarrhea , and  heartburn  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  diarrhea , and  upset stomach  may occur as your body adjusts to the  metformin .':
        data['Sides'].replace({'Nausea ,  diarrhea , and  upset stomach  may occur as your body adjusts to the  metformin .':'Nausea etc.'},inplace=True)
    elif side=='Headache ,  hair loss ,  dizziness ,  diarrhea ,  constipation ,  nausea ,  vomiting ,  muscle pain , swelling of arms/legs, and redness/pain/swelling/ itching /bleeding at injection site may occur.':
        data['Sides'].replace({'Headache ,  hair loss ,  dizziness ,  diarrhea ,  constipation ,  nausea ,  vomiting ,  muscle pain , swelling of arms/legs, and redness/pain/swelling/ itching /bleeding at injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  lightheadedness ,  dizziness , or a feeling of spinning may occur.':
        data['Sides'].replace({'Nausea ,  lightheadedness ,  dizziness , or a feeling of spinning may occur.':'Nausea etc.'},inplace=True)
    elif side=='Dizziness ,  lightheadedness ,  diarrhea ,  nausea ,  vomiting ,  bloating /gas,  headache ,  weakness , or increased urination may occur.':
        data['Sides'].replace({'Dizziness ,  lightheadedness ,  diarrhea ,  nausea ,  vomiting ,  bloating /gas,  headache ,  weakness , or increased urination may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea , abdominal/ stomach pain ,  cough ,  dizziness ,  headache , flushing, or loss of appetite may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea , abdominal/ stomach pain ,  cough ,  dizziness ,  headache , flushing, or loss of appetite may occur.':'Nausea etc.'},inplace=True)
    elif side=='Headache ,  nausea , chills, or fever may occur.':
        data['Sides'].replace({'Headache ,  nausea , chills, or fever may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  stomach / abdominal pain , gas,  diarrhea ,  weight loss ,  headache ,  muscle cramps ,  dizziness , or  weakness  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  stomach / abdominal pain , gas,  diarrhea ,  weight loss ,  headache ,  muscle cramps ,  dizziness , or  weakness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Drowsiness,  dizziness ,  headache ,  nausea ,  vomiting , loss of appetite,  constipation ,  stomach  upset,  blurred vision , decreased coordination, and  dry mouth /nose/throat may occur.':
        data['Sides'].replace({'Drowsiness,  dizziness ,  headache ,  nausea ,  vomiting , loss of appetite,  constipation ,  stomach  upset,  blurred vision , decreased coordination, and  dry mouth /nose/throat may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea , tiredness, loss of appetite,  weakness , and fever may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea , tiredness, loss of appetite,  weakness , and fever may occur.':'Nausea etc.'},inplace=True)
    elif side=='Stomach  upset/fullness,  nausea ,  headache , and  weight  gain may occur.':
        data['Sides'].replace({'Stomach  upset/fullness,  nausea ,  headache , and  weight  gain may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , loss of appetite,  mouth  sores, changes in taste,  constipation , tiredness,  dizziness ,  trouble sleeping ,  headache , or pain/redness/swelling at the injection site may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite,  mouth  sores, changes in taste,  constipation , tiredness,  dizziness ,  trouble sleeping ,  headache , or pain/redness/swelling at the injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  upset stomach , loss of appetite,  weight loss ,  constipation , tiredness/ weakness ,  headache , or change in how food tastes may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  upset stomach , loss of appetite,  weight loss ,  constipation , tiredness/ weakness ,  headache , or change in how food tastes may occur.':'Nausea etc.'},inplace=True)
    elif side=='Diarrhea ,  nausea , drowsiness,  dizziness ,  headache  or  vomiting  may occur.':
        data['Sides'].replace({'Diarrhea ,  nausea , drowsiness,  dizziness ,  headache  or  vomiting  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Hoarseness, changes in your voice,  nausea ,  diarrhea ,  headache , tiredness, decreased  weight , or muscle/ joint pain  may occur.':
        data['Sides'].replace({'Hoarseness, changes in your voice,  nausea ,  diarrhea ,  headache , tiredness, decreased  weight , or muscle/ joint pain  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea ,  constipation ,  stomach  upset,  dizziness , or pain/ itching /swelling at injection site may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea ,  constipation ,  stomach  upset,  dizziness , or pain/ itching /swelling at injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea ,  dizziness ,  headache , or unusual tiredness may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea ,  dizziness ,  headache , or unusual tiredness may occur.':'Nausea etc.'},inplace=True)
    elif side=='Hot flashes , tiredness,  joint pain ,  nausea , decreased appetite,  weight loss , or decreased sexual interest/ability may occur.':
        data['Sides'].replace({'Hot flashes , tiredness,  joint pain ,  nausea , decreased appetite,  weight loss , or decreased sexual interest/ability may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  diarrhea , abdominal/ stomach pain , increased  saliva , strange  metallic taste  in the  mouth , loss of appetite, or sores in the  mouth  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  diarrhea , abdominal/ stomach pain , increased  saliva , strange  metallic taste  in the  mouth , loss of appetite, or sores in the  mouth  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , loss of appetite, flushing,  dry skin ,  headache , leg  cramps , decreased sexual interest/ability, and male  breast  tenderness/enlargement may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , loss of appetite, flushing,  dry skin ,  headache , leg  cramps , decreased sexual interest/ability, and male  breast  tenderness/enlargement may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , tiredness, loss of appetite,  diarrhea ,  weight loss , swelling hands/ ankles /feet, or  weakness  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , tiredness, loss of appetite,  diarrhea ,  weight loss , swelling hands/ ankles /feet, or  weakness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Drowsiness,  dizziness ,  lightheadedness ,  nausea ,  vomiting , or  constipation  may occur.':
        data['Sides'].replace({'Drowsiness,  dizziness ,  lightheadedness ,  nausea ,  vomiting , or  constipation  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Pain/soreness/redness/swelling at the injection site, fever,  headache , tiredness,  sore throat ,  nausea ,  diarrhea , loss of appetite, and  dizziness  may occur.':
        data['Sides'].replace({'Pain/soreness/redness/swelling at the injection site, fever,  headache , tiredness,  sore throat ,  nausea ,  diarrhea , loss of appetite, and  dizziness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Pain/redness/swelling at the injection site, fever, tiredness,  headache ,  nausea , and  diarrhea  may occur.':
        data['Sides'].replace({'Pain/redness/swelling at the injection site, fever, tiredness,  headache ,  nausea , and  diarrhea  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting , tiredness, and loss of appetite may occur.':
        data['Sides'].replace({'Nausea ,  vomiting , tiredness, and loss of appetite may occur.':'Nausea etc.'},inplace=True)
    elif side=='Numbness/tingling, change in the sense of taste,  nausea ,  diarrhea ,  weight loss ,  muscle spasms / twitching , tiredness,  dizziness , or drowsiness may occur.':
        data['Sides'].replace({'Numbness/tingling, change in the sense of taste,  nausea ,  diarrhea ,  weight loss ,  muscle spasms / twitching , tiredness,  dizziness , or drowsiness may occur.':'Nausea etc.'},inplace=True)
    elif side=='Dizziness ,  headache , and  nausea  may occur.':
        data['Sides'].replace({'Dizziness ,  headache , and  nausea  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Stomach  upset,  nausea ,  diarrhea , or drowsiness may occur.':
        data['Sides'].replace({'Stomach  upset,  nausea ,  diarrhea , or drowsiness may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  headache ,  bloating ,  breast  tenderness,  acne , or  weight  gain may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  headache ,  bloating ,  breast  tenderness,  acne , or  weight  gain may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  heartburn ,  constipation , shaking (tremor), swelling  ankles /feet/hands, or  dizziness  may occur.':
        data['Sides'].replace({'Nausea ,  heartburn ,  constipation , shaking (tremor), swelling  ankles /feet/hands, or  dizziness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Pain at injection site, chills, tingling, flushing,  headache ,  nausea , or  vomiting  may occur.':
        data['Sides'].replace({'Pain at injection site, chills, tingling, flushing,  headache ,  nausea , or  vomiting  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  stomach pain ,  diarrhea ,  constipation , or pain/redness at the injection site may occur.':
        data['Sides'].replace({'Nausea ,  stomach pain ,  diarrhea ,  constipation , or pain/redness at the injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  headache , runny/stuffy nose, nose/throat  itching , or watery  eyes  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  headache , runny/stuffy nose, nose/throat  itching , or watery  eyes  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  vomiting ,  stomach / abdominal pain , or  joint pain  may occur.':
        data['Sides'].replace({'Nausea ,  vomiting ,  stomach / abdominal pain , or  joint pain  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Diarrhea ,  constipation ,  abdominal pain / cramps ,  nausea ,  vomiting , gas, or  bloating  may occur.':
        data['Sides'].replace({'Diarrhea ,  constipation ,  abdominal pain / cramps ,  nausea ,  vomiting , gas, or  bloating  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Tiredness,  weakness ,  nausea , or arm/leg/hand/ foot pain  may occur.':
        data['Sides'].replace({'Tiredness,  weakness ,  nausea , or arm/leg/hand/ foot pain  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  headache , and  diarrhea  may occur.':
        data['Sides'].replace({'Nausea ,  headache , and  diarrhea  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Coughing , stuffy nose,  sneezing ,  headache ,  nausea , or tearing may occur.':
        data['Sides'].replace({'Coughing , stuffy nose,  sneezing ,  headache ,  nausea , or tearing may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  upset stomach ,  abdominal pain , gas/ bloating ,  diarrhea ,  headache ,  back pain , arm/ leg pain ,  dizziness , or  weakness  may occur.':
        data['Sides'].replace({'Nausea ,  upset stomach ,  abdominal pain , gas/ bloating ,  diarrhea ,  headache ,  back pain , arm/ leg pain ,  dizziness , or  weakness  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Constipation ,  nausea ,  headache ,  diarrhea ,  vomiting ,  stomach  upset, gas,  dizziness , drowsiness,  trouble sleeping , tremor, or redness/swelling at the injection site may occur.':
        data['Sides'].replace({'Constipation ,  nausea ,  headache ,  diarrhea ,  vomiting ,  stomach  upset, gas,  dizziness , drowsiness,  trouble sleeping , tremor, or redness/swelling at the injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='Diarrhea ,  constipation ,  nausea ,  vomiting , abdominal  cramps /pain, or  heartburn  may occur.':
        data['Sides'].replace({'Diarrhea ,  constipation ,  nausea ,  vomiting , abdominal  cramps /pain, or  heartburn  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Nausea ,  headache ,  constipation , or pain/discomfort at the injection site may occur.':
        data['Sides'].replace({'Nausea ,  headache ,  constipation , or pain/discomfort at the injection site may occur.':'Nausea etc.'},inplace=True)
    elif side=='(See also How to Use section). Nausea ,  abdominal pain ,  diarrhea ,  vomiting , or flushing may occur.':
        data['Sides'].replace({'(See also How to Use section). Nausea ,  abdominal pain ,  diarrhea ,  vomiting , or flushing may occur.':'Nausea etc.'},inplace=True)
    elif side=='Diarrhea ,  nausea , or  heartburn  may occur.':
        data['Sides'].replace({'Diarrhea ,  nausea , or  heartburn  may occur.':'Nausea etc.'},inplace=True)
    elif side=='Temporary burning/stinging of the  eye , itchy/red  eyes ,  headache , or  dizziness  may occur.':
        data['Sides'].replace({'Temporary burning/stinging of the  eye , itchy/red  eyes ,  headache , or  dizziness  may occur.':'Stinging of the eyes etc.'},inplace=True)
    elif side=='Temporary stinging/discomfort of the  eye , watery/dry/itchy/red  eyes ,  blurred vision , feeling as if something is in the  eye ,  headache ,  trouble sleeping , or  dizziness  may occur.':
        data['Sides'].replace({'Temporary stinging/discomfort of the  eye , watery/dry/itchy/red  eyes ,  blurred vision , feeling as if something is in the  eye ,  headache ,  trouble sleeping , or  dizziness  may occur.':'Stinging of the eyes etc.'},inplace=True)
    elif side=='Stinging/burning of the  eyes  for 1 to 2 minutes and temporary  blurred vision  may occur when you apply this  medication . Watery  eyes  may also occur.':
        data['Sides'].replace({'Stinging/burning of the  eyes  for 1 to 2 minutes and temporary  blurred vision  may occur when you apply this  medication . Watery  eyes  may also occur.':'Stinging of the eyes etc.'},inplace=True)
    elif side=='Dizziness , decreased appetite, or  constipation  may occur.':
        data['Sides'].replace({'Dizziness , decreased appetite, or  constipation  may occur.':'Dizziness, constipation etc.'},inplace=True)
    elif side=='Drowsiness,  dizziness ,  headache , irritability,  stomach  upset,  vision  changes (e.g.,  blurred vision ), decreased coordination,  constipation , or  dry mouth /nose/throat may occur.':
        data['Sides'].replace({'Drowsiness,  dizziness ,  headache , irritability,  stomach  upset,  vision  changes (e.g.,  blurred vision ), decreased coordination,  constipation , or  dry mouth /nose/throat may occur.':'Dizziness, constipation etc.'},inplace=True)
    elif side=='Flushing of the face and neck along with warmth,  headache ,  itching , burning,  sweating , chills, or tingling may occur within 20 minutes of taking this  medication . Flushing may persist for a few hours after use.':
        data['Sides'].replace({'Flushing of the face and neck along with warmth,  headache ,  itching , burning,  sweating , chills, or tingling may occur within 20 minutes of taking this  medication . Flushing may persist for a few hours after use.':'Flushing of the upper body etc.'},inplace=True)
    elif side=='Flushing of the upper body may occur if this  medication  is injected too fast ("red man syndrome").':
        data['Sides'].replace({'Flushing of the upper body may occur if this  medication  is injected too fast ("red man syndrome").':'Flushing of the upper body etc.'},inplace=True)
    elif side=='The suspension may cause  headache ,  upset stomach , or  diarrhea . If any of these effects occur, tell your doctor or  pharmacist  promptly and discuss switching to other forms of  nitisinone .':
        data['Sides'].replace({'The suspension may cause  headache ,  upset stomach , or  diarrhea . If any of these effects occur, tell your doctor or  pharmacist  promptly and discuss switching to other forms of  nitisinone .':'Headache, diarrhea etc.'},inplace=True)
    elif side=='Diarrhea ,  headache ,  mouth  pain/sores,  runny nose , voice changes,  weight loss ,  weakness , tiredness, or darkening of the  skin  may occur.':
        data['Sides'].replace({'Diarrhea ,  headache ,  mouth  pain/sores,  runny nose , voice changes,  weight loss ,  weakness , tiredness, or darkening of the  skin  may occur.':'Headache, diarrhea etc.'},inplace=True)
    elif side=='Headache ,  diarrhea ,  runny nose ,  sore throat ,  joint / muscle pain , or  trouble sleeping  may occur.':
        data['Sides'].replace({'Headache ,  diarrhea ,  runny nose ,  sore throat ,  joint / muscle pain , or  trouble sleeping  may occur.':'Headache, diarrhea etc.'},inplace=True)
    elif side=='Temporary redness, stinging, or irritation at the application site may occur.':
        data['Sides'].replace({'Temporary redness, stinging, or irritation at the application site may occur.':'Irritation at the application site may occur etc.'},inplace=True)
    elif side=='Redness, mild  itching , or irritation at the application site may occur.':
        data['Sides'].replace({'Redness, mild  itching , or irritation at the application site may occur.':'Irritation at the application site may occur etc.'},inplace=True)
    elif side=='Burning, stinging, tingling,  rash ,  dry skin , or irritation at the application site may occur.':
        data['Sides'].replace({'Burning, stinging, tingling,  rash ,  dry skin , or irritation at the application site may occur.':'Irritation at the application site may occur etc.'},inplace=True)
    elif side=='Stuffy/ runny nose ,  cough ,  sore throat , or  sneezing  may occur.':
        data['Sides'].replace({'Stuffy/ runny nose ,  cough ,  sore throat , or  sneezing  may occur.':'Stuffy nose etc.'},inplace=True)
    elif side=='Dizziness ,  lightheadedness , drowsiness,  headache , stuffy nose, and  weakness  may occur as your body adjusts to the  medication . If any of these effects persist or worsen, tell your doctor or  pharmacist  promptly.':
        data['Sides'].replace({'Dizziness ,  lightheadedness , drowsiness,  headache , stuffy nose, and  weakness  may occur as your body adjusts to the  medication . If any of these effects persist or worsen, tell your doctor or  pharmacist  promptly.':'Stuffy nose etc.'},inplace=True)
    elif side=='Darkening  skin  or  dry mouth  may occur.':
        data['Sides'].replace({'Darkening  skin  or  dry mouth  may occur.':'Discoloration of the  skin etc.'},inplace=True)
    elif side=='Discoloration of the  skin  or pain, tenderness, or swelling along your veins may occur.':
        data['Sides'].replace({'Discoloration of the  skin  or pain, tenderness, or swelling along your veins may occur.':'Discoloration of the  skin etc.'},inplace=True)
    elif side=='Redness, swelling, or lightening of the  skin  may occur.':
        data['Sides'].replace({'Redness, swelling, or lightening of the  skin  may occur.':'Discoloration of the  skin etc.'},inplace=True)
    elif side=='Pain at the injection site  dizziness ,  lightheadedness ,  weakness ,  muscle cramps ,  upset stomach , or  diarrhea  may occur.':
        data['Sides'].replace({'Pain at the injection site  dizziness ,  lightheadedness ,  weakness ,  muscle cramps ,  upset stomach , or  diarrhea  may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Pain/redness/warmth/bruising/swelling at the injection site, tiredness, fever, or  headache  may occur.':
        data['Sides'].replace({'Pain/redness/warmth/bruising/swelling at the injection site, tiredness, fever, or  headache  may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Pain, bruising,  itching , redness, or swelling at the injection site may occur.':
        data['Sides'].replace({'Pain, bruising,  itching , redness, or swelling at the injection site may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Pain/redness/swelling at the injection site, limited arm movement, muscle ache,  headache , or tiredness may occur.':
        data['Sides'].replace({'Pain/redness/swelling at the injection site, limited arm movement, muscle ache,  headache , or tiredness may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Redness, pain, or tenderness at the injection site may occur.':
        data['Sides'].replace({'Redness, pain, or tenderness at the injection site may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Pain/swelling/redness at the injection site may occur.':
        data['Sides'].replace({'Pain/swelling/redness at the injection site may occur.':'Pain at the injection site etc.'},inplace=True)
    elif side=='Mild  skin  burning, redness, and peeling of the treated area may occur as expected.':
        data['Sides'].replace({'Mild  skin  burning, redness, and peeling of the treated area may occur as expected.':'redness, and peeling of the skin etc.'},inplace=True)
    elif side=='Skin  reactions such as peeling,  itching , irritation, and redness may occur, especially at the start of treatment.':
        data['Sides'].replace({'Skin  reactions such as peeling,  itching , irritation, and redness may occur, especially at the start of treatment.':'redness, and peeling of the skin etc.'},inplace=True)
    elif side=='Stomach  upset or  diarrhea  may occur.':
        data['Sides'].replace({'Stomach  upset or  diarrhea  may occur.':'Stomach  upset,  diarrhea etc.'},inplace=True)
    elif side=='Stomach  upset,  diarrhea , or  bloating  may occur.':
        data['Sides'].replace({'Stomach  upset,  diarrhea , or  bloating  may occur.':'Stomach  upset,  diarrhea etc.'},inplace=True)
    elif side=='Pain at the injection site,  dizziness ,  lightheadedness ,  weakness ,  muscle cramps ,  upset stomach , or  diarrhea  may occur.':
        data['Sides'].replace({'Pain at the injection site,  dizziness ,  lightheadedness ,  weakness ,  muscle cramps ,  upset stomach , or  diarrhea  may occur.':'Stomach  upset,  diarrhea etc.'},inplace=True)       
    elif side=='Irritation or redness of the  skin  may occur.':
        data['Sides'].replace({'Irritation or redness of the  skin  may occur.':'Skin  redness etc.'},inplace=True)
    elif side=='Skin  redness, peeling, oiliness, or dryness may occur.':
        data['Sides'].replace({'Skin  redness, peeling, oiliness, or dryness may occur.':'Skin  redness etc.'},inplace=True)
    elif side=='Irritation, redness, or pain at the injection site may occur.':
        data['Sides'].replace({'Irritation, redness, or pain at the injection site may occur.':'Irritation at the injection site etc.'},inplace=True)
    elif side=='Mild irritation at the injection site may occur.':
        data['Sides'].replace({'Mild irritation at the injection site may occur.':'Irritation at the injection site etc.'},inplace=True)
    elif side=='You may experience  dizziness ,  lightheadedness , and  blurred vision  as your body adjusts to the  medication .':
        data['Sides'].replace({'You may experience  dizziness ,  lightheadedness , and  blurred vision  as your body adjusts to the  medication .':'Dizziness,  lightheadedness, blurred vision  as your body adjusts to the  medication etc .'},inplace=True)
    elif side=='Drowsiness,  dizziness ,  lightheadedness , tiredness,  blurred vision , decreased sexual ability/interest,  nausea ,  constipation , and  dry mouth  may occur as your body adjusts to the  medication . If any of these effects persist or worsen, tell your doctor or  pharmacist  promptly.':
        data['Sides'].replace({'Drowsiness,  dizziness ,  lightheadedness , tiredness,  blurred vision , decreased sexual ability/interest,  nausea ,  constipation , and  dry mouth  may occur as your body adjusts to the  medication . If any of these effects persist or worsen, tell your doctor or  pharmacist  promptly.':'Dizziness,  lightheadedness, blurred vision  as your body adjusts to the  medication etc .'},inplace=True)
    elif side=='Swelling in your  ankles , feet, or hands may occur.':
        data['Sides'].replace({'Swelling in your  ankles , feet, or hands may occur.':'Swelling hands/ ankles /feet etc.'},inplace=True)
    elif side=='Dizziness  or light-headedness may occur as your body adjusts to the  medication . Dry  cough , swelling hands/ ankles /feet, flushing, or  nausea  may also occur.':
        data['Sides'].replace({'Dizziness  or light-headedness may occur as your body adjusts to the  medication . Dry  cough , swelling hands/ ankles /feet, flushing, or  nausea  may also occur.':'Swelling hands/ ankles /feet etc.'},inplace=True)
    elif side=='Tiredness,  mouth /throat pain, or fast/pounding heartbeat may occur.':
        data['Sides'].replace({'Tiredness,  mouth /throat pain, or fast/pounding heartbeat may occur.':'Tiredness etc.'},inplace=True)
    elif side=='Cough ,  stomach pain , or tiredness may occur.':
        data['Sides'].replace({'Cough ,  stomach pain , or tiredness may occur.':'Tiredness etc.'},inplace=True)
    elif side=='Headache , body aches,  diarrhea , or  vomiting  may occur.':
        data['Sides'].replace({'Headache , body aches,  diarrhea , or  vomiting  may occur.':'Diarrhea etc.'},inplace=True)
    elif side=='Diarrhea  is a common side effect.':
        data['Sides'].replace({'Diarrhea  is a common side effect.':'Diarrhea etc.'},inplace=True)
    elif side=='Chest soreness may occur.':
        data['Sides'].replace({'Chest soreness may occur.':'Chest discomfort may occur etc.'},inplace=True)
    elif side=='Cough , chest discomfort, or  headache  may occur.':
        data['Sides'].replace({'Cough , chest discomfort, or  headache  may occur.':'Chest discomfort may occur etc.'},inplace=True)
    elif side=='Fever or  dizziness  may occur.':
        data['Sides'].replace({'Fever or  dizziness  may occur.':'Dizziness  may occur etc.'},inplace=True)
    elif side=='Tiredness, slow heartbeat,  dizziness , or  lightheadedness  may occur.':
        data['Sides'].replace({'Tiredness, slow heartbeat,  dizziness , or  lightheadedness  may occur.':'Dizziness  may occur etc.'},inplace=True)
    elif side=='This  medication  is expected to increase urination.':
        data['Sides'].replace({'This  medication  is expected to increase urination.':'Associated with urination sometimes with blood etc.'},inplace=True)
    elif side=='Bladder  irritation (e.g., difficult/painful/frequent/bloody urination), fever, or chills may occur.':
        data['Sides'].replace({'Bladder  irritation (e.g., difficult/painful/frequent/bloody urination), fever, or chills may occur.':'Associated with urination sometimes with blood etc.'},inplace=True)
    elif side=='Burning,  itching , irritation, dryness,  rash ,  skin  thinning/streaks may occur.':
        data['Sides'].replace({'Burning,  itching , irritation, dryness,  rash ,  skin  thinning/streaks may occur.':'Dry, itchy skin etc.'},inplace=True)
    elif side=='Dizziness ,  diarrhea ,  nausea / vomiting ,  stomach pain , dry/ itchy skin ,  acne , and  dry mouth  may occur.':
        data['Sides'].replace({'Dizziness ,  diarrhea ,  nausea / vomiting ,  stomach pain , dry/ itchy skin ,  acne , and  dry mouth  may occur.':'Dry, itchy skin etc.'},inplace=True)
    elif side=='Some  sunscreen  products (e.g., those containing  aminobenzoic acid  or para-aminobenzoic acid/PABA) may stain clothing.':
        data['Sides'].replace({'Some  sunscreen  products (e.g., those containing  aminobenzoic acid  or para-aminobenzoic acid/PABA) may stain clothing.':'See the Warnings section or may stain clothing.'},inplace=True)
    elif side=='Also see the Warnings section.':
        data['Sides'].replace({'Also see the Warnings section.':'See the Warnings section or may stain clothing.'},inplace=True)

In [None]:
# rows whose 'Sides' label occurs fewer than two times (i.e. exactly once) in the dataframe
sides_freq = data['Sides'].value_counts()
data[data['Sides'].isin(sides_freq[sides_freq < 2].index)]

~ No side-effect description appears only once in the dataframe.

In [None]:
# number of distinct labels left in the 'Sides' column after the grouping above
data['Sides'].nunique()

In [None]:
# per-column count of missing (null) values
data.isnull().sum()

In [None]:
# columns stored with object dtype ('O') are treated as categorical
categorical_features = [name for name, kind in data.dtypes.items() if kind == 'O']
categorical_features

In [None]:
# every column that is NOT stored with object dtype is treated as numerical
numerical_features = [name for name, kind in data.dtypes.items() if kind != 'O']
numerical_features

In [None]:
# numerical columns whose name contains 'Date' or 'Year' are treated as date features
date_features = [
    name for name in numerical_features
    if any(tag in name for tag in ('Date', 'Year'))
]
date_features

In [None]:
# for every numerical column, print how many distinct values it has and what they are
for col in numerical_features:
    print(f"{col} column \n")
    print(f"Count of unique values = {data[col].nunique()}\n")
    print(f"Unique values = {data[col].unique()}\n\n\n")

In [None]:
# numerical columns with MORE than 7 distinct values, excluding date features and the 'DrugId' identifier
# NOTE(review): a "discrete" feature usually has FEW distinct values; this threshold looks
# inverted relative to the variable name - confirm the intended direction before relying on it
id_or_date = date_features + ['DrugId']
discrete_numerical_features = [
    name for name in numerical_features
    if name not in id_or_date and len(data[name].unique()) > 7
]
discrete_numerical_features

In [None]:
# remaining numerical columns: not in the "discrete" list, not a date feature and not 'DrugId'
# NOTE(review): because the "discrete" list holds the columns with more than 7 distinct values,
# this list ends up with the low-cardinality columns - confirm the naming is intended
not_continuous = discrete_numerical_features + date_features + ['DrugId']
continuous_numerical_features = [
    name for name in numerical_features
    if name not in not_continuous
]
continuous_numerical_features

## OUTLIERS IN CONTINUOUS NUMERICAL FEATURES

In [None]:
# one boxplot per column listed in continuous_numerical_features, titled with the column name
for col in continuous_numerical_features:
    fig, ax = plt.subplots()
    ax.boxplot(data[col])
    ax.set_title("Boxplot of" + " " + col)
    plt.show()

~ The boxplots above show no outliers in any of the plotted columns.

## CORRELATION

In [None]:
# subset of the dataframe holding only the numerical (non-object dtype) columns;
# it is the input of the correlation matrix computed in the next cell
P = data[numerical_features]

In [None]:
# pairwise correlation between the numerical columns, rounded to 2 decimals
corrmatrix = P.corr().round(2)

# show the correlation table
corrmatrix

In [None]:
# column names, non-null counts and dtypes of the dataframe
data.info()

In [None]:
# column names currently present in the dataframe
data.columns

In [None]:
# share of rows belonging to each class of the 'Sides' column
# (normalize=True returns fractions between 0 and 1, not percentages)
data['Sides'].value_counts(normalize=True)

~ Imbalanced dataset

~ Features: 'Age', 'Condition', 'Drug', 'EaseofUse', 'Effectiveness', 'Reviews', 'Satisfaction', 'Sex', 'UsefulCount', 'Year' columns

~ Target: 'Sides' column


In [None]:
# move the target ('Sides') and the two columns not used as features ('Date', 'DrugId')
# out of the dataframe; pop() removes them from `data` in place, so this cell
# cannot be run twice on the same dataframe
target = data.pop('Sides')
rem1, rem2 = data.pop('Date'), data.pop('DrugId')

# first 5 rows of what is left (the feature columns)
data.head()

In [None]:
# work on independent copies so the encoding below leaves `data` and `target` untouched:
# X holds the features, y holds the target
X, y = data.copy(), target.copy()

## ENCODING

~ encoding columns 'Age', 'Condition', 'Drug', 'Reviews', 'Sex', 'Year', 'Sides'


In [None]:
# converting categorical data to numeric
#
# pd.factorize returns (codes, uniques). The codes replace the original values;
# the uniques are kept so that an integer code can be translated back to the
# original category later (uniques[code]), e.g. to decode model predictions.
# Encoding reads from the untouched `data` / `target` rather than from X / y,
# so re-running this cell yields the same result instead of re-encoding
# already-encoded columns.
feature_levels = {}
for col in ['Age', 'Condition', 'Drug', 'Reviews', 'Sex', 'Year']:
    X[col], feature_levels[col] = pd.factorize(data[col])

# y becomes an integer array; sides_labels[k] is the 'Sides' text of class k
y, sides_labels = pd.factorize(target)

# display first 5 rows of the dataframe
X.head()

In [None]:
# encoded target: one integer class code per row
y

In [None]:
# number of rows per encoded class label of the target
counter = Counter(y)
print(counter)

## SCALING

**1) standard scaler**

In [None]:
# standardise every feature column with StandardScaler
# NOTE: the scaler is fitted on all rows of X, i.e. before the train/test split further
# down, so the rows that later form the test set contribute to the scaling statistics
stdscaler = StandardScaler().fit(X)
X1 = stdscaler.transform(X)

In [None]:
# scaled features as returned by the scaler
X1

In [None]:
# wrap the scaled values in a dataframe again; the column labels are taken from X
# (the frame that was scaled) instead of a hand-written list, so the labels cannot
# drift out of sync with the actual feature order
X1 = pd.DataFrame(X1, columns=X.columns)

In [None]:
# first 5 rows of the scaled feature dataframe
X1.head()

In [None]:
# encoded target that goes with the scaled features
y

## SPLITTING THE DATASET INTO TRAINING AND TESTING

In [None]:
# hold out 25% of the rows for testing; stratify=y keeps the class proportions of the
# target the same in both parts, random_state fixes the shuffle
X_train, X_test, y_train, y_test = train_test_split(
    X1,
    y,
    test_size=0.25,
    stratify=y,
    random_state=42,
)

In [None]:
# stratified cross-validation splitter with 2 folds; it is passed as `cv` to
# cross_val_score and GridSearchCV in the cells below
skfold_validator = StratifiedKFold(n_splits=2)

## STRATIFIED K-FOLD CROSS VALIDATION

In [None]:
# print, for every fold produced by the splitter on (X1, y), the row positions
# used for training and the row positions used for testing
for fold_train_idx, fold_test_idx in skfold_validator.split(X1, y):
    print('Training Index:', fold_train_idx)
    print('Testing Index:', fold_test_idx)

In [None]:
# shapes of the training features and training target
X_train.shape, y_train.shape

In [None]:
# shapes of the testing features and testing target
X_test.shape, y_test.shape

In [None]:
# report helper for a fitted parameter search
def print_results(results):
    """Print the best parameters and a score summary for every tried candidate.

    `results` must expose `best_params_` and a `cv_results_` mapping with the
    keys 'mean_test_score', 'std_test_score' and 'params' (as a fitted
    GridSearchCV does). One line is printed per candidate: the mean test
    score rounded to 3 decimals, twice the standard deviation rounded to
    3 decimals, and the parameter combination. Returns None.
    """
    print('BEST PARAMS: {}\n'.format(results.best_params_))

    cv = results.cv_results_
    rows = zip(cv['mean_test_score'], cv['std_test_score'], cv['params'])

    for avg_score, spread, candidate in rows:
        print('{} (+/-{}) for {}'.format(round(avg_score, 3), round(spread * 2, 3), candidate))
        

## MODEL

### Decision Tree

In [None]:
# baseline decision tree: only the random seed is set, every other hyperparameter keeps its default
dec_tree = DecisionTreeClassifier(random_state=42) 

In [None]:
# fit the baseline tree on the training split
dec_model = dec_tree.fit(X_train, y_train)

**STRATIFIED K FOLD CROSS VALIDATION**

In [None]:
# stratified 2-fold cross validation of the decision tree on the training split only
cv_decresult = cross_val_score(dec_model, X_train, y_train, cv=skfold_validator)

# score of each fold, followed by their mean
print("Cross validation score of each fold: ", cv_decresult)
print("Mean of cross validation score across 2 folds: ", cv_decresult.mean())

In [None]:
# stratified 2-fold cross validation of the decision tree on the entire dataset
# (X1, y); this overwrites the training-only result stored in cv_decresult
cv_decresult = cross_val_score(dec_model, X1, y, cv=skfold_validator)

# score of each fold, followed by their mean
print("Cross validation score of each fold: ", cv_decresult)
print("Mean of cross validation score across 2 folds: ", cv_decresult.mean())

**TUNING DECISION TREE**

In [None]:
# hyperparameter values tried by the grid search:
# unlimited depth plus depths 1..20, six leaf-size minimums, both split criteria
param_grid = {
    'max_depth': [None, *range(1, 21)],
    'min_samples_leaf': [1, 5, 10, 20, 50, 100],
    'criterion': ["gini", "entropy"],
}

In [None]:
# exhaustive search over param_grid for the decision tree, evaluated with the
# 2-fold stratified splitter; refit=True refits the best candidate after the search,
# n_jobs=-1 runs the candidates in parallel, verbose=3 prints progress
grid = GridSearchCV(dec_tree, param_grid, refit = True, verbose = 3, n_jobs=-1, cv=skfold_validator)

In [None]:
# run the grid search on the training split only (the test split stays unseen)
grid.fit(X_train, y_train)

In [None]:
# print the best parameters and the mean score (+/- 2 std) of every candidate
print_results(grid)

**MODEL EVALUATION**

**Fit best models on full training set**

Accuracy scores of top few models
*********
0.978 (+/-0.001) for {'criterion': 'entropy', 'max_depth': None, 'min_samples_leaf': 1}

0.978 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 19, 'min_samples_leaf': 1}

0.978 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 20, 'min_samples_leaf': 1}

0.977 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 16, 'min_samples_leaf': 1}

0.977 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 17, 'min_samples_leaf': 1}

0.977 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 18, 'min_samples_leaf': 1}

0.976 (+/-0.001) for {'criterion': 'entropy', 'max_depth': 15, 'min_samples_leaf': 1}

In [None]:
# refit the seven top-scoring candidates listed above on the training split; they all use
# criterion='entropy' and min_samples_leaf=1 and differ only in max_depth
top_depths = [None, 19, 20, 16, 17, 18, 15]
(dectree_model1, dectree_model2, dectree_model3, dectree_model4,
 dectree_model5, dectree_model6, dectree_model7) = [
    DecisionTreeClassifier(max_depth=depth, min_samples_leaf=1, criterion='entropy', random_state=42).fit(X_train, y_train)
    for depth in top_depths
]

In [None]:
# accuracy, precision and recall of the top few models on the held-out test split
#
# Precision and recall are macro-averaged (unweighted mean over the classes).
# With one label per sample, micro-averaged precision and recall are both equal
# to accuracy, so the three numbers would always be identical and say nothing
# about how the rare classes of this imbalanced target are handled.
# zero_division=0 scores a class that is never predicted as 0 without a warning.
for mdl in [dectree_model1, dectree_model2, dectree_model3, dectree_model4, dectree_model5, dectree_model6, dectree_model7]:
    y_pred = mdl.predict(X_test)
    accuracy = round(accuracy_score(y_test, y_pred), 3)
    precision = round(precision_score(y_test, y_pred, average='macro', zero_division=0), 3)
    recall = round(recall_score(y_test, y_pred, average='macro', zero_division=0), 3)
    print('max_depth: {} / min_samples_leaf: {} / criterion: {} -- A: {} / P: {} / R: {}'.format(mdl.max_depth, mdl.min_samples_leaf, mdl.criterion, accuracy, precision, recall))

In [None]:
# final model: the parameter combination ranked first in the list above,
# fitted on the training split
dectree_best = DecisionTreeClassifier(criterion='entropy', max_depth=None, min_samples_leaf=1, random_state=42)
dectree_bestmodel = dectree_best.fit(X_train, y_train)

# predictions for both splits, keyed by split name
y_predict_tree = {
    split: dectree_bestmodel.predict(features)
    for split, features in (("train", X_train), ("test", X_test))
}

# accuracy per split, plus the hamming loss (fraction of wrong labels) on the test split
scores_base = {
    "train": accuracy_score(y_train, y_predict_tree["train"]),
    "test": accuracy_score(y_test, y_predict_tree["test"]),
}
hamming = hamming_loss(y_test, y_predict_tree["test"])

print(f" Accuracy score of training data: {scores_base['train']*100}%")
print(f"Accuracy score of test data: {scores_base['test']*100}%")
print(f"Incorrect predictions of test data: {hamming*100}%")

In [None]:
# importance of every feature in the final tree, most important first
dec_features_tree = (
    pd.DataFrame({"feature_name": X_train.columns, "importance": dectree_bestmodel.feature_importances_})
    .sort_values(by=["importance"], ascending=False)
)

# show the five most important features
print(dec_features_tree.head())

In [None]:
# per-class precision / recall / f1 on the test split;
# zero_division=1 is passed so undefined ratios (division by zero) are reported as 1
print(classification_report(y_test, y_predict_tree["test"], zero_division=1))

In [None]:
# save the model
filename = 'savedmodel.sav'

# save the fitted tree using pickle; 'wb' opens the file for binary writing and the
# `with` block guarantees the file handle is flushed and closed, even if dump() fails
# NOTE: only the classifier is stored - the scaler and the category encodings used to
# prepare the features are not part of this file
with open(filename, 'wb') as model_file:
    pickle.dump(dectree_best, model_file)