In [36]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [37]:
df = pd.read_csv('/content/drug200.csv')

In [38]:
df.head()


Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC
3,28,F,NORMAL,HIGH,7.798,drugX
4,61,F,LOW,HIGH,18.043,DrugY


In [39]:
df.tail()

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
195,56,F,LOW,HIGH,11.567,drugC
196,16,M,LOW,HIGH,12.006,drugC
197,52,M,NORMAL,HIGH,9.894,drugX
198,23,M,NORMAL,NORMAL,14.02,drugX
199,40,F,LOW,NORMAL,11.349,drugX


In [40]:
df.describe(include='all')


Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
count,200.0,200,200,200,200.0,200
unique,,2,3,2,,5
top,,M,HIGH,HIGH,,DrugY
freq,,104,77,103,,91
mean,44.315,,,,16.084485,
std,16.544315,,,,7.223956,
min,15.0,,,,6.269,
25%,31.0,,,,10.4455,
50%,45.0,,,,13.9365,
75%,58.0,,,,19.38,


In [41]:
df.isnull().sum()


Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

**Split dependent and independent**

In [42]:
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


In [43]:
x = df.iloc[:,0:5] 
x.head(2)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K
0,23,F,HIGH,HIGH,25.355
1,47,M,LOW,HIGH,13.093


In [44]:
y = df.iloc[:,5:] 
y[0:2]

Unnamed: 0,Drug
0,DrugY
1,drugC


**Split the training and testing data**

In [45]:
from sklearn.model_selection import train_test_split


In [46]:
xtrain,xtest,ytrain,ytest = train_test_split(x,y,test_size=0.2,random_state=12)


In [47]:
x.shape, y.shape


((200, 5), (200, 1))

In [48]:
xtrain.shape, xtest.shape


((160, 5), (40, 5))

In [49]:
ytrain.shape, ytest.shape


((160, 1), (40, 1))

** Encoding**

In [50]:
from sklearn.preprocessing import LabelEncoder


In [51]:
le = LabelEncoder()

In [52]:
xtrain['Sex'] = le.fit_transform(xtrain['Sex'])
xtrain['Sex']

77     0
47     1
94     1
14     0
41     0
      ..
3      0
130    0
134    0
155    1
75     1
Name: Sex, Length: 160, dtype: int64

In [53]:
xtest['Sex'] = le.transform(xtest['Sex'])
xtest['Sex']

23     0
182    0
172    0
21     1
63     1
11     0
61     1
32     1
164    1
7      1
95     1
173    0
132    1
120    1
98     1
138    1
31     1
16     1
181    0
85     1
185    0
166    0
101    0
129    0
113    0
51     1
167    0
88     0
108    1
136    0
29     1
183    0
152    1
93     0
189    1
139    0
179    0
55     0
1      1
5      0
Name: Sex, dtype: int64

In [54]:
xtrain['BP'] = le.fit_transform(xtrain['BP'])
xtrain['BP']

77     0
47     1
94     1
14     2
41     0
      ..
3      2
130    2
134    0
155    1
75     1
Name: BP, Length: 160, dtype: int64

In [55]:
xtest['BP'] = le.transform(xtest['BP'])
xtest['BP']

23     1
182    1
172    2
21     2
63     1
11     0
61     0
32     1
164    0
7      1
95     1
173    1
132    1
120    2
98     0
138    0
31     0
16     1
181    2
85     0
185    2
166    1
101    0
129    2
113    1
51     2
167    2
88     0
108    0
136    0
29     1
183    0
152    2
93     1
189    0
139    2
179    2
55     1
1      1
5      2
Name: BP, dtype: int64

In [56]:
xtrain['Cholesterol'] = le.fit_transform(xtrain['Cholesterol'])
xtrain['Cholesterol']

77     1
47     0
94     0
14     0
41     1
      ..
3      0
130    0
134    0
155    0
75     1
Name: Cholesterol, Length: 160, dtype: int64

In [57]:
xtest['Cholesterol'] = le.transform(xtest['Cholesterol'])
xtest['Cholesterol']

23     0
182    1
172    1
21     0
63     1
11     1
61     1
32     1
164    1
7      0
95     1
173    1
132    1
120    0
98     1
138    1
31     0
16     1
181    0
85     0
185    1
166    0
101    0
129    0
113    1
51     1
167    0
88     1
108    1
136    0
29     0
183    1
152    1
93     1
189    1
139    0
179    0
55     0
1      0
5      0
Name: Cholesterol, dtype: int64

In [58]:
ytrain['Drug'] = le.fit_transform(ytrain['Drug'])
ytrain['Drug']

77     0
47     3
94     0
14     4
41     2
      ..
3      4
130    0
134    0
155    3
75     0
Name: Drug, Length: 160, dtype: int64

In [59]:
ytest['Drug'] = le.transform(ytest['Drug'])
ytest['Drug']

23     0
182    4
172    0
21     0
63     4
11     0
61     1
32     4
164    0
7      3
95     4
173    0
132    4
120    0
98     0
138    2
31     2
16     4
181    4
85     2
185    0
166    0
101    1
129    4
113    4
51     4
167    4
88     0
108    2
136    2
29     0
183    0
152    4
93     0
189    0
139    4
179    0
55     3
1      3
5      4
Name: Drug, dtype: int64

In [60]:
xtrain.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 160 entries, 77 to 75
Data columns (total 5 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          160 non-null    int64  
 1   Sex          160 non-null    int64  
 2   BP           160 non-null    int64  
 3   Cholesterol  160 non-null    int64  
 4   Na_to_K      160 non-null    float64
dtypes: float64(1), int64(4)
memory usage: 7.5 KB


In [61]:
ytrain.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 160 entries, 77 to 75
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype
---  ------  --------------  -----
 0   Drug    160 non-null    int64
dtypes: int64(1)
memory usage: 2.5 KB


In [62]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

**ANN**

In [63]:

# Initializing the seq model
reg_model = Sequential()
# Adding the input layer to the model
reg_model.add(Dense(5,activation='relu'))
# Adding the 1st hidden layer to the model
reg_model.add(Dense(64,activation='relu'))
# Adding the 2nd hidden layer to the model
reg_model.add(Dense(32,activation='relu'))
# Adding the 3rd hidden layer to the model
reg_model.add(Dense(16,activation='relu'))
# Adding output layer
reg_model.add(Dense(1,activation='linear'))

In [64]:
reg_model.compile(optimizer='adam',loss='mse',metrics=['accuracy'])

**Training the model**

In [65]:
reg_model.fit(xtrain,ytrain,batch_size=10,epochs=10,validation_data=(xtest,ytest))


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x7fa2a0fd5540>

In [66]:
reg_model.summary()


Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (10, 5)                   30        
                                                                 
 dense_6 (Dense)             (10, 64)                  384       
                                                                 
 dense_7 (Dense)             (10, 32)                  2080      
                                                                 
 dense_8 (Dense)             (10, 16)                  528       
                                                                 
 dense_9 (Dense)             (10, 1)                   17        
                                                                 
Total params: 3,039
Trainable params: 3,039
Non-trainable params: 0
_________________________________________________________________


**TESTING THE MODEL**

In [70]:
ypred = reg_model.predict([[2,4,5,7,8]])
ypred



array([[0.29858866]], dtype=float32)

In [71]:
np.argmax(ypred)

0

In [67]:
ypred = reg_model.predict(xtest)




In [68]:
comp = pd.DataFrame(ytest)  # Creating a dataframe
comp.columns = ['Actual Value']  # Changing the column name
comp['Predicted values'] = ypred  # Creating a column based onth prediction
comp


Unnamed: 0,Actual Value,Predicted values
23,0,1.192041
182,4,0.380266
172,0,0.527034
21,0,0.753658
63,4,1.752983
11,0,0.508389
61,1,0.530825
32,4,1.70751
164,0,0.12166
7,3,1.325742
