# **Build an ANN model for Drug classification.**

### **Load the dataset**

In [None]:
import pandas as pd

In [None]:
data=pd.read_csv("drug200.csv")

In [None]:
data.head(3)

Unnamed: 0,Age,Sex,BP,Cholesterol,Na_to_K,Drug
0,23,F,HIGH,HIGH,25.355,DrugY
1,47,M,LOW,HIGH,13.093,drugC
2,47,M,LOW,HIGH,10.114,drugC


In [None]:
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 6 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Age          200 non-null    int64  
 1   Sex          200 non-null    object 
 2   BP           200 non-null    object 
 3   Cholesterol  200 non-null    object 
 4   Na_to_K      200 non-null    float64
 5   Drug         200 non-null    object 
dtypes: float64(1), int64(1), object(4)
memory usage: 9.5+ KB


### **Data Preprocessing**

In [None]:
data.isnull().sum()

Age            0
Sex            0
BP             0
Cholesterol    0
Na_to_K        0
Drug           0
dtype: int64

In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
le = LabelEncoder()
data.Sex= le.fit_transform(data.Sex)
data.BP = le.fit_transform(data.BP)
data.Cholesterol = le.fit_transform(data.Cholesterol)
data.Drug = le.fit_transform(data.Drug)

In [None]:
x = data.drop("Drug",axis=1).values
y= data["Drug"].values

In [None]:
from sklearn.preprocessing import StandardScaler

In [None]:
scaler = StandardScaler()
x_scaled = scaler.fit_transform(x)

In [None]:
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x_scaled,y, test_size=0.3, random_state=0)

In [None]:
from tensorflow import keras

In [None]:
Y_train = keras.utils.to_categorical(y_train)
Y_test = keras.utils.to_categorical(y_test)

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [None]:
model=Sequential()
model.add(Dense(12, input_dim=5 ,activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(36, activation='relu'))
model.add(Dense(5, activation='softmax'))

In [None]:
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_8 (Dense)             (None, 12)                72        
                                                                 
 dense_9 (Dense)             (None, 24)                312       
                                                                 
 dense_10 (Dense)            (None, 36)                900       
                                                                 
 dense_11 (Dense)            (None, 5)                 185       
                                                                 
Total params: 1469 (5.74 KB)
Trainable params: 1469 (5.74 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
model.compile(loss = "categorical_crossentropy",optimizer = 'adam', metrics = ["accuracy"])

In [None]:
#Train the model
model.fit(x_train,Y_train, batch_size=10, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.src.callbacks.History at 0x78f9c4021540>

In [None]:
loss, accuracy = model.evaluate(x_test, Y_test)
print("Accuracy is:", accuracy*100)

Accuracy is: 93.33333373069763


In [None]:
y_pred = model.predict(x_test)
y_pred



array([[1.11547783e-02, 2.68667877e-01, 6.24854080e-02, 5.44424057e-01,
        1.13267966e-01],
       [2.00982451e-01, 1.64587836e-04, 4.54284076e-04, 3.41333933e-02,
        7.64265299e-01],
       [9.87202168e-01, 4.57107526e-04, 5.48742013e-04, 2.82519497e-03,
        8.96683894e-03],
       [9.99930322e-01, 8.80834796e-06, 6.39092320e-07, 1.29905748e-05,
        4.71919375e-05],
       [8.45548868e-01, 1.15741685e-04, 1.25409686e-04, 8.03770218e-03,
        1.46172404e-01],
       [6.73587501e-01, 4.89486242e-03, 1.71363365e-03, 2.31374372e-02,
        2.96666592e-01],
       [9.87609196e-03, 3.75641452e-04, 1.35387573e-03, 9.48481858e-02,
        8.93546164e-01],
       [2.01995865e-01, 2.98835873e-03, 1.39031140e-02, 3.56390551e-02,
        7.45473444e-01],
       [8.24416459e-01, 1.24255791e-02, 1.52383512e-02, 5.95043376e-02,
        8.84151980e-02],
       [1.01387789e-02, 8.91462783e-04, 2.83252858e-02, 3.28219682e-02,
        9.27822411e-01],
       [1.41099636e-02, 9.3208

In [None]:
import numpy as np

In [None]:
num_sample = 5
#generating random data of the same shapes as input features
random_input = np.random.rand(num_sample, x_train.shape[1])
#scaling the random input data
random_input_scaled= scaler.transform(random_input)
#predicting using the trained model
prediction = model.predict(random_input_scaled)
#decode one-hot encoded predictions to get the predicted drug
predicted_drug = [np.argmax(pred) for pred in prediction]
#mapping predicted drug index to actual drug name
drug_mapping = {0:"DrugA",1:"DrugB", 2:"DrugC", 3:"DrugX", 4:"DrugY"}
predicted_drugs_names = [drug_mapping[pred] for pred in predicted_drug]
#displaying the random input and predicted drugs
print("Random Input data:")
print(random_input)
print("\nPredicted Drugs:")
print(predicted_drugs_names)

Random Input data:
[[0.28024748 0.14228604 0.44252898 0.63785364 0.54953416]
 [0.56451333 0.8372912  0.38280439 0.9589858  0.50787152]
 [0.74702089 0.40041127 0.30203093 0.93130604 0.98739654]
 [0.24557271 0.89221998 0.69483615 0.73804033 0.42076802]
 [0.59580352 0.37690642 0.62392174 0.49511191 0.03279702]]

Predicted Drugs:
['DrugB', 'DrugB', 'DrugB', 'DrugB', 'DrugB']
