In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
# Load the dataset CSV into a DataFrame
DATA_PATH = "/content/drug200.csv"
df = pd.read_csv(DATA_PATH)

In [None]:
# Task 1 : Read the dataset and do data pre-processing


In [3]:
# Encode every categorical column as integer codes.
# Each column gets its own LabelEncoder, kept in `encoders`, so a column's
# codes can later be mapped back to the original labels with
# encoders[column].inverse_transform(...). Re-using one encoder for all
# columns would leave only the last fitted mapping available.
categorical_columns = ['Sex', 'BP', 'Cholesterol', 'Drug']
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Name kept for the later cells: the encoder fitted on the target column.
label_encoder = encoders['Drug']
print(df.head())


   Age  Sex  BP  Cholesterol  Na_to_K  Drug
0   23    0   0            0   25.355     0
1   47    1   1            0   13.093     3
2   47    1   1            0   10.114     3
3   28    0   2            0    7.798     4
4   61    0   1            0   18.043     0


In [4]:
# Standardise the continuous columns in place
# NOTE(review): the scaler is fitted on the whole frame, i.e. before the
# train/test split further down, so held-out rows influence the scaling
# statistics. Consider fitting on the training rows only.
numeric_columns = ['Age', 'Na_to_K']
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])


In [5]:
# Split the frame into the model inputs (x) and the target column (y)
feature_columns = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
x = df[feature_columns]
y = df['Drug']


In [9]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=42,
)

# Sanity check: size of the training features and of the test labels
for part in (X_train, y_test):
    print(part.shape)


(160, 5)
(40,)


In [None]:
# Task 2 : Build the ANN model with (input layer, min 3 hidden layers & output layer)


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [11]:
# Define the model architecture
# Seed the Python, NumPy and TensorFlow random generators before any layer is
# created so the initial weights are repeatable across kernel restarts, in
# line with the fixed random_state used for the train/test split.
tf.keras.utils.set_random_seed(42)

# Fully connected classifier: 5 input features -> four ReLU hidden layers ->
# 5-way softmax (one probability per encoded drug class).
model = Sequential()
model.add(Dense(64, activation='relu', input_shape=(5,)))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(5, activation='softmax'))


In [12]:
# Re-slice the frame by position: the first five columns become the features
# and everything from the sixth column on becomes the target.
# Note that `y` is a DataFrame here (column slice), not a Series.
x = df.iloc[:, :5]
y = df.iloc[:, 5:]

# Show both pieces
for frame in (x, y):
    print(frame)


          Age  Sex  BP  Cholesterol   Na_to_K
0   -1.291591    0   0            0  1.286522
1    0.162699    1   1            0 -0.415145
2    0.162699    1   1            0 -0.828558
3   -0.988614    0   2            0 -1.149963
4    1.011034    0   1            0  0.271794
..        ...  ...  ..          ...       ...
195  0.708057    0   1            0 -0.626917
196 -1.715759    1   1            0 -0.565995
197  0.465676    1   2            0 -0.859089
198 -1.291591    1   2            1 -0.286500
199 -0.261469    0   1            1 -0.657170

[200 rows x 5 columns]
     Drug
0       0
1       3
2       3
3       4
4       0
..    ...
195     3
196     3
197     4
198     4
199     4

[200 rows x 1 columns]


In [15]:
# Compile the model
# Sparse categorical cross-entropy takes integer class ids directly, which is
# what the 'Drug' column already holds after the preprocessing step.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam',metrics=['accuracy'])

# The targets are already integer-encoded, so they are only converted to flat
# NumPy arrays. Re-fitting `label_encoder` on them would overwrite its fitted
# mapping, would raise on a class that appears only in the test split, and
# would renumber the ids if a class were missing from the training split.
y_train_encoded = np.asarray(y_train).ravel()
y_test_encoded = np.asarray(y_test).ravel()

# Train for 20 epochs in mini-batches of 20, reporting loss/accuracy on the
# held-out rows after every epoch.
model.fit(X_train, y_train_encoded, epochs=20, batch_size=20,validation_data=(X_test, y_test_encoded))


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f17f1d85450>

In [17]:
# Predict on the held-out rows. The softmax output layer yields one row per
# sample with five values (one score per encoded drug class).
y_pred = model.predict(X_test)
y_pred




array([[1.75110588e-04, 1.73789554e-03, 1.44960586e-05, 5.00680618e-02,
        9.48004484e-01],
       [7.82621980e-01, 2.16004521e-01, 3.51645227e-04, 8.19196168e-04,
        2.02563679e-04],
       [1.31217871e-06, 2.48581528e-05, 1.79198825e-08, 5.90924686e-03,
        9.94064510e-01],
       [1.10933697e-03, 1.77982450e-02, 4.35904972e-03, 6.03691101e-01,
        3.73042256e-01],
       [9.99999940e-01, 7.58614950e-17, 1.11155350e-15, 4.90187216e-14,
        2.88596865e-14],
       [9.97639656e-01, 5.33648767e-04, 1.25601247e-03, 4.66648547e-04,
        1.04025996e-04],
       [9.99999940e-01, 2.11565112e-08, 3.85568066e-09, 3.67226649e-09,
        2.14750395e-09],
       [5.58930449e-03, 7.64277065e-05, 2.70903138e-06, 1.96509212e-02,
        9.74680543e-01],
       [7.11830258e-02, 7.97181308e-01, 1.06061690e-01, 1.54021150e-02,
        1.01719005e-02],
       [4.90675075e-06, 8.19227716e-06, 1.01620469e-06, 6.60446566e-03,
        9.93381321e-01],
       [1.19740621e-03, 9.7982

In [18]:
# Put the true (encoded) test labels into a one-column frame for inspection
comp = pd.DataFrame({'Actual Value': y_test_encoded})
comp


Unnamed: 0,Actual Value
0,4
1,0
2,4
3,3
4,0
5,0
6,0
7,4
8,1
9,4


In [19]:
# Print the model summary: one row per layer with its output shape and
# parameter count, followed by the parameter totals.
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                384       
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 5)                 165       
                                                                 
Total params: 19,205
Trainable params: 19,205
Non-trainable params: 0
_________________________________________________________________


In [20]:
# Task 3 : Test the model with random data

In [21]:
# Generate random data for testing
# One synthetic sample with five feature values, each drawn uniformly from
# [0, 1) using NumPy's global random generator.
# NOTE(review): the generator is not seeded in this cell, so the sample can
# differ between runs.
sample_shape = (1, 5)
random_data = np.random.random_sample(sample_shape)
random_data


array([[0.5902534 , 0.76205588, 0.66713313, 0.58979546, 0.59070951]])

In [22]:
# Make predictions for the synthetic sample; the result holds one row of
# five softmax scores, one per encoded drug class.
predictions = model.predict(random_data)
predictions



array([[9.9999714e-01, 2.3604699e-08, 2.1938440e-07, 8.7394307e-07,
        1.7675379e-06]], dtype=float32)

In [23]:
# Pick the position of the largest score, i.e. the most likely encoded drug
# class. The index is taken over the flattened array, which is the class
# index as long as `predictions` holds a single row.
predicted_class = predictions.argmax()


In [24]:
# Print the predicted class index (the integer code, not the drug name)
print("Predicted Drug Class :", predicted_class)

Predicted Drug Class : 0
