In [12]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [13]:
# Load the drug dataset from 'drug200.csv' (path is relative to the working directory).
df = pd.read_csv('drug200.csv')

In [14]:
# Task 1 : Read the dataset and do data pre-processing

In [15]:
# Integer-encode every categorical column with its OWN LabelEncoder.
# Re-fitting a single shared encoder column after column overwrites its
# `classes_`, so the string <-> integer mappings of the earlier columns are
# lost and new raw samples could not be encoded consistently afterwards.
encoders = {}
for column in ['Sex', 'BP', 'Cholesterol', 'Drug']:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Later cells refer to `label_encoder`; keep that name bound to the encoder
# fitted on 'Drug' (the column the shared encoder was fitted on last).
label_encoder = encoders['Drug']
print(df.head())

   Age  Sex  BP  Cholesterol  Na_to_K  Drug
0   23    0   0            0   25.355     0
1   47    1   1            0   13.093     3
2   47    1   1            0   10.114     3
3   28    0   2            0    7.798     4
4   61    0   1            0   18.043     0


In [16]:
# Standardise the two continuous columns in place (zero mean, unit variance).
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split below, so test rows contribute to the mean/std - consider fitting on
# the training rows only.

scaler = StandardScaler().fit(df[['Age', 'Na_to_K']])
df[['Age', 'Na_to_K']] = scaler.transform(df[['Age', 'Na_to_K']])

In [17]:
# Feature matrix: the five predictor columns; target vector: the encoded 'Drug' column.

x = df.loc[:, ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']]
y = df.loc[:, 'Drug']

In [18]:
# Split the dataset into training and testing sets (80% / 20%).
# `train_test_split` is already imported in the notebook's import cell, so it
# is not re-imported here; the fixed random_state keeps the partition
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_test.shape)

(160, 5)
(40,)


In [19]:
# Task 2 : Build the ANN model with (input layer, min 3 hidden layers & output layer)

In [20]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [21]:
# Define the model architecture: a fully connected classifier taking the
# 5 input features through four ReLU hidden layers (64 -> 128 -> 64 -> 32)
# into a 5-unit softmax output.

model = Sequential([
    Dense(64, activation='relu', input_shape=(5,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(5, activation='softmax'),
])

In [22]:
# Positional view of the frame: first five columns as features, the remaining
# column(s) as target. This rebinds `x` and `y` after the split cell; the
# X_train / X_test / y_train / y_test objects created there are not affected.
# Note that `y` is a DataFrame here (column slice), not a Series.
x, y = df.iloc[:, :5], df.iloc[:, 5:]
print(x, y, sep='\n')

          Age  Sex  BP  Cholesterol   Na_to_K
0   -1.291591    0   0            0  1.286522
1    0.162699    1   1            0 -0.415145
2    0.162699    1   1            0 -0.828558
3   -0.988614    0   2            0 -1.149963
4    1.011034    0   1            0  0.271794
..        ...  ...  ..          ...       ...
195  0.708057    0   1            0 -0.626917
196 -1.715759    1   1            0 -0.565995
197  0.465676    1   2            0 -0.859089
198 -1.291591    1   2            1 -0.286500
199 -0.261469    0   1            1 -0.657170

[200 rows x 5 columns]
     Drug
0       0
1       3
2       3
3       4
4       0
..    ...
195     3
196     3
197     4
198     4
199     4

[200 rows x 1 columns]


In [23]:
# Compile the model: integer class targets -> sparse categorical cross-entropy.

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# 'Drug' was integer-encoded before the split, so the targets are used as-is.
# Re-fitting `label_encoder` on these integers would overwrite the fitted
# drug-name mapping, would silently renumber the labels if a class happened to
# be absent from the training split, and would make `transform(y_test)` raise
# on any class seen only in the test split.
y_train_encoded = y_train.to_numpy()
y_test_encoded = y_test.to_numpy()

# Keep the History object so the per-epoch loss/accuracy can be inspected later.
# NOTE(review): the test split doubles as validation data here.
history = model.fit(X_train, y_train_encoded, epochs=20, batch_size=20, validation_data=(X_test, y_test_encoded))

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7f3a999c3430>

In [25]:
# Predict on the held-out test features; the softmax output layer yields one
# score per class (5 columns) for each test row.
y_pred = model.predict(X_test)
y_pred



array([[3.32973141e-04, 1.07874541e-04, 3.70556081e-05, 4.03605439e-02,
        9.59161580e-01],
       [9.05583501e-01, 9.22425687e-02, 1.59223695e-04, 1.98959676e-03,
        2.51983747e-05],
       [2.87211901e-06, 6.66109429e-07, 6.08252648e-08, 6.04115799e-03,
        9.93955255e-01],
       [3.65072326e-03, 6.53086463e-03, 1.60883032e-02, 5.71730494e-01,
        4.01999474e-01],
       [9.99999940e-01, 8.85720582e-17, 2.08110101e-18, 1.50031761e-13,
        4.84017072e-15],
       [9.99707222e-01, 1.02892809e-04, 7.40061005e-05, 1.07132699e-04,
        8.78670289e-06],
       [9.99999940e-01, 1.43484169e-09, 7.96321620e-11, 1.24606121e-08,
        8.03239975e-10],
       [8.31738301e-03, 5.86460510e-06, 2.60447609e-06, 2.53010932e-02,
        9.66373086e-01],
       [3.74746881e-02, 8.40798914e-01, 1.04410119e-01, 1.54665643e-02,
        1.84972491e-03],
       [1.48264062e-05, 5.30763145e-07, 2.99964586e-06, 7.22244894e-03,
        9.92759168e-01],
       [1.79683178e-04, 9.9526

In [26]:
# One-column frame holding the true (encoded) class of every test row.
comp = pd.DataFrame({'Actual Value': y_test_encoded})
comp

Unnamed: 0,Actual Value
0,4
1,0
2,4
3,3
4,0
5,0
6,0
7,4
8,1
9,4


In [27]:
# Print the model summary (per-layer output shapes and parameter counts)

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 64)                384       
                                                                 
 dense_1 (Dense)             (None, 128)               8320      
                                                                 
 dense_2 (Dense)             (None, 64)                8256      
                                                                 
 dense_3 (Dense)             (None, 32)                2080      
                                                                 
 dense_4 (Dense)             (None, 5)                 165       
                                                                 
Total params: 19,205
Trainable params: 19,205
Non-trainable params: 0
_________________________________________________________________


In [28]:
# Task 3 : Test the model with random data

In [29]:
# Generate random data for testing

# A seeded generator makes the sample - and the prediction derived from it -
# reproducible across kernel restarts.
# NOTE(review): values are uniform in [0, 1), whereas the model was trained on
# integer-coded Sex/BP/Cholesterol and standardised Age/Na_to_K; consider
# sampling in that feature space for a more meaningful smoke test.
rng = np.random.default_rng(42)
random_data = rng.random((1, 5))  # one sample, five features
random_data

array([[0.19643671, 0.51369899, 0.41235936, 0.32544689, 0.96184379]])

In [30]:
# Make predictions: run the network on the single random feature row; the
# softmax output layer returns one score per class.

predictions = model.predict(random_data)
predictions



array([[9.9999893e-01, 3.0973922e-08, 5.5587654e-09, 9.2188361e-07,
        1.3840597e-07]], dtype=float32)

In [31]:
# Index of the highest-scoring class (argmax over the flattened prediction
# array, which holds a single row here).

predicted_class = predictions.argmax()

In [32]:
# Print the predicted class (the integer class index, not the drug name)

print("Predicted Drug Class :", predicted_class)

Predicted Drug Class : 0
