In [64]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

In [65]:
# Load the dataset from drug200.csv (path is relative to the working directory).
csv_path = 'drug200.csv'
df = pd.read_csv(csv_path)

In [66]:
# Task 1: Read the dataset and pre-process the data

In [67]:
# Encode every categorical column as integer codes, keeping one fitted
# encoder per column so the code -> category mapping remains available
# (e.g. encoders['Drug'].inverse_transform(...) recovers the original labels).
categorical_columns = ['Sex', 'BP', 'Cholesterol', 'Drug']
encoders = {}
for column in categorical_columns:
    encoders[column] = LabelEncoder()
    df[column] = encoders[column].fit_transform(df[column])

# Later cells refer to `label_encoder`; as before, it ends up fitted on the
# 'Drug' column. It is a separate object, so refitting it elsewhere does not
# disturb encoders['Drug'].
label_encoder = LabelEncoder().fit(encoders['Drug'].classes_)

# Last expression of the cell: rich display of the first encoded rows.
df.head()

   Age  Sex  BP  Cholesterol  Na_to_K  Drug
0   23    0   0            0   25.355     0
1   47    1   1            0   13.093     3
2   47    1   1            0   10.114     3
3   28    0   2            0    7.798     4
4   61    0   1            0   18.043     0


In [68]:
# Standardise the continuous columns in place with a StandardScaler.
# NOTE(review): the scaler is fitted on the full frame before the train/test
# split performed later, so test rows contribute to the fitted statistics.
numeric_columns = ['Age', 'Na_to_K']
scaler = StandardScaler()
df[numeric_columns] = scaler.fit_transform(df[numeric_columns])

In [69]:
# Split the frame into model inputs (five predictor columns) and the target column.
feature_columns = ['Age', 'Sex', 'BP', 'Cholesterol', 'Na_to_K']
target_column = 'Drug'
x = df[feature_columns]
y = df[target_column]

In [70]:
# Split the dataset into training and testing sets
# 20% of the rows are held out; the fixed random_state keeps the split reproducible.
# The feature frame is assigned to `x` (lowercase) where it is defined, so it is
# passed as `x` here; `X` is not assigned anywhere in the visible notebook and
# raised NameError on a fresh kernel.
X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
print(X_train.shape)
print(y_test.shape)

(160, 5)
(40,)


In [71]:
# Task 2: Build the ANN model (input layer, at least 3 hidden layers, and an output layer)

In [72]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [73]:
# Define the model architecture
# Fully connected network: 5 input features -> four ReLU hidden layers
# (64, 128, 64, 32 units) -> 5-unit softmax output.
model = Sequential([
    Dense(64, activation='relu', input_shape=(5,)),
    Dense(128, activation='relu'),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(5, activation='softmax'),
])

In [74]:
# Re-derive features (first five columns) and target (remaining columns) by position.
# Note: `y` is a DataFrame here, not a Series, because it is taken with the `5:` slice.
x, y = df.iloc[:, :5], df.iloc[:, 5:]
for frame in (x, y):
    print(frame)

          Age  Sex  BP  Cholesterol   Na_to_K
0   -1.291591    0   0            0  1.286522
1    0.162699    1   1            0 -0.415145
2    0.162699    1   1            0 -0.828558
3   -0.988614    0   2            0 -1.149963
4    1.011034    0   1            0  0.271794
..        ...  ...  ..          ...       ...
195  0.708057    0   1            0 -0.626917
196 -1.715759    1   1            0 -0.565995
197  0.465676    1   2            0 -0.859089
198 -1.291591    1   2            1 -0.286500
199 -0.261469    0   1            1 -0.657170

[200 rows x 5 columns]
     Drug
0       0
1       3
2       3
3       4
4       0
..    ...
195     3
196     3
197     4
198     4
199     4

[200 rows x 1 columns]


In [75]:
# Compile the model
# Targets are integer class codes, hence the sparse categorical cross-entropy loss.
model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

# The 'Drug' column was already label-encoded during preprocessing, so the split
# targets are used as they are (flattened to 1-D arrays). Refitting a
# LabelEncoder on y_train alone would renumber the classes, and fail on y_test,
# whenever a class is missing from the training split; it would also overwrite
# the encoder state fitted on the 'Drug' column.
y_train_encoded = np.ravel(y_train)
y_test_encoded = np.ravel(y_test)

# Keep the History object so per-epoch metrics can be inspected afterwards.
history = model.fit(
    X_train,
    y_train_encoded,
    epochs=20,
    batch_size=20,
    validation_data=(X_test, y_test_encoded),
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fc722a7be20>

In [76]:
# Class-probability predictions for the held-out rows (one row per test sample).
# The split stores the test features in `X_test` (capital X); `x_test` is not
# defined anywhere in the visible notebook and raised NameError.
y_pred = model.predict(X_test)
y_pred





array([[4.13405127e-04, 1.27605614e-04, 2.03855492e-07, 7.50870770e-03,
        9.91949975e-01],
       [9.94201958e-01, 5.14725503e-03, 2.99533876e-05, 4.84759919e-04,
        1.36094895e-04],
       [2.79626124e-06, 1.99977421e-06, 5.16646413e-11, 6.72629918e-04,
        9.99322474e-01],
       [2.83280946e-03, 3.48852053e-02, 8.92015360e-03, 7.59812355e-01,
        1.93549350e-01],
       [9.99999940e-01, 3.28292191e-19, 1.42062910e-17, 8.46457494e-17,
        5.58904698e-17],
       [9.99691248e-01, 2.56415988e-05, 2.51631485e-04, 2.94335568e-05,
        2.17517095e-06],
       [9.99999940e-01, 3.61117553e-10, 4.05409484e-10, 1.11134280e-09,
        9.09846420e-10],
       [7.46123632e-03, 1.53253040e-05, 2.05253734e-08, 1.85971186e-02,
        9.73926246e-01],
       [4.89533022e-02, 8.14404786e-01, 6.96765035e-02, 5.54476641e-02,
        1.15178749e-02],
       [3.14717290e-05, 3.12856696e-06, 1.03769771e-07, 3.07339523e-03,
        9.96891856e-01],
       [8.33706290e-04, 9.4475

In [77]:
# Tabulate the true class codes of the test rows under a descriptive column name.
comp = pd.DataFrame(y_test_encoded, columns=['Actual Value'])
comp

Unnamed: 0,Actual Value
0,4
1,0
2,4
3,3
4,0
5,0
6,0
7,4
8,1
9,4


In [78]:
# Print the model summary: a table of each layer's output shape and parameter
# count, followed by the total / trainable / non-trainable parameter totals.

model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_5 (Dense)             (None, 64)                384       
                                                                 
 dense_6 (Dense)             (None, 128)               8320      
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dense_8 (Dense)             (None, 32)                2080      
                                                                 
 dense_9 (Dense)             (None, 5)                 165       
                                                                 
Total params: 19,205
Trainable params: 19,205
Non-trainable params: 0
_________________________________________________________________


In [79]:
# Task 3 : Test the model with random data

In [80]:
# Generate random data for testing

# One sample with 5 feature values drawn uniformly from [0, 1). A seeded
# generator keeps the sample identical across Restart & Run All.
# NOTE(review): these values do not go through the notebook's encoding/scaling
# steps, so this only smoke-tests the prediction call — confirm that is the intent.
rng = np.random.default_rng(42)
random_data = rng.random((1, 5))
random_data

array([[0.87039758, 0.52583504, 0.74177248, 0.71396893, 0.03728909]])

In [81]:
# Make predictions

# Run the trained model on the single random sample; the result holds one row
# of 5 class probabilities (the softmax output).
predictions = model.predict(random_data)
predictions





array([[9.9052775e-01, 3.0603227e-05, 6.6905326e-05, 1.3001083e-03,
        8.0746198e-03]], dtype=float32)

In [82]:
# Get the predicted drug class: the index of the largest predicted probability.
predicted_class = predictions.argmax()

In [84]:
# Print the predicted class
# (this is the integer class index from argmax, not the drug's original name)

print("Predicted Drug Class :", predicted_class)

Predicted Drug Class : 0
