In [27]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense


# Load the dataset
data = pd.read_csv("/content/sample_data/drug200.csv")

# Display the first few rows of the dataset
print(data.head())


   Age Sex      BP Cholesterol  Na_to_K   Drug
0   23   F    HIGH        HIGH   25.355  DrugY
1   47   M     LOW        HIGH   13.093  drugC
2   47   M     LOW        HIGH   10.114  drugC
3   28   F  NORMAL        HIGH    7.798  drugX
4   61   F     LOW        HIGH   18.043  DrugY


In [28]:


# Drop any rows with missing values
data.dropna(inplace=True)

# Convert categorical variables to numerical values
label_encoder = LabelEncoder()
data['Sex'] = label_encoder.fit_transform(data['Sex'])
data['BP'] = label_encoder.fit_transform(data['BP'])
data['Cholesterol'] = label_encoder.fit_transform(data['Cholesterol'])
data['Drug'] = label_encoder.fit_transform(data['Drug'])

# Split the data into input features (X) and target variable (y)
X = data.drop('Drug', axis=1)
y = data['Drug']

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [32]:
# Initialize the ANN model
model = Sequential()

# Add input layer and first hidden layer
model.add(Dense(units=64, activation='relu', input_dim=X.shape[1]))

# Add additional hidden layers
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=64, activation='relu'))

# Add output layer
model.add(Dense(units=len(label_encoder.classes_), activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Print the model summary
model.summary()


Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 64)                384       
                                                                 
 dense_13 (Dense)            (None, 64)                4160      
                                                                 
 dense_14 (Dense)            (None, 64)                4160      
                                                                 
 dense_15 (Dense)            (None, 5)                 325       
                                                                 
Total params: 9,029
Trainable params: 9,029
Non-trainable params: 0
_________________________________________________________________


In [35]:
# Convert categorical variables to numerical values with handling unseen labels
label_encoder = LabelEncoder()
label_encoder.fit(data['Drug'])  # Fit the encoder on the entire dataset

# Generate random data for testing
random_data = pd.DataFrame({
    'Age': [35],
    'Sex': [0],  # 0: Female, 1: Male
    'BP': [2],  # 0: Low, 1: Normal, 2: High
    'Cholesterol': [1],  # 0: Normal, 1: High
    'Na_to_K': [15]
})

# Convert categorical variables to numerical values with handling unseen labels
random_data['Sex'] = label_encoder.transform(random_data['Sex'])
random_data['BP'] = label_encoder.transform(random_data['BP'])
random_data['Cholesterol'] = label_encoder.transform(random_data['Cholesterol'])

# Normalize the input features
normalized_random_data = (random_data - X.mean()) / X.std()

# Make predictions
predictions = model.predict(normalized_random_data)
predicted_class = label_encoder.inverse_transform(np.argmax(predictions, axis=1))

# Print the predicted class
print("Predicted Drug Category:", predicted_class[0])


Predicted Drug Category: 4
