In [44]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense, LSTM
from tensorflow.keras.utils import to_categorical
# Load dataset
df = pd.read_csv('/content/drugs_side_effects_drugs_com.csv')

In [45]:
df.head()

Unnamed: 0,drug_name,medical_condition,side_effects,generic_name,drug_classes,brand_names,activity,rx_otc,pregnancy_category,csa,alcohol,related_drugs,medical_condition_description,rating,no_of_reviews,drug_link,medical_condition_url
0,doxycycline,Acne,"(hives, difficult breathing, swelling in your ...",doxycycline,"Miscellaneous antimalarials, Tetracyclines","Acticlate, Adoxa CK, Adoxa Pak, Adoxa TT, Alod...",87%,Rx,D,N,X,amoxicillin: https://www.drugs.com/amoxicillin...,Acne Other names: Acne Vulgaris; Blackheads; B...,6.8,760.0,https://www.drugs.com/doxycycline.html,https://www.drugs.com/condition/acne.html
1,spironolactone,Acne,hives ; difficulty breathing; swelling of your...,spironolactone,"Aldosterone receptor antagonists, Potassium-sp...","Aldactone, CaroSpir",82%,Rx,C,N,X,amlodipine: https://www.drugs.com/amlodipine.h...,Acne Other names: Acne Vulgaris; Blackheads; B...,7.2,449.0,https://www.drugs.com/spironolactone.html,https://www.drugs.com/condition/acne.html
2,minocycline,Acne,"skin rash, fever, swollen glands, flu-like sym...",minocycline,Tetracyclines,"Dynacin, Minocin, Minolira, Solodyn, Ximino, V...",48%,Rx,D,N,,amoxicillin: https://www.drugs.com/amoxicillin...,Acne Other names: Acne Vulgaris; Blackheads; B...,5.7,482.0,https://www.drugs.com/minocycline.html,https://www.drugs.com/condition/acne.html
3,Accutane,Acne,problems with your vision or hearing; muscle o...,isotretinoin (oral),"Miscellaneous antineoplastics, Miscellaneous u...",,41%,Rx,X,N,X,doxycycline: https://www.drugs.com/doxycycline...,Acne Other names: Acne Vulgaris; Blackheads; B...,7.9,623.0,https://www.drugs.com/accutane.html,https://www.drugs.com/condition/acne.html
4,clindamycin,Acne,hives ; difficult breathing; swelling of your ...,clindamycin topical,"Topical acne agents, Vaginal anti-infectives","Cleocin T, Clindacin ETZ, Clindacin P, Clindag...",39%,Rx,B,N,,doxycycline: https://www.drugs.com/doxycycline...,Acne Other names: Acne Vulgaris; Blackheads; B...,7.4,146.0,https://www.drugs.com/mtm/clindamycin-topical....,https://www.drugs.com/condition/acne.html


In [46]:
categorical_features = ['drug_classes', 'rx_otc', 'csa', 'alcohol', 'brand_names', 'side_effects']
target_column = 'medical_condition'

In [47]:
df[categorical_features] = df[categorical_features].fillna('Unknown')  # Fill missing categorical values
df[target_column] = df[target_column].fillna(df[target_column].mode()[0])  # Fill target with most frequent value


In [48]:
# Encode categorical features using one-hot encoding
df = pd.get_dummies(df, columns=categorical_features, drop_first=True)

In [49]:
print(df.dtypes[df.dtypes == 'object'])

drug_name                        object
medical_condition                object
generic_name                     object
activity                         object
pregnancy_category               object
related_drugs                    object
medical_condition_description    object
drug_link                        object
medical_condition_url            object
dtype: object


In [50]:
# Extract features and target variable
X = df.drop(target_column, axis=1).values  # Convert to numpy array
y = df[target_column].values  # Target column

In [51]:
# Extract features and target variable
X = df.drop(target_column, axis=1)  # No .values yet to keep it as a DataFrame

# Convert object columns to numerical using Label Encoding
for col in X.select_dtypes(include=['object']).columns:
    X[col] = X[col].astype('category').cat.codes

# Now convert to NumPy array
X = X.values.astype(np.float32)

y = df[target_column].values  # Target column

In [52]:
# Encode target labels
label_encoder = LabelEncoder()
y = label_encoder.fit_transform(y)
num_classes = len(label_encoder.classes_)
y = to_categorical(y, num_classes=num_classes)

In [53]:
# Before splitting into train and test sets
for col in df.columns:
    if df[col].dtype == 'object':
        print(f"Column '{col}' has object dtype. Consider converting it.")
        # Try converting to numeric if possible:
        # df[col] = pd.to_numeric(df[col], errors='coerce')  # 'coerce' will replace non-numeric with NaN

# After converting, fill any NaNs:
df = df.fillna(0)  # Or use another appropriate strategy

# Then proceed with splitting, reshaping, and model training.

Column 'drug_name' has object dtype. Consider converting it.
Column 'medical_condition' has object dtype. Consider converting it.
Column 'generic_name' has object dtype. Consider converting it.
Column 'activity' has object dtype. Consider converting it.
Column 'pregnancy_category' has object dtype. Consider converting it.
Column 'related_drugs' has object dtype. Consider converting it.
Column 'medical_condition_description' has object dtype. Consider converting it.
Column 'drug_link' has object dtype. Consider converting it.
Column 'medical_condition_url' has object dtype. Consider converting it.


In [54]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [55]:
# Reshape for LSTM input
X_train = np.expand_dims(X_train, axis=1)  # Adding a timestep dimension
X_test = np.expand_dims(X_test, axis=1)

In [56]:
# Define LSTM Model
model = Sequential([
    LSTM(128, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False),
    Dense(64, activation='relu'),
    Dense(num_classes, activation='softmax')
])

  super().__init__(**kwargs)


In [57]:
# Compile Model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [58]:
# Train Model
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32)

Epoch 1/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 12ms/step - accuracy: 0.0163 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 2/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0194 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 3/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0205 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 4/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0165 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 5/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - accuracy: 0.0160 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 6/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.0205 - loss: nan - val_accuracy: 0.0221 - val_loss: nan
Epoch 7/10
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0

<keras.src.callbacks.history.History at 0x7f77f0af8a90>

In [59]:
# Evaluate Model
loss, accuracy = model.evaluate(X_test, y_test)
print(f"Test Accuracy: {accuracy:.4f}")

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.0175 - loss: nan
Test Accuracy: 0.0221


In [60]:
# Save Model
model.save("rnn_model.h5")



In [61]:
# Load Model
model = load_model("rnn_model.h5")



In [62]:
y_pred = model.predict(X_test)
predicted_classes = label_encoder.inverse_transform(y_pred.argmax(axis=1))

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 15ms/step


In [63]:
# Display Predictions
print("Predicted Classes:", predicted_classes)

Predicted Classes: ['ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD'
 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' 'ADHD' '