In [1]:
import pandas as pd
import numpy as np
import random

In [2]:
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder,StandardScaler,LabelEncoder
from sklearn.metrics import accuracy_score,precision_score,f1_score,recall_score

In [3]:
# Absolute path to the adverse-reaction CSV (presumably a Colab runtime layout;
# adjust when running elsewhere).
csv_path = '/content/sample_data/drug adverse reaction.csv'
df = pd.read_csv(csv_path)

In [4]:
# Print a structural summary of the loaded frame: index range, per-column
# non-null counts and dtypes, and memory usage. Used here to spot the columns
# that contain missing values before modelling.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7386 entries, 0 to 7385
Data columns (total 11 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   age                   7386 non-null   int64  
 1   sex                   7386 non-null   object 
 2   height_cm             7386 non-null   float64
 3   weight_kg             7386 non-null   float64
 4   BMI                   7386 non-null   float64
 5   medical_history       5927 non-null   object 
 6   drug_name             7386 non-null   object 
 7   condition             7386 non-null   object 
 8   dosage_mg             7386 non-null   int64  
 9   side_effect           6332 non-null   object 
 10  side_effect_severity  6332 non-null   object 
dtypes: float64(3), int64(2), object(6)
memory usage: 634.9+ KB


In [5]:
# Rows without a recorded side effect get an explicit label so that they form
# their own class in the prediction target instead of staying NaN.
# (Label spelling corrected: it is shown to the user when predictions are
# decoded back to text.)
df['side_effect'] = df['side_effect'].fillna('No Side Effect')

In [6]:
# Columns fed to the model. 'condition' and 'side_effect_severity' are
# deliberately not part of this list.
features = [
    'age',
    'sex',
    'height_cm',
    'weight_kg',
    'BMI',
    'medical_history',
    'drug_name',
    'dosage_mg',
]

# Feature matrix (DataFrame) and prediction target (Series of side-effect labels).
x = df.loc[:, features]
y = df.loc[:, 'side_effect']

In [7]:
# Map the side-effect strings to integer class ids. The fitted encoder is kept
# in `le` so predictions can later be decoded with `le.inverse_transform`.
le = LabelEncoder()
le.fit(y)
y_encoded = le.transform(y)

In [8]:
# Numeric columns: these are standardised by the preprocessor.
num_features = [
    'age',
    'height_cm',
    'weight_kg',
    'BMI',
    'dosage_mg',
]

# Categorical (string) columns: these are one-hot encoded by the preprocessor.
cat_features = [
    'sex',
    'medical_history',
    'drug_name',
]

In [9]:
# Each step is a (name, transformer, columns) triple.
# Numeric columns are standardised; categorical columns are one-hot encoded,
# and categories not seen during fit are ignored at transform time instead of
# raising an error.
numeric_step = ('num', StandardScaler(), num_features)
categorical_step = ('cat', OneHotEncoder(handle_unknown='ignore'), cat_features)

preprocessor = ColumnTransformer(transformers=[numeric_step, categorical_step])

In [10]:
# Learn the scaling statistics and one-hot categories from every row of `x`,
# then encode those same rows.
preprocessor.fit(x)
X_transformed = preprocessor.transform(x)


In [11]:
# Split the RAW feature frame first, then fit the preprocessor on the training
# rows only. Fitting the scaler/encoder on the full dataset before splitting
# lets statistics from the held-out rows leak into training and makes the
# reported test metrics optimistic.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y_encoded, test_size=0.2, random_state=42
)

# Re-fit on the training rows (this replaces any earlier full-data fit of
# `preprocessor`), then apply the training-derived transformation to the test
# rows. Categories that only occur in the test rows are ignored by the encoder
# because it was built with handle_unknown='ignore'.
x_train = preprocessor.fit_transform(x_train_raw)
x_test = preprocessor.transform(x_test_raw)

In [12]:
from sklearn.ensemble import RandomForestClassifier

In [13]:
# Registry of estimators to train and evaluate, keyed by display name.
# Further models can be added here and will be picked up by the loops that
# iterate over `models`.
models = {}
models['Random Forest'] = RandomForestClassifier(n_estimators=100, random_state=42)

In [14]:
# Train every registered model on the training split and report
# weighted-average classification metrics on the test split.
print("=== scikit-learn Model Results (Predicting Side Effect) ===")
for name, model in models.items():
    model.fit(x_train, y_train)
    y_pred = model.predict(x_test)

    # 'weighted' averages the per-class scores by class support, which is
    # needed because the target has more than two classes.
    scores = {
        "Accuracy": accuracy_score(y_test, y_pred),
        "Precision": precision_score(y_test, y_pred, average='weighted'),
        "Recall": recall_score(y_test, y_pred, average='weighted'),
        "F1 Score": f1_score(y_test, y_pred, average='weighted'),
    }

    print(f"{name} Metrics:")
    for metric_name, metric_value in scores.items():
        print(f"{metric_name}: {metric_value}")
    print()

=== scikit-learn Model Results (Predicting Side Effect) ===
Random Forest Metrics:
Accuracy: 0.2604871447902571
Precision: 0.26189835017795926
Recall: 0.2604871447902571
F1 Score: 0.2513495330641678



In [15]:
# Single hypothetical patient to run through the trained model(s).
# Note: 'condition' is supplied here but is not listed in num_features or
# cat_features.
patient = {
    'age': 30,
    'sex': 'Female',
    'height_cm': 160,
    'weight_kg': 55,
    'BMI': 21.5,
    'medical_history': 'Diabetes',
    'drug_name': 'Zolpidem',
    'condition': 'Insomnia',
    'dosage_mg': 100,
}

# Wrap each value in a one-element list so the dict describes a one-row table.
new_data = {column: [value] for column, value in patient.items()}

In [16]:
# Build a one-row frame from the sample patient and encode it with the
# already-fitted preprocessor (transform only; nothing is re-fitted here).
new_df = pd.DataFrame(data=new_data)
new_data_transformed = preprocessor.transform(new_df)

In [17]:
# Run the encoded sample through every trained model and print the decoded
# side-effect label.
print("=== scikit-learn Models Predictions ===")
for name in models:
    model = models[name]
    # `predict` yields integer class ids; `le` maps them back to the original
    # side-effect strings.
    pred = model.predict(new_data_transformed)
    pred_label = le.inverse_transform(pred)
    print(f"{name} Prediction: {pred_label[0]}")

=== scikit-learn Models Predictions ===
Random Forest Prediction: headache
