In [8]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder

# Load the dataset
file_path = "pill_identification_data.csv"
df = pd.read_csv(file_path)




In [None]:
#No train-test split since that gave tooo low of accuracy
#trains model on 100% of data

#drop missing values
df.dropna(inplace=True)

#encode categorical vars
label_encoders = {}
for column in ["shape", "color", "imprint", "name"]:  # exclude 'size' since it's numerical
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])  #convert text to num
    label_encoders[column] = le  # store encoders for decoding predictions



In [None]:
#defines features (X) and target (y)
X = df[["shape", "color", "size", "imprint"]]
y = df["name"]



In [None]:
#train Random Forest model on 100% of the data
rf_model = RandomForestClassifier(
    n_estimators=200,   
    max_depth=20,       
    min_samples_split=5,  
    class_weight="balanced",  
    random_state=42
)
rf_model.fit(X, y)  



In [None]:
# pill prediction Function
def predict_pill(shape, color, size, imprint):
    shape_encoded = label_encoders["shape"].transform([shape])[0]
    color_encoded = label_encoders["color"].transform([color])[0]
    imprint_encoded = label_encoders["imprint"].transform([imprint])[0]
    
    #convert input to df w col names
    input_data = pd.DataFrame([[shape_encoded, color_encoded, size, imprint_encoded]], columns=X.columns)
    
    prediction = rf_model.predict(input_data)
    predicted_pill = label_encoders["name"].inverse_transform(prediction)
    return predicted_pill[0]



In [None]:
#example
example_prediction = predict_pill("ROUND", "YELLOW",1, "AN;573")
print(f"Predicted Pill: {example_prediction}")

Predicted Pill: Isosorbide Dinitrate 2.5 MG Sublingual Tablet


In [None]:
# RESULT:

# Predicted Pill: Isosorbide Dinitrate 2.5 MG Sublingual Tablet       //WRONG: correct name: Bethanechol Chloride 25 MG Oral Tablet

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

#make predictions on the full dataset
y_pred = rf_model.predict(X)

#calculate accuracy
accuracy = accuracy_score(y, y_pred)
print("Accuracy Score: %f" % accuracy)

#calculate precision 
precision = precision_score(y, y_pred, average="weighted", zero_division=1)
print("Precision Score: %f" % precision)

#calc recall
recall = recall_score(y, y_pred, average="weighted", zero_division=1)
print("Recall Score: %f" % recall)

#calc f1 score
f1 = f1_score(y, y_pred, average="weighted", zero_division=1)
print("F1 Score: %f" % f1)


Accuracy Score: 0.613404
Precision Score: 0.830960
Recall Score: 0.613404
F1 Score: 0.580116


In [None]:
'''
RESULTS:

Accuracy Score: 0.613404
Precision Score: 0.830960
Recall Score: 0.613404
F1 Score: 0.580116

'''