In [None]:
# Enable IPython's autoreload extension in mode 2, so imported modules are
# reloaded before each cell runs and edits to the project's .py files are
# picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
!pip install -r requirements.txt
import os

from src.preprocessing import DataPreprocessor
from src.model import Model
from src.gui import AnalysisGUI

In [None]:
# Load the line-listing spreadsheet through the project's DataPreprocessor,
# requesting only the columns that the rest of the notebook works with.
listing_path = "data/Line Listing(dxjZy).xlsx"
listing_columns = [
    'Case ID',
    'Suspect Product Active Ingredients',
    'Serious',
    'Sex',
    'Patient Age',
    'Patient Weight',
]
dataset = DataPreprocessor(filepath=listing_path, column_names=listing_columns)

# The ingredient cell is passed to explode_column with ";" as the separator
# (presumably yielding one row per ingredient — confirm in src.preprocessing).
ingredient_column = "Suspect Product Active Ingredients"
dataset.explode_column(ingredient_column, ";")

# Keep the ingredient values now, before the column is replaced by dummy
# columns further down; the GUI cell uses them to populate its menus.
medications = dataset.data[ingredient_column]

# Summary of the freshly loaded data: distribution of the target column and
# the dtype of every column. Each header and its value are printed on
# separate lines via sep="\n".
print("Value Counts:", dataset.get_value_counts("Serious"), sep="\n")

print("\nData Types:", dataset.get_dataframe().dtypes, sep="\n")

# Turn the ingredient column into dummy/indicator columns (presumably one
# column per distinct ingredient — confirm in src.preprocessing).
dataset.get_dummies(["Suspect Product Active Ingredients"])

# Collapse the exploded rows back to a single row per case, taking the max of
# each column within a case. groupby(...).max().reset_index() builds a NEW
# frame, so the result must be stored; it was previously discarded, which left
# one row per (case, ingredient) in the data used for training.
# NOTE(review): assumes `dataset.data` is a plain assignable attribute — confirm.
# NOTE(review): a checkpoint saved before this fix was trained on the
# un-collapsed rows; delete it to retrain on the corrected data.
dataset.data = dataset.data.groupby("Case ID").max().reset_index()

# The case identifier is only needed for the grouping above.
dataset.drop_columns(["Case ID"])

# Coerce the age column to numeric. decimal=False is forwarded unchanged
# (presumably requests whole numbers — confirm in src.preprocessing).
dataset.ensure_numeric_column("Patient Age", decimal=False)

# Replace the "Not Specified" placeholder with "0 KG" before the numeric
# conversion, so the weight column goes through the same parsing as real values.
# NOTE(review): this makes an unknown weight indistinguishable from 0 — confirm
# that is the intended imputation.
weight_column = "Patient Weight"
dataset.convert_nulls(weight_column, nulls=["Not Specified"], output="0 KG")
dataset.ensure_numeric_column(weight_column)

# Encode the remaining categorical columns, including the target.
for categorical_column in ("Sex", "Serious"):
    dataset.encode_column(categorical_column)

# Final step: drop whatever nulls remain after the conversions above.
dataset.drop_all_nulls()

# Same target summary as before cleaning, followed by a preview of the
# processed frame. Header and value are printed on separate lines via sep="\n".
print("\nNew Value Counts:", dataset.get_value_counts("Serious"), sep="\n")

print("\nDataset Head:", dataset.get_dataframe().head(), sep="\n")

# Hold out 20% of the rows for evaluation, with a fixed seed for a
# reproducible split. The helper returns the four pieces unpacked below.
# NOTE(review): every column of the frame is passed in, including the target
# "Serious". Unless get_standardised_train_test_split removes `y` from the
# feature set itself, the label leaks into the features — confirm.
split_columns = dataset.data.columns
x_train, x_test, y_train, y_test = dataset.get_standardised_train_test_split(
    split_columns,
    y="Serious",
    test_size=0.2,
    random_state=42,
)

In [None]:
# Build the model, reusing a saved checkpoint when one is present on disk.
model_name = "default"
checkpoint_path = f"models/{model_name}.pt"

# The model's input size is the length of the first training row.
input_dim = len(x_train[0])
analysis_model = Model(input_dim)

# Train and save only when no checkpoint file exists; otherwise load it.
# NOTE(review): an existing checkpoint is loaded even if the preprocessing
# above has changed since it was saved — remove the file to force a retrain.
if not os.path.exists(checkpoint_path):
    analysis_model.train_model(x_train, y_train)
    analysis_model.save_model(model_name)
else:
    analysis_model.load_model(model_name)

# Evaluate on the held-out split in either case.
analysis_model.evaluate_model(x_test, y_test)

In [None]:
# Show GUI
# Distinct ingredient values for the GUI menus. The result is stored under a
# new name instead of rebinding `medications`: .unique() on a pandas Series
# returns an array, which has no .unique() method, so overwriting the Series
# made this cell fail when it was run a second time.
# (`medications` is presumably a pandas Series taken from dataset.data — confirm.)
medication_options = medications.unique()

# No allergy options are supplied yet; the GUI receives an empty list.
allergies = []

gui = AnalysisGUI(model=analysis_model, medications=medication_options, allergies=allergies)