# Binary classification (Trauma detection)

## Imports

In [None]:
# Enable IPython's autoreload extension. Mode 2 reloads all imported modules
# (except those excluded with %aimport) before each cell is executed, so edits
# to the project helpers are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

#########################################################################################
#########################################################################################
####### BEGIN OPTIONAL SECTION
####### The section below is disabled by the surrounding triple quotes.
####### Remove the quotes to enable it when running on Google Colab with Google
####### Drive mounted; leave it disabled when running from a local setup.
#########################################################################################
#########################################################################################
"""
GLB_INSTALL_DEPENDENCIES = True
GLB_USE_DRIVE_ACCOUNT = True

# Here to define dependencies
if GLB_INSTALL_DEPENDENCIES:
    !pip install transformers
    !pip install torch
    !pip install openpyxl

if GLB_USE_DRIVE_ACCOUNT:
  from google.colab import drive
  drive.mount('/content/drive')
  %cd /content/drive/MyDrive/"Colab Notebooks"
  username = "IsaacOlguin"
  repository =  "AutomatedTraumaDetectionInGCT"
  %cd {repository}
  %pwd
"""
#########################################################################################
#########################################################################################
####### END OPTIONAL SECTION
#########################################################################################
#########################################################################################

In [None]:
import pandas as pd
import torch
import numpy as np
import matplotlib.pyplot as plt
import json

import src.classification_model_utilities as mlclassif_utilities
import src.general_utilities as gral_utilities

## Globals

In [None]:
# Read the project configuration once; every constant below comes from it.
global_config_file = gral_utilities.read_config_file("config.yml")

# "general_set_up" section: dataset file name and logs directory name.
_general_cfg = global_config_file["general_set_up"]
PATH_DATASET = _general_cfg["dataset_filename"]  # e.g. "input/dataset/Dataset.xlsx"
PATH_DIR_LOGS = _general_cfg["logs_dir_name"]

# "dataset" section: index column(s) and the list of column names.
_dataset_cfg = global_config_file["dataset"]
INDEX_COLUMNS_DATASET = _dataset_cfg["index_columns_dataset"]  # e.g. 1
LIST_NAME_COLUMNS_DATASET = _dataset_cfg["list_columns_names"]

## Dataset

In [None]:
# Load the Excel dataset through the project helper, passing the configured
# path, index column(s) and column names.
df_dataset = mlclassif_utilities.import_dataset_from_excel(
    PATH_DATASET,
    INDEX_COLUMNS_DATASET,
    LIST_NAME_COLUMNS_DATASET,
)
# Preview the first rows (rendered as the cell output).
df_dataset.head()

In [None]:
# Ask the project helper for the unique values of the "trauma" column, then
# print how many there are followed by a list numbered from 1.
classes_dataset = mlclassif_utilities.get_unique_values_from_dataset(df_dataset, "trauma")
print(f"Num of different traumas in the dataset is {len(classes_dataset)} which are:")
for position, trauma_class in enumerate(classes_dataset, start=1):
    print("\t", position, "-", trauma_class)

### Distribution of all classes within the dataset

In [None]:
# Distribution of the "span" column over the "trauma" classes, as computed by
# the project helper.
distribution_classes = mlclassif_utilities.get_distribution_classes_from_dataset(
    df_dataset,
    ["trauma"],
    "span",
)

# One wedge per row: "span" gives the wedge size, "trauma" its label.
sizes = list(distribution_classes["span"])
labels = list(distribution_classes["trauma"])

fig1, ax1 = plt.subplots()
ax1.pie(
    sizes,
    labels=labels,
    autopct='%2.1f%%',  # percentage shown on each wedge (alternative: '%1.1f%%')
    shadow=True,
    startangle=0,
)
# An equal aspect ratio keeps the pie circular instead of elliptical.
ax1.axis('equal')
ax1.set_title(f'Distribution of classes in the dataset with a total number of {distribution_classes["span"].sum()} spans')

plt.show()

# Show the underlying table as the cell output.
distribution_classes

# Train model

In [None]:
!python /content/drive/MyDrive/"Colab Notebooks"/AutomatedTraumaDetectionInGCT/src/binary_classification.py