In [26]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [27]:
df = pd.read_csv('output_Transfer_with_Severity.csv')

In [28]:
df.columns

Index(['Transfer To Next Hospital', 'Bystander Expenditure per day',
       'Traveling Expenditure per day', 'Family Current Status',
       'Hospital Distance From Home', 'Gender', 'First Hospital Name',
       'Site of Injury No1', 'Type of injury No 1', 'Site of injury No 2',
       'Type of Injury No 2', 'Other Injury',
       'Any Other Hospital Admission Expenditure', 'Ethnicity_Moor',
       'Ethnicity_Sinhalese', 'Ethnicity_Tamil',
       'Person Age (as of 2023-01-01)', 'LifeStyle_Living alone',
       'LifeStyle_Living with care givers', 'LifeStyle_Living with children',
       'Alcohol_Consumption_Encoded', 'Illicit_Drugs_Encoded', 'Severity'],
      dtype='object')

In [29]:
df.drop(columns=['Other Injury','Site of Injury No1','Type of injury No 1','Site of injury No 2','Type of Injury No 2'], inplace=True)

In [30]:
df.dtypes

Transfer To Next Hospital                     int64
Bystander Expenditure per day                 int64
Traveling Expenditure per day                 int64
Family Current Status                         int64
Hospital Distance From Home                 float64
Gender                                        int64
First Hospital Name                          object
Any Other Hospital Admission Expenditure      int64
Ethnicity_Moor                                int64
Ethnicity_Sinhalese                           int64
Ethnicity_Tamil                               int64
Person Age (as of 2023-01-01)                 int64
LifeStyle_Living alone                        int64
LifeStyle_Living with care givers             int64
LifeStyle_Living with children                int64
Alcohol_Consumption_Encoded                   int64
Illicit_Drugs_Encoded                         int64
Severity                                     object
dtype: object

In [31]:
df = pd.get_dummies(df, columns=["First Hospital Name"])
df = df.astype({col: 'int8' for col in df.columns if col.startswith("First Hospital Name_")})


In [32]:
# Define mapping
mapping = {"U": -1, "M": 1, "S": 2}

# Apply mapping
df["Severity"] = df["Severity"].map(mapping)

#### Training the model

In [33]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Suppose df is your DataFrame
# Make sure target is binary (0/1)
df["Transfer To Next Hospital"] = df["Transfer To Next Hospital"].astype(int)

X = df.drop("Transfer To Next Hospital", axis=1)  # features
y = df["Transfer To Next Hospital"]               # target

# Split data
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)


In [34]:
from sklearn.linear_model import LogisticRegression


model = LogisticRegression(max_iter=500)
model.fit(X_train, y_train)

STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [35]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report

# Predictions
y_pred = model.predict(X_test)

# Evaluation metrics
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

print("Model Evaluation:")
print(f"Accuracy  : {accuracy:.4f}")
print(f"Precision : {precision:.4f}")
print(f"Recall    : {recall:.4f}")
print(f"F1 Score  : {f1:.4f}\n")

# Confusion matrix
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)

# Detailed classification report
print("\nClassification Report:\n", classification_report(y_test, y_pred))


Model Evaluation:
Accuracy  : 0.9498
Precision : 0.8785
Recall    : 0.6573
F1 Score  : 0.7520

Confusion Matrix:
 [[1080   13]
 [  49   94]]

Classification Report:
               precision    recall  f1-score   support

           0       0.96      0.99      0.97      1093
           1       0.88      0.66      0.75       143

    accuracy                           0.95      1236
   macro avg       0.92      0.82      0.86      1236
weighted avg       0.95      0.95      0.95      1236



In [42]:
# Get probabilities for each class (0 and 1)
y_proba = model.predict_proba(X_test)

# Extract probability of class 1 (Transfer = 1)
transfer_proba = y_proba[:, 1]

print("Predicted transfer probabilities:")
print(transfer_proba[:20])  # show first 10 probabilities


Predicted transfer probabilities:
[2.83727791e-04 4.69983345e-02 1.18339175e-02 1.74167073e-02
 1.20108963e-02 1.13986862e-02 1.52653493e-03 1.33391637e-04
 1.17334766e-03 9.88598156e-03 1.02036538e-02 1.90138115e-02
 1.12746315e-03 2.66726893e-01 1.48233362e-03 9.24085075e-03
 6.33601941e-02 4.93127080e-02 7.19273554e-03 1.28690647e-02]


In [40]:
# X_test.columns
y_pred[:20]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [43]:
print("Model Parameters:")
print(model.get_params())


Model Parameters:
{'C': 1.0, 'class_weight': None, 'dual': False, 'fit_intercept': True, 'intercept_scaling': 1, 'l1_ratio': None, 'max_iter': 500, 'multi_class': 'auto', 'n_jobs': None, 'penalty': 'l2', 'random_state': None, 'solver': 'lbfgs', 'tol': 0.0001, 'verbose': 0, 'warm_start': False}


In [47]:
print("Feature Data Types:")
X_train.dtypes


Feature Data Types:


Bystander Expenditure per day                                 int64
Traveling Expenditure per day                                 int64
Family Current Status                                         int64
Hospital Distance From Home                                 float64
Gender                                                        int64
Any Other Hospital Admission Expenditure                      int64
Ethnicity_Moor                                                int64
Ethnicity_Sinhalese                                           int64
Ethnicity_Tamil                                               int64
Person Age (as of 2023-01-01)                                 int64
LifeStyle_Living alone                                        int64
LifeStyle_Living with care givers                             int64
LifeStyle_Living with children                                int64
Alcohol_Consumption_Encoded                                   int64
Illicit_Drugs_Encoded                           

In [46]:
import pandas as pd

pd.set_option("display.max_rows", None)    # show all rows
pd.set_option("display.max_columns", None) # show all columns
pd.set_option("display.width", None)       # don't wrap columns
pd.set_option("display.max_colwidth", None) # show full text in columns


In [49]:
X_train.columns

Index(['Bystander Expenditure per day', 'Traveling Expenditure per day',
       'Family Current Status', 'Hospital Distance From Home', 'Gender',
       'Any Other Hospital Admission Expenditure', 'Ethnicity_Moor',
       'Ethnicity_Sinhalese', 'Ethnicity_Tamil',
       'Person Age (as of 2023-01-01)', 'LifeStyle_Living alone',
       'LifeStyle_Living with care givers', 'LifeStyle_Living with children',
       'Alcohol_Consumption_Encoded', 'Illicit_Drugs_Encoded', 'Severity',
       'First Hospital Name_BH, Tellipalai(Type A)',
       'First Hospital Name_BH,Chavakachcheri(TypeB)',
       'First Hospital Name_BH,Mallavi(TypeB)',
       'First Hospital Name_BH,Mankulam(TypeA)',
       'First Hospital Name_BH,Murungan (TypeB)',
       'First Hospital Name_BH,Puthukudijiruppu(TypeB)',
       'First Hospital Name_Base Hospital (A) - Mankulam',
       'First Hospital Name_Base Hospital (A) - Point Pedro',
       'First Hospital Name_Base Hospital (A) -Tellipalai',
       'First Hospital N

In [50]:
import joblib

# Suppose you already trained a model
model = LogisticRegression()
model.fit(X_train, y_train)

# Save model
joblib.dump(model, "trained_model.pkl")

# Later, load it again
loaded_model = joblib.load("trained_model.pkl")

# Test loading
print(loaded_model.predict(X_test[:5]))

[0 0 0 0 0]


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
