Transfer 

In [22]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [23]:
df = pd.read_csv('RTA.csv')

In [24]:
# Mapping dictionary: all alternate names → standardized name
replace_map = {
    "DGH, Mannar": "DGH – Mannar",
    "DGH,Kilinochchi": "DGH – Kilinochchi",

    "BH,Mankulam(TypeA)": "Base Hospital (A) - Mankulam",
    "BH, Tellipalai(Type A)": "Base Hospital (A) - Tellipalai",
    "BH,Chavakachcheri(TypeB)": "Base Hospital (B) - Chavakachcheri",
    "BH,Puthukudijiruppu(TypeB)": "Base Hospital (B) - Puthukudiyiruppu",
    "BH,Mallavi(TypeB)": "Base Hospital (B) - Mallavi",
    "BH,Murungan (TypeB)": "Base Hospital (B) - Murunkan",
}

# Apply replacement

df['Current Hospital Name'] = df['Current Hospital Name'].replace(replace_map)
df['Transfer To Next Hospital'] = df['Transfer To Next Hospital'].replace(replace_map)

]

In [25]:
new_df = df[["Transfer To Next Hospital", "Bystander Expenditure per day","Traveling Expenditure per day","Family Current Status","Hospital Distance From Home","Ethnicity","Date Of Birth","Gender","Alcohol Consumption","Illicit Drugs","First Hospital Name","Site of Injury No1","Type of injury No 1","Site of injury No 2","Type of Injury No 2","Other Injury","Any Other Hospital Admission Expenditure","Life Style"]]

#### Bystander Expenditure per day

In [26]:
expenditure_map = {
    'Not Necessary': 0,
    '0-500': 1,
    '500-1000': 2,
    'More than 1000': 4
}

new_df['Bystander Expenditure per day'] = new_df['Bystander Expenditure per day'].map(expenditure_map)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Bystander Expenditure per day'] = new_df['Bystander Expenditure per day'].map(expenditure_map)


#### Traveling Expenditure per day

In [27]:
travel_exp_map = {
    'Victim not willing to share/ Unable to respond/  Early Discharge': -1,
    '0-100': 1,
    '100-200': 2,
    '200-300': 3,
    '300-400': 4,
    '400-500': 5,
    'More than 500': 7
}

new_df['Traveling Expenditure per day'] = new_df['Traveling Expenditure per day'].map(travel_exp_map)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Traveling Expenditure per day'] = new_df['Traveling Expenditure per day'].map(travel_exp_map)


#### Family Current Status

In [28]:
# Define mapping for ordinal encoding
ordinal_map = {
    "Victim not willing to share/ Unable to respond/  Early Discharge": -1,  # treat as missing or unknown
    "Not Affected": 0,
    "Mildly Affected": 1,
    "Moderately Affected": 2,
    "Severely Affected": 3
}

new_df['Family Current Status'] = new_df['Family Current Status'].map(ordinal_map)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Family Current Status'] = new_df['Family Current Status'].map(ordinal_map)


#### Hospital Distance From Home

In [29]:
# Mapping for distance ranges to midpoints
distance_map = {
    "Victim doesn't have knowledge on distance/ Not willing to share/ Unable to respond/  Early Discharge": np.nan,
    "Less than 5 Km": 2.5,
    "5-10 Km": 7.5,
    "10-15 Km": 12.5,
    "15-20 Km": 17.5,
    "20-25 Km": 22.5,
    "25-30 Km": 27.5,
    "25-30 km": 27.5,  # same as above, fix case difference
    "30-50 Km": 40,
    "50-100 Km": 75,
    "100-150 Km": 125,
    "150-200 Km": 175
}

new_df['Hospital Distance From Home'] = new_df['Hospital Distance From Home'].map(distance_map)
new_df['Hospital Distance From Home'].fillna(-1, inplace=True)  # Fill

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Hospital Distance From Home'] = new_df['Hospital Distance From Home'].map(distance_map)
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df['Hospital Distance From Home'].fillna(-1, inplace=True)  # Fill


#### Any Other Hospital Admission Expenditure

In [30]:
# Encode: 0 if "No Other Expenses", else 1
new_df["Any Other Hospital Admission Expenditure"] = new_df["Any Other Hospital Admission Expenditure"].apply(
    lambda x: 0 if x == "No Other Expenses" else 1
)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df["Any Other Hospital Admission Expenditure"] = new_df["Any Other Hospital Admission Expenditure"].apply(


#### Ethnicity

In [31]:
# Create one-hot encoded DataFrame
ethnicity_dummies = pd.get_dummies(new_df['Ethnicity'])

# Rename columns meaningfully and convert to 0/1 integers
ethnicity_dummies = ethnicity_dummies.rename(columns=lambda x: f"Ethnicity_{x}").astype(int)

# Join back to original DataFrame
new_df = pd.concat([new_df, ethnicity_dummies], axis=1)

# Drop original column
new_df.drop(columns=['Ethnicity'], inplace=True)


#### Date Of Birth

In [32]:
# Ensure the DOB column is in datetime format
new_df['Date Of Birth'] = pd.to_datetime(new_df['Date Of Birth'], errors='coerce')

# Reference date
reference_date = pd.to_datetime('2023-01-01')

# Calculate age in years (rounded down)
new_df['Person Age (as of 2023-01-01)'] = (reference_date - new_df['Date Of Birth']).dt.days // 365

new_df['Person Age (as of 2023-01-01)'] = new_df['Person Age (as of 2023-01-01)'].fillna(-1).astype(int)
new_df.drop('Date Of Birth', axis=1, inplace=True)

#### Gender

In [33]:
gender_map = {
    "Male": 1,
    "Female": 0
}

new_df['Gender'] = new_df['Gender'].map(gender_map)


#### Life Style

In [34]:
# Create one-hot encoded DataFrame
lifestyle_dummies = pd.get_dummies(new_df['Life Style'])

# Remove ambiguous/missing response if present
lifestyle_dummies = lifestyle_dummies.drop(
    columns=['Victim not willing to share/ Unable to respond/  Early Discharge'],
    errors='ignore'
)

# Rename columns meaningfully and convert to 0/1 integers
lifestyle_dummies = lifestyle_dummies.rename(columns=lambda x: f"LifeStyle_{x}").astype(int)

# Join back to original DataFrame
new_df = pd.concat([new_df, lifestyle_dummies], axis=1)

# Drop the original column
new_df.drop(columns=['Life Style'], inplace=True)

#### Alcohol Consumption

In [35]:
alcohol_map = {
    "Yes": 1,
    "No": 0,
    "Victim not willing to share/ Unable to respond/  Early Discharge": -1
}

new_df['Alcohol_Consumption_Encoded'] = new_df['Alcohol Consumption'].map(alcohol_map)

# Optionally drop original column
new_df.drop(columns=['Alcohol Consumption'], inplace=True)

print(new_df[['Alcohol_Consumption_Encoded']].head())


   Alcohol_Consumption_Encoded
0                            0
1                            0
2                            0
3                            1
4                            0


#### Illicit Drugs

In [36]:
# Ordinal encoding of 'Illicit Drugs' column
new_df['Illicit_Drugs_Encoded'] = new_df['Illicit Drugs'].map({
    'Yes': 1,
    'No': 0,
    'Victim not willing to share/ Unable to respond/  Early Discharge': -1
})
new_df.drop(columns=['Illicit Drugs'], inplace=True)

print(new_df[['Illicit_Drugs_Encoded']].head())

   Illicit_Drugs_Encoded
0                      0
1                      0
2                      0
3                      0
4                      0


#### Transfer To Next Hospital

In [37]:
# Define mapping
transfer_mapping = {
    'Yes': 1,
    'No': 0
}

# Apply mapping to a new column
new_df['Transfer To Next Hospital'] = new_df['Transfer To Next Hospital'].map(transfer_mapping).fillna(-1).astype(int)

#### First Hospital Name

In [38]:
# One-hot encode the 'Current Hospital Name' column
hospital_dummies = pd.get_dummies(df['First Hospital Name'])

# Drop 'Incomplete Information' column if present
hospital_dummies = hospital_dummies.drop(columns=['Incomplete Information'], errors='ignore')

# Rename columns to match the schema: Hospital_<name>
hospital_dummies = hospital_dummies.rename(columns=lambda x: f"Hospital_{x}").astype(int)

# Join the encoded columns back to the original DataFrame
df = pd.concat([df, hospital_dummies], axis=1)


df.drop(columns=['First Hospital Name'], inplace=True)

',Transfer To Next Hospital,Bystander Expenditure per day,Traveling Expenditure per day,Family Current Status,Hospital Distance From Home,Gender,First Hospital Name,Site of Injury No1,Type of injury No 1,Site of injury No 2,Type of Injury No 2,Other Injury,Any Other Hospital Admission Expenditure,Ethnicity_Moor,Ethnicity_Sinhalese,Ethnicity_Tamil,Person Age (as of 2023-01-01),LifeStyle_Living alone,LifeStyle_Living with care givers,LifeStyle_Living with children,Alcohol_Consumption_Encoded,Illicit_Drugs_Encoded\r\n0,0,0,3,1,2.5,1,DGH – Vavuniya,Knee,Abration,No Secondary Injury Found,,Not Necessary,0,0,0,1,68,0,0,1,0,0\r\n1,1,4,3,1,125.0,0,DGH – Mullaithivu,Shoulder Clavicle,Fracture,Leg,Abration,No Other Injury,0,0,0,1,21,0,0,1,0,0\r\n2,0,0,5,2,27.5,1,DGH – Vavuniya,Thigh Femur,Fracture,No Secondary Injury Found,,Not Necessary,0,0,1,0,29,0,1,0,0,0\r\n3,0,0,2,2,12.5,1,Teaching hospital - Jaffna (THJ),Elbow Region,Laceration,Foot,Abration,No Other Injury,1,0,0,1,26,0,0,1,1,0\r\n4,0,0,1,

In [40]:
new_df.to_csv('output_Transfer.csv', index=False)