In [479]:
import matplotlib.pyplot as plt
import matplotlib.gridspec as gs
import numpy as np
import pandas as pd
import seaborn as sns
import pickle
import joblib
from sklearn.svm import SVC


In [480]:
# Load the input table; fields are comma-separated.
df = pd.read_csv("CleanedColumn.csv", sep=",")

In [481]:
# Show the column labels of the frame as loaded from the CSV.
df.columns

Index(['idart', 'idrt', 'weight_final', 'PSU', 'STRATA', 'province', 'gender',
       'age', 'education', 'work_status', 'injured_past_year', 'head_injury',
       'chest_injury', 'back_injury', 'stomach_injury', 'upper_body_injury',
       'lower_body_injury', 'bruises', 'cuts', 'sprains', 'broken_bones',
       'severed_limbs', 'eye_injury', 'brain_damage', 'internal_damage',
       'burns', 'other', 'impaired', 'missing_body_parts', 'permanent_injury',
       'place_of_injury', 'emotional_mental_health_disorder', 'weight_normal',
       'filter_$', 'time', 'has_injury'],
      dtype='object')

In [482]:
# Columns removed before feature engineering. The frame is modified in place,
# so re-running this cell on the same frame raises a KeyError for the missing labels.
columns_to_discard = [
    'idart', 'idrt',
    'weight_final', 'weight_normal',
    'PSU', 'STRATA', 'province',
    'time', 'filter_$',
]
df.drop(columns=columns_to_discard, inplace=True)

In [483]:
# Check which columns remain after the preceding drop.
df.columns

Index(['gender', 'age', 'education', 'work_status', 'injured_past_year',
       'head_injury', 'chest_injury', 'back_injury', 'stomach_injury',
       'upper_body_injury', 'lower_body_injury', 'bruises', 'cuts', 'sprains',
       'broken_bones', 'severed_limbs', 'eye_injury', 'brain_damage',
       'internal_damage', 'burns', 'other', 'impaired', 'missing_body_parts',
       'permanent_injury', 'place_of_injury',
       'emotional_mental_health_disorder', 'has_injury'],
      dtype='object')

In [484]:
# Labels of the sixteen injury columns that the engineered features aggregate over.
injury_cols = [
    "head_injury", "chest_injury", "back_injury", "stomach_injury",
    "upper_body_injury", "lower_body_injury",
    "bruises", "cuts", "sprains", "broken_bones",
    "severed_limbs", "eye_injury", "brain_damage", "internal_damage",
    "burns", "other",
]

# Per row, count how many of the injury columns hold a value greater than zero.
has_positive_value = df[injury_cols] > 0
df['total_injuries'] = has_positive_value.sum(axis=1)

In [485]:
# Hand-assigned weight (1 = minor ... 5 = very severe) for each injury column,
# used to build the weighted severity score.
severity_weights = dict(
    head_injury=5,          # Very severe
    chest_injury=4,         # Severe
    back_injury=3,          # Moderate
    stomach_injury=3,       # Moderate
    upper_body_injury=2,    # Mild
    lower_body_injury=2,    # Mild
    bruises=1,              # Minor
    cuts=1,                 # Minor
    sprains=2,              # Mild
    broken_bones=4,         # Severe
    severed_limbs=5,        # Very severe
    eye_injury=4,           # Severe
    brain_damage=5,         # Very severe
    internal_damage=5,      # Very severe
    burns=4,                # Severe
    other=1,                # Minor
)
# Weighted severity score per row: every injury column is multiplied by its weight
# from `severity_weights`, and the products are summed across columns.
# The weights are wrapped in a Series so pandas pairs each weight with its column
# by label, rather than relying on dict key/value views being coerced and lining
# up positionally.
df['weighted_injury_severity'] = (
    df[list(severity_weights)]
    .mul(pd.Series(severity_weights), axis='columns')
    .sum(axis=1)
)

In [486]:
# Row-wise sum of the raw values stored in the injury columns.
# Uses the shared `injury_cols` list (defined with the `total_injuries` feature)
# instead of repeating the same sixteen labels inline.
df['total_injury_severity'] = df[injury_cols].sum(axis=1)

In [487]:
# Per row, the number of injury columns holding a value greater than zero.
# Uses the shared `injury_cols` list instead of repeating the sixteen labels inline.
# NOTE(review): this is the same expression over the same columns as
# `total_injuries`, so the two features are identical; both are kept so the
# exported column set does not change.
df['injured_body_parts_count'] = df[injury_cols].gt(0).sum(axis=1)

In [488]:
# Per row, the number of injury columns whose value is exactly 2.
# Uses the shared `injury_cols` list instead of repeating the sixteen labels inline.
# NOTE(review): the meaning of code 2 is not visible in this notebook — confirm
# against the survey codebook that 2 really denotes a severe injury (and not,
# for example, a "no" answer) before relying on this feature.
df['severe_injuries_count'] = df[injury_cols].eq(2).sum(axis=1)

In [489]:
# The sixteen injury column labels (same names, same order, as `injury_cols`).
injury_columns = [
    'head_injury', 'chest_injury', 'back_injury', 'stomach_injury',
    'upper_body_injury', 'lower_body_injury',
    'bruises', 'cuts', 'sprains', 'broken_bones',
    'severed_limbs', 'eye_injury', 'brain_damage', 'internal_damage',
    'burns', 'other',
]



In [490]:
# Show the storage dtype of the `injured_past_year` column.
df["injured_past_year"].dtype

dtype('int64')

In [491]:
# NOTE(review): every row filter below is commented out, so this cell changes nothing.
# Lines with a double "# #" were already disabled inside the disabled block.
# df = df.drop(df[df['injured_past_year'] == 2].index)
# # df = df.drop(df[df['age'] > 75].index)
# df = df.drop(df[df['total_injury_severity'] < 26].index)
# # df = df.drop(df[df['severe_injuries_count'] < 10].index)
# # df = df.drop(df[~df['education'].between(2, 5)].index)


In [492]:
# Correlation-based feature filter.
# 1. Compute pairwise Pearson correlations between the numeric/boolean columns.
#    Restricting the dtypes keeps the cell working on pandas >= 2.0, where
#    `DataFrame.corr()` raises if any non-numeric column is present.
# 2. Rank every column by |correlation| with the target, strongest first.
#    (Despite its name, `low_corr_features` holds ALL ranked columns.)
# 3. Drop, in place, the columns whose |correlation| is below the threshold.
#    Columns with an undefined (NaN) correlation are not dropped, because NaN
#    never compares as smaller than the threshold.
TARGET_COLUMN = 'emotional_mental_health_disorder'
MIN_ABS_CORRELATION = 0.01  # columns weaker than this are discarded

corr_matrix = df.select_dtypes(include=['number', 'bool']).corr()
low_corr_features = corr_matrix[TARGET_COLUMN].abs().sort_values(ascending=False)
print(low_corr_features.head(40))
weak_features = low_corr_features[low_corr_features < MIN_ABS_CORRELATION].index
df.drop(columns=weak_features, inplace=True)

emotional_mental_health_disorder    1.000000
severed_limbs                       0.106764
has_injury                          0.106635
total_injuries                      0.106635
injured_body_parts_count            0.106635
injured_past_year                   0.106635
missing_body_parts                  0.106557
brain_damage                        0.106553
eye_injury                          0.106546
burns                               0.106473
impaired                            0.106383
chest_injury                        0.106035
weighted_injury_severity            0.105892
broken_bones                        0.105699
total_injury_severity               0.105596
internal_damage                     0.105519
other                               0.105399
stomach_injury                      0.104996
cuts                                0.104854
head_injury                         0.104584
severe_injuries_count               0.104227
back_injury                         0.103732
permanent_

In [493]:
# List the columns left after the correlation-based filter, including the engineered features.
df.columns

Index(['gender', 'age', 'education', 'work_status', 'injured_past_year',
       'head_injury', 'chest_injury', 'back_injury', 'stomach_injury',
       'upper_body_injury', 'lower_body_injury', 'bruises', 'cuts', 'sprains',
       'broken_bones', 'severed_limbs', 'eye_injury', 'brain_damage',
       'internal_damage', 'burns', 'other', 'impaired', 'missing_body_parts',
       'permanent_injury', 'place_of_injury',
       'emotional_mental_health_disorder', 'has_injury', 'total_injuries',
       'weighted_injury_severity', 'total_injury_severity',
       'injured_body_parts_count', 'severe_injuries_count'],
      dtype='object')

In [494]:
# Write the engineered frame to disk; the row index is not written.
output_csv = "dataFinal.csv"
df.to_csv(output_csv, index=False)