<a href="https://colab.research.google.com/github/Sudhir22/Plane-Accident-Severity/blob/master/Model_XGBoost.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
!pip install -q xlrd
!git clone https://github.com/Sudhir22/Plane-Accident-Severity.git

In [0]:
import pandas as pd
import xgboost as xbg
from sklearn.model_selection import train_test_split
import numpy as np
from sklearn.metrics import precision_score, recall_score, accuracy_score,f1_score
from sklearn.model_selection import KFold


## **Loading and preprocessing data**

In [0]:
# Load the training set (it carries the Severity label) from the cloned repository.
train_data = pd.read_csv(
    'Plane-Accident-Severity/train.csv',
)

In [0]:
# Class balance check: one row per distinct Severity value, showing the number
# of non-null entries in each of the remaining columns.
train_data.groupby('Severity').count()

In [0]:
# Convert the Severity labels to a pandas categorical, then expose the integer
# category codes in their own column for use as the training target.
train_data['Severity'] = train_data['Severity'].astype('category')
train_data['Severity_Code'] = train_data['Severity'].cat.codes

### **Dividing data for training and testing**

In [0]:
# Feature matrix: the columns at positions 1 through 10 of train_data
# (the slice end, 11, is exclusive).
# NOTE(review): the selection is purely positional — confirm that these
# positions hold exactly the intended predictor columns.
X = train_data.iloc[:, 1:11]

# Preview the first rows of the selected features.
X.head()

In [0]:
# Target vector: the integer class codes derived from Severity. The column is
# selected by name rather than by a hard-coded position, so it cannot silently
# pick up a different column if the CSV layout ever changes.
Y = train_data['Severity_Code']

### **Training and testing model using cross-validation**

In [0]:
# Five-fold splitter. shuffle=False is KFold's default; it is spelled out to
# make explicit that rows are assigned to folds in their existing order.
kf = KFold(n_splits=5, shuffle=False)

In [0]:
# Booster configuration handed to xbg.train().
param = dict(
    eta=0.4,                     # learning rate (step-size shrinkage)
    max_depth=12,                # maximum depth of each tree
    objective='multi:softprob',  # multiclass; predict() yields per-class probabilities
    num_class=4,                 # number of target classes
)

# Number of boosting rounds (training iterations) passed to xbg.train().
steps = 1000

In [0]:
# Evaluate the configuration with K-fold cross-validation: train a fresh
# booster on each training split and print metrics for the held-out split.
for train_idx, test_idx in kf.split(X):
  # kf.split yields positional row indices, so both X and Y are indexed with
  # .iloc (plain Y[...] would look the integers up as index labels instead).
  X_train, X_test = X.iloc[train_idx, :], X.iloc[test_idx, :]
  Y_train, Y_test = Y.iloc[train_idx], Y.iloc[test_idx]

  D_train = xbg.DMatrix(X_train, label=Y_train)
  D_test = xbg.DMatrix(X_test, label=Y_test)

  # Note: `model` stays bound to the last fold's booster after the loop ends.
  model = xbg.train(param, D_train, steps)

  # predict() returns one row of class probabilities per sample; the predicted
  # class is the column with the highest probability.
  preds = model.predict(D_test)
  best_preds = preds.argmax(axis=1)

  # Precision and recall are macro-averaged; F1 is support-weighted.
  print("Precision = {}".format(precision_score(Y_test, best_preds, average='macro')))
  print("Recall = {}".format(recall_score(Y_test, best_preds, average='macro')))
  print("Accuracy = {}".format(accuracy_score(Y_test, best_preds)))
  print("F1-score = {}".format(f1_score(Y_test, best_preds, average='weighted')))
  print("********************************************************************")


## **Using trained model for prediction/inference**

In [0]:
# Load the test set to be scored from the cloned repository.
test_data = pd.read_csv(
    "Plane-Accident-Severity/test.csv",
)

In [0]:
# Select the inference features by name, reusing exactly the columns (and
# column order) of the training matrix X. This replaces a second positional
# slice that had to be kept in sync by hand with the one used for training.
test_X = test_data[X.columns]

In [0]:
# The cross-validation loop is for evaluation only: the booster it leaves
# behind was fitted on just the last fold's training split. Fit the final
# model on every labelled row before scoring the test set.
final_model = xbg.train(param, xbg.DMatrix(X, label=Y), steps)

D_test_test = xbg.DMatrix(test_X)
# predict() returns one row of class probabilities per sample; the predicted
# class code is the column with the highest probability.
test_preds = final_model.predict(D_test_test)
best_test_preds = test_preds.argmax(axis=1)

In [0]:
# Map each integer class code back to its Severity label. The mapping is built
# from the categorical's own category list (position == code), so it cannot
# drift from the encoding stored in Severity_Code.
severity_dict = dict(enumerate(train_data['Severity'].cat.categories))

In [0]:
# Translate every predicted class code into its severity label.
severity_list = [severity_dict[code] for code in best_test_preds]

In [0]:
# Attach the predicted labels to the test frame as its 'Severity' column.
test_data['Severity'] = pd.Series(data=severity_list)

In [0]:
# Write the submission file: only the accident identifier and the predicted
# severity, without the DataFrame index.
test_data.to_csv(
    "Plane-Accident-Severity/submission4.csv",
    columns=['Accident_ID', 'Severity'],
    index=False,
)