# Accident classifier 

Summary of the below code:
1) Data cleaning of the two Datasets
2) Data combining and creating a suitable dataset
3) Application of random forest Classifier to classify the two types of accidents
4) Testing the model and calculating accuracy


In [1]:
import numpy as np
import pandas as pd

In [2]:
a_data=pd.read_csv("ACCIDENTAL_DATA.csv")

In [3]:
p_data=pd.read_csv("PHONE_FALL_DATA.csv")

In [4]:
p_data=p_data.dropna()

In [5]:
a_data

Unnamed: 0,MS,X,Y
0,2,-9.8,2.2
1,4,-7.8,0.2
2,6,-7.8,1.8
3,8,-5.8,2.6
4,10,-4.6,1.0
...,...,...,...
171,210,-0.5,-1.5
172,220,0.0,-1.5
173,230,0.0,-1.5
174,240,0.0,-1.5


In [6]:
p_data

Unnamed: 0,ACTUAL TIME (ms),TIME INTERVAL (ms),Accel X,Accel Y
0,0.0,0.0,-3.237152,2.039383
1,8.0,8.0,-3.347595,1.998138
2,12.0,4.0,-3.489105,1.942612
3,15.0,3.0,-3.608948,1.878632
4,23.0,8.0,-3.689438,1.838272
...,...,...,...,...
2494,12559.0,2.0,0.601135,4.464630
2495,12567.0,8.0,0.608826,4.460434
2496,12573.0,6.0,0.615509,4.467102
2497,12575.0,2.0,0.615509,4.462463


In [7]:
a_data['Outcome']=1
a_data

Unnamed: 0,MS,X,Y,Outcome
0,2,-9.8,2.2,1
1,4,-7.8,0.2,1
2,6,-7.8,1.8,1
3,8,-5.8,2.6,1
4,10,-4.6,1.0,1
...,...,...,...,...
171,210,-0.5,-1.5,1
172,220,0.0,-1.5,1
173,230,0.0,-1.5,1
174,240,0.0,-1.5,1


In [8]:
p_data['Outcome']=0
p_data.drop(["TIME INTERVAL (ms)"], axis=1, inplace=True)
p_data.rename(columns={"ACTUAL TIME (ms)":"MS", "Accel X":"X", "Accel Y":"Y" }, inplace=True)
p_data

Unnamed: 0,MS,X,Y,Outcome
0,0.0,-3.237152,2.039383,0
1,8.0,-3.347595,1.998138,0
2,12.0,-3.489105,1.942612,0
3,15.0,-3.608948,1.878632,0
4,23.0,-3.689438,1.838272,0
...,...,...,...,...
2494,12559.0,0.601135,4.464630,0
2495,12567.0,0.608826,4.460434,0
2496,12573.0,0.615509,4.467102,0
2497,12575.0,0.615509,4.462463,0


In [9]:
df=pd.concat([a_data, p_data], ignore_index=True)
df = df.sample(frac=1).reset_index(drop=True)
#df=df.drop(["MS"], axis=1)

# Creation of desired data set for training the model

Since two data sets were given with different parameters, a simplified dataset called **df** is created which consists of the required variables for training and also an additional column is created to classify the fall as car accident or phone fall

In [10]:
X = df.drop(["Outcome"], axis=1)

In [11]:
Y=df.drop(["X","Y"], axis=1)
Y

Unnamed: 0,MS,Outcome
0,3271.0,0
1,15.0,0
2,4478.0,0
3,9840.0,0
4,2204.0,0
...,...,...
2670,9247.0,0
2671,1155.0,0
2672,12485.0,0
2673,9614.0,0


In [12]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,Y,test_size=0.2,random_state=0)
X_train.head()

Unnamed: 0,MS,X,Y
1478,4650.0,0.429367,5.935669
1079,5173.0,0.108673,4.953491
674,8184.0,0.697479,4.363556
1853,4321.0,0.191727,5.15863
1628,10639.0,0.465118,4.555832


In [13]:
y_test.describe()

Unnamed: 0,MS,Outcome
count,535.0,535.0
mean,6134.611215,0.069159
std,3910.792754,0.253962
min,2.0,0.0
25%,2599.5,0.0
50%,6201.0,0.0
75%,9534.0,0.0
max,12579.0,1.0


# Applying random forest classifier to the training dataset

In [14]:
from sklearn.ensemble import RandomForestRegressor   
# create regressor object 
randomreg = RandomForestRegressor(n_estimators = 100, random_state = 0) 
# fit the regressor with x and y data 
randomreg.fit(X_train, y_train['Outcome'].to_numpy().ravel())

RandomForestRegressor(bootstrap=True, criterion='mse', max_depth=None,
                      max_features='auto', max_leaf_nodes=None,
                      min_impurity_decrease=0.0, min_impurity_split=None,
                      min_samples_leaf=1, min_samples_split=2,
                      min_weight_fraction_leaf=0.0, n_estimators=100,
                      n_jobs=None, oob_score=False, random_state=0, verbose=0,
                      warm_start=False)

In [15]:
y_pred = randomreg.predict(X_test)
y_pred=pd.DataFrame(y_pred)
y_pred=y_pred.round()
y_pred.head()

Unnamed: 0,0
0,0.0
1,0.0
2,0.0
3,0.0
4,0.0


In [16]:
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report
cm = confusion_matrix(y_test, y_pred) 
  
print ("Confusion Matrix : \n", cm)
print(classification_report(y_test,y_pred))

Confusion Matrix : 
 [[506   3]
 [  1  25]]
              precision    recall  f1-score   support

           0       1.00      0.99      1.00       509
           1       0.89      0.96      0.93        26

   micro avg       0.99      0.99      0.99       535
   macro avg       0.95      0.98      0.96       535
weighted avg       0.99      0.99      0.99       535



# Accuracy of the Model
Below code tests and displays the accuracy of the ML model using a test data set which is of 20% of size of entire data

In [17]:
from sklearn import metrics
print(metrics.accuracy_score(y_pred, y_test))
print(metrics.f1_score(y_pred , y_test, average='macro'))
print(metrics.confusion_matrix(y_pred, y_test))

0.9925233644859813
0.9609944590259552
[[506   1]
 [  3  25]]


# Inputing user values

In [18]:
input_x=input()

-16.55


In [19]:
input_y=input()

0.5


In [20]:
data={"X":[input_x],"Y":[input_y]}
check=pd.DataFrame(data)


In [21]:
y_pred2 = randomreg.predict(check)
y_pred2=pd.DataFrame(y_pred2)
y_pred2=y_pred2.round()
#y_pred.drop(index=0)
a=y_pred.loc[0,0]
if (a==1.0):
    print("Car Accident")
else:
    print("Phone Fall")

Phone Fall
