<a href="https://colab.research.google.com/github/aboXmsa3d/Accidents-and-Anomalies-Correlation/blob/main/Anomalies_and_Accidents_Correlation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### General Properties

#### Mount Google Drive (Dataset Location)



In [None]:
# Mount Google Drive inside the Colab runtime; the CSV paths used below live under this mount point.
from google.colab import drive

drive_mount_point = '/content/drive'
drive.mount(drive_mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


#### Importing Needed packages

In [None]:
import os

import numpy as np
import pandas as pd

import plotly.express as px
import plotly.graph_objects as go

import matplotlib.pyplot as plt
% matplotlib inline

#### Import Datasets

In [None]:
# Load the accident records from the mounted Drive folder.
accident_csv_path = "/content/drive/MyDrive/Programming/datasets/GP/accident.csv"
df_accident = pd.read_csv(accident_csv_path)

In [None]:
# Load the anomaly records from the mounted Drive folder.
anomalie_csv_path = "/content/drive/MyDrive/Programming/datasets/GP/anomalie.csv"
df_anomalie = pd.read_csv(anomalie_csv_path)

In [None]:
# Strip leading/trailing whitespace from every column name so later lookups
# such as "latitude" / "longitude" / "id" match exactly.
df_accident = df_accident.rename(columns=str.strip)
df_anomalie = df_anomalie.rename(columns=str.strip)

In [None]:
# Display the accident table (columns: id, latitude, longitude).
df_accident

Unnamed: 0,id,latitude,longitude
0,1,24.72221,46.61561
1,2,24.72543,46.61578
2,3,24.7283,46.61579
3,4,24.72217,46.61101
4,5,24.72224,46.61535
5,6,24.72214,46.61541
6,7,24.72214,46.61519
7,8,24.73039,46.61412
8,9,24.73033,46.61414
9,10,24.73042,46.61403


In [None]:
# Display the anomaly table (columns: id, latitude, longitude).
df_anomalie

Unnamed: 0,id,latitude,longitude
0,1,24.72214,46.61524
1,2,24.72532,46.61581
2,3,24.7268,46.61579
3,4,24.73034,46.61405
4,5,24.73096,46.62081
5,6,24.73052,46.62821
6,7,24.72824,46.63007
7,8,24.72428,46.63141
8,9,24.72327,46.62498
9,10,24.73204,46.61805


# Exploratory Data Analysis

### Calculate the distance between two points on the Earth

In [None]:
# Helper Function
def haversine_np(lat1, lon1, lat2, lon2 , anamoly_id):
    """
    Calculate the great-circle (haversine) distance between two points on the earth.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude / longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude / longitude of the second point, in decimal degrees.
    anamoly_id
        Identifier passed through unchanged so the caller can tell which
        anomaly the distance belongs to.

    Returns
    -------
    pd.Series
        Indexed by ``['id', 'distance']``; ``distance`` is in metres.
        Because both values share one Series, a numeric id is upcast to float.
    """
    earth_radius_km = 6367  # radius used by this notebook for all distances

    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = np.sin(dlat/2.0)**2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon/2.0)**2
    # Floating-point rounding can push `a` marginally outside [0, 1] (e.g. for
    # near-antipodal points), which would make arcsin return NaN.
    a = np.clip(a, 0.0, 1.0)

    c = 2 * np.arcsin(np.sqrt(a))
    distance = earth_radius_km * c * 1000  # km -> metres
    return pd.Series([anamoly_id, distance], index=['id', 'distance'])

In [None]:
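# Sanity check: compare the result with the Google Maps directions link below
# (the last argument is only an id that is passed through to the result).
# haversine_np(24.7221493, 46.6152429, 24.7221468, 46.6145546, anamoly_id=0)

# https://www.google.com/maps/dir/24.7221493,46.6152429/24.7221468,46.6145546/@24.7220957,46.6149714,20.91z/data=!4m2!4m1!3e2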

### Get the closest anomaly for each accident

In [None]:
def _pairwise_haversine_m(lat_from, lon_from, lat_to, lon_to):
    """Return the great-circle distances in metres between every (from, to) pair.

    Inputs are 1-D sequences of decimal degrees. The result has shape
    ``(len(lat_from), len(lat_to))``. It uses the same haversine formula and
    6367 km radius as ``haversine_np`` so the numbers stay comparable.
    """
    lat_from, lon_from, lat_to, lon_to = (
        np.radians(np.asarray(values, dtype=float))
        for values in (lat_from, lon_from, lat_to, lon_to)
    )

    # Column vector ("from") against row vector ("to") broadcasts to all pairs.
    dlat = lat_to[np.newaxis, :] - lat_from[:, np.newaxis]
    dlon = lon_to[np.newaxis, :] - lon_from[:, np.newaxis]

    a = (
        np.sin(dlat / 2.0) ** 2
        + np.cos(lat_from)[:, np.newaxis] * np.cos(lat_to)[np.newaxis, :] * np.sin(dlon / 2.0) ** 2
    )
    # Guard against rounding pushing `a` outside arcsin's domain.
    c = 2 * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))
    return 6367 * c * 1000


if df_anomalie.empty:
    raise ValueError("df_anomalie is empty: there is no anomaly to match accidents against")

# One distance matrix replaces the accident-by-anomaly Python loops
# (rows = accidents, columns = anomalies).
pairwise_distance_m = _pairwise_haversine_m(
    df_accident["latitude"], df_accident["longitude"],
    df_anomalie["latitude"], df_anomalie["longitude"],
)

# Position of the nearest anomaly for each accident; ties resolve to the first one.
closest_position = pairwise_distance_m.argmin(axis=1)

# Assigning whole columns keeps the ids in df_anomalie['id']'s own dtype
# instead of coercing them to float.
df_accident['closest_anomalie_id'] = df_anomalie['id'].to_numpy()[closest_position]
df_accident['closest_anomalie_distance'] = pairwise_distance_m[np.arange(len(df_accident)), closest_position]

df_accident

Unnamed: 0,id,latitude,longitude,closest_anomalie_id,closest_anomalie_distance
0,1,24.72221,46.61561,14.0,14.912774
1,2,24.72543,46.61578,2.0,12.593249
2,3,24.7283,46.61579,20.0,40.005041
3,4,24.72217,46.61101,15.0,308.004775
4,5,24.72224,46.61535,14.0,11.709045
5,6,24.72214,46.61541,14.0,12.46757
6,7,24.72214,46.61519,1.0,5.047006
7,8,24.73039,46.61412,4.0,8.988383
8,9,24.73033,46.61414,19.0,7.778758
9,10,24.73042,46.61403,4.0,9.11632


### Select accidents attributed to an anomaly (closest anomaly within 10 m)

In [None]:
# An accident is attributed to an anomaly when its closest anomaly lies within
# this many metres (closest_anomalie_distance is expressed in metres).
MAX_ANOMALIE_DISTANCE_M = 10

df_accident_due_to_anomalie = df_accident[df_accident['closest_anomalie_distance'] <= MAX_ANOMALIE_DISTANCE_M]
df_accident_due_to_anomalie

Unnamed: 0,id,latitude,longitude,closest_anomalie_id,closest_anomalie_distance
6,7,24.72214,46.61519,1.0,5.047006
7,8,24.73039,46.61412,4.0,8.988383
8,9,24.73033,46.61414,19.0,7.778758
9,10,24.73042,46.61403,4.0,9.11632
10,11,24.73031,46.6141,4.0,6.048374


In [None]:
# Attach the matched anomaly's own columns to each selected accident.
# Columns present in both frames get the "_accident" / "_anomalie" suffixes.
df_accident_due_to_anomalie = df_accident_due_to_anomalie.merge(
    df_anomalie,
    how="left",
    left_on="closest_anomalie_id",
    right_on="id",
    suffixes=("_accident", "_anomalie"),
)
df_accident_due_to_anomalie

Unnamed: 0,id_accident,latitude_accident,longitude_accident,closest_anomalie_id,closest_anomalie_distance,id_anomalie,latitude_anomalie,longitude_anomalie
0,7,24.72214,46.61519,1.0,5.047006,1,24.72214,46.61524
1,8,24.73039,46.61412,4.0,8.988383,4,24.73034,46.61405
2,9,24.73033,46.61414,19.0,7.778758,19,24.73026,46.61414
3,10,24.73042,46.61403,4.0,9.11632,4,24.73034,46.61405
4,11,24.73031,46.6141,4.0,6.048374,4,24.73034,46.61405


# Visualization

## All Accidents and Anomalies

In [None]:
# NOTE(review): the access token was committed inside the notebook. Prefer
# providing your own through the MAPBOX_ACCESS_TOKEN environment variable;
# the committed value is kept only as a fallback so the figure still renders.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN") or (
    "pk.eyJ1IjoiYWJveG1zYTNkIiwiYSI6ImNrdmcyam9veTAxYXcyb29kY3B1d2E4YnkifQ.aUL_zd92ZIrJtUxaHIvDqw"
)

fig = go.Figure()

# Every accident in the dataset.
fig.add_trace(
    go.Scattermapbox(
        lat=df_accident['latitude'],
        lon=df_accident['longitude'],
        mode='markers',
        name="Accident",
        hovertext="Accident",
        marker_size=10,
    )
)

# Every anomaly in the dataset.
fig.add_trace(
    go.Scattermapbox(
        lat=df_anomalie['latitude'],
        lon=df_anomalie['longitude'],
        mode='markers',
        name="Anomalie",
        hovertext="Anomalie",
        marker_size=10,
    )
)

# Draw all accident -> anomaly links with a single trace: a None entry after
# each pair breaks the line, so segments are not joined to one another.
link_lat, link_lon = [], []
for row in df_accident_due_to_anomalie.itertuples(index=False):
    link_lat += [row.latitude_accident, row.latitude_anomalie, None]
    link_lon += [row.longitude_accident, row.longitude_anomalie, None]

fig.add_trace(
    go.Scattermapbox(
        lat=link_lat,
        lon=link_lon,
        mode='lines',
        name="link",
        line_width=2.5,
        line_color="red",
        showlegend=False,
        hoverinfo="skip",
    )
)

fig.update_layout(
    mapbox=dict(
        accesstoken=mapbox_access_token,
        center=go.layout.mapbox.Center(lat=24.72381, lon=46.6276),
        zoom=13.5,
    )
)

fig.show()

## Only Accidents Due to an Anomaly

In [None]:
# NOTE(review): the access token was committed inside the notebook. Prefer
# providing your own through the MAPBOX_ACCESS_TOKEN environment variable;
# the committed value is kept only as a fallback so the figure still renders.
mapbox_access_token = os.environ.get("MAPBOX_ACCESS_TOKEN") or (
    "pk.eyJ1IjoiYWJveG1zYTNkIiwiYSI6ImNrdmcyam9veTAxYXcyb29kY3B1d2E4YnkifQ.aUL_zd92ZIrJtUxaHIvDqw"
)

fig = go.Figure()

# Only the accidents that were attributed to an anomaly.
fig.add_trace(
    go.Scattermapbox(
        lat=df_accident_due_to_anomalie['latitude_accident'],
        lon=df_accident_due_to_anomalie['longitude_accident'],
        mode='markers',
        name="Accident",
        hovertext="Accident",
        marker_size=10,
    )
)

# The anomalies those accidents were matched to.
fig.add_trace(
    go.Scattermapbox(
        lat=df_accident_due_to_anomalie['latitude_anomalie'],
        lon=df_accident_due_to_anomalie['longitude_anomalie'],
        mode='markers',
        name="Anomalie",
        hovertext="Anomalie",
        marker_size=10,
    )
)

# Draw all accident -> anomaly links with a single trace: a None entry after
# each pair breaks the line, so segments are not joined to one another.
link_lat, link_lon = [], []
for row in df_accident_due_to_anomalie.itertuples(index=False):
    link_lat += [row.latitude_accident, row.latitude_anomalie, None]
    link_lon += [row.longitude_accident, row.longitude_anomalie, None]

fig.add_trace(
    go.Scattermapbox(
        lat=link_lat,
        lon=link_lon,
        mode='lines',
        line_width=2.5,
        line_color="red",
        showlegend=False,
        hoverinfo="skip",
    )
)

fig.update_layout(
    mapbox=dict(
        accesstoken=mapbox_access_token,
        center=go.layout.mapbox.Center(lat=24.72381, lon=46.6276),
        zoom=13.5,
    )
)

fig.show()

# Conclusion

### (1) Number of anomalies that caused accidents

In [None]:
# Pull out the two id columns of the matched (accident, anomaly) pairs.
matched_anomalie_ids = df_accident_due_to_anomalie['id_anomalie']
matched_accident_ids = df_accident_due_to_anomalie['id_accident']

# Distinct anomalies involved vs. number of accidents matched to them.
anomalie_cause_accident = matched_anomalie_ids.nunique()
accident_due_to_anomalie_number = matched_accident_ids.count()

In [None]:
# Report how many distinct anomalies were linked to how many accidents.
summary_parts = (
    "There is (",
    anomalie_cause_accident,
    ") anomalies that caused (",
    accident_due_to_anomalie_number,
    ") accidents.",
)
print(*summary_parts)

There is ( 3 ) anomalies that caused ( 5 ) accidents.


### (2) Number of accidents that happened due to an anomaly

In [None]:
# Counts: all accidents vs. those attributed to an anomaly.
total_accident = df_accident['id'].count()
accident_due_to_anomalie_number = df_accident_due_to_anomalie['id_accident'].count()

# Share of attributed accidents, as a percentage rounded to two decimals.
accident_share = accident_due_to_anomalie_number / total_accident
accident_due_to_anomalie_per = round(accident_share * 100, 2)

In [None]:
# Report the absolute count first, then the same figure as a percentage.
count_parts = (
    "There is (",
    accident_due_to_anomalie_number,
    ") accidents happend due to anomalie, out of (",
    total_accident,
    ") accident.",
)
share_parts = (
    "This represents (",
    accident_due_to_anomalie_per,
    "% ) of accidents happend due to anomalie.",
)
print(*count_parts)
print(*share_parts)

There is ( 5 ) accidents happend due to anomalie, out of ( 20 ) accident.
This represents ( 25.0 % ) of accidents happend due to anomalie.
