In [20]:
import pandas as pd
# importing geocoding library to get latitude and longitude
from geopy.geocoders import GoogleV3

# Geospatial Analysis
Utilize the event location, district, and region information to perform geospatial analysis. Visualize the distribution of fatalities on a map and identify areas that have experienced higher levels of violence.

# Data Ingestion & Preprocessing

In [2]:
fatalities_spatiotemporal_df = pd.read_csv('Fatalities_Israel-Palestine.zip', compression='zip')

In [3]:
fatalities_spatiotemporal_df.shape

(11124, 16)

In [4]:
fatalities_spatiotemporal_df.isna().sum()

name                               0
date_of_event                      0
age                              129
citizenship                        0
event_location                     0
event_location_district            0
event_location_region              0
date_of_death                      0
gender                            20
took_part_in_the_hostilities    1430
place_of_residence                68
place_of_residence_district       68
type_of_injury                   291
ammunition                      5253
killed_by                          0
notes                            280
dtype: int64

## Imputing missing values

In [5]:
# imput age with the median
fatalities_spatiotemporal_df['age'].fillna(fatalities_spatiotemporal_df['age'].median(), inplace=True)

In [6]:
text_columns = [
    'name', 'citizenship', 'event_location', 'event_location_district',
    'event_location_region', 'gender', 'place_of_residence',
    'place_of_residence_district', 'type_of_injury', 'ammunition',
    'killed_by', 'notes', 'took_part_in_the_hostilities'
]

In [7]:
# replace 'nan' with 'unknown' in text columns
for column in text_columns:
    if column in fatalities_spatiotemporal_df.columns:
        fatalities_spatiotemporal_df[column].replace('nan', 'Unknown', inplace=True)

## Data typing

In [8]:
# Check the data types
fatalities_spatiotemporal_df.dtypes

name                             object
date_of_event                    object
age                             float64
citizenship                      object
event_location                   object
event_location_district          object
event_location_region            object
date_of_death                    object
gender                           object
took_part_in_the_hostilities     object
place_of_residence               object
place_of_residence_district      object
type_of_injury                   object
ammunition                       object
killed_by                        object
notes                            object
dtype: object

In [9]:
# Convert the date column to datetime format
fatalities_spatiotemporal_df['date_of_event'] = pd.to_datetime(fatalities_spatiotemporal_df['date_of_event'])

In [10]:
# set age to numeric
fatalities_spatiotemporal_df['age'] = pd.to_numeric(fatalities_spatiotemporal_df['age'], errors='coerce')
#set date_of_death to datetime
fatalities_spatiotemporal_df['date_of_death'] = pd.to_datetime(fatalities_spatiotemporal_df['date_of_death'], errors='coerce')
# set name, citizenship, event_location, event_location_district, event_location_region, gender, place_of_residence, place_of_residence_district, type_of_injury, ammunition, killed_by, and notes to text/string
# List of columns to change to string

# Change data type of specified columns to 'object' to ensure they're strings
for column in text_columns:
    if column in fatalities_spatiotemporal_df.columns:
        fatalities_spatiotemporal_df[column] = fatalities_spatiotemporal_df[column].astype('string')


In [11]:
# Check the data types
fatalities_spatiotemporal_df.dtypes

name                            string[python]
date_of_event                   datetime64[ns]
age                                    float64
citizenship                     string[python]
event_location                  string[python]
event_location_district         string[python]
event_location_region           string[python]
date_of_death                   datetime64[ns]
gender                          string[python]
took_part_in_the_hostilities    string[python]
place_of_residence              string[python]
place_of_residence_district     string[python]
type_of_injury                  string[python]
ammunition                      string[python]
killed_by                       string[python]
notes                           string[python]
dtype: object

## Data Cleaning and Feature Engineering

In [18]:
fatalities_spatiotemporal_df.tail()

Unnamed: 0,name,date_of_event,age,citizenship,event_location,event_location_district,event_location_region,date_of_death,gender,took_part_in_the_hostilities,place_of_residence,place_of_residence_district,type_of_injury,ammunition,killed_by,notes
11119,Binyamin Herling,2000-10-19,64.0,Israeli,Nablus,Nablus,West Bank,2000-10-19,M,Israelis,Kedumim,Tulkarm,gunfire,live ammunition,Palestinian civilians,Killed while hiking on Mt. Eival.
11120,Farid Musa 'Issa a-Nesasreh,2000-10-17,28.0,Palestinian,Beit Furik,Nablus,West Bank,2000-10-17,M,Unknown,Beit Furik,Nablus,gunfire,,Israeli civilians,Killed by a settler from Itamar while harvesti...
11121,Hillel Lieberman,2000-10-07,36.0,Israeli,Nablus,Nablus,West Bank,2000-10-07,M,Israelis,Elon Moreh,Nablus,gunfire,live ammunition,Palestinian civilians,His body was found a day after he disappeared.
11122,Fahed Mustafa 'Odeh Baker,2000-10-07,21.0,Palestinian,Bidya,Salfit,West Bank,2000-10-07,M,No,Bidya,Salfit,gunfire,,Israeli civilians,Killed by settlers who rioted in Biddya village.
11123,Wichlav Zalsevsky,2000-10-02,24.0,Israeli,Masha,Salfit,West Bank,2000-10-02,M,Israelis,Ashdod,Israel,gunfire,live ammunition,Palestinian civilians,


In [12]:
spatio_temporal_columns = ['citizenship', 'event_location', 'event_location_district',
    'event_location_region', 'place_of_residence',
    'place_of_residence_district', 'date_of_event', 'date_of_death']

In [16]:
# dropping rows if place_of_residence and place_of_residence_district is unknown
fatalities_spatiotemporal_df.drop(fatalities_spatiotemporal_df[fatalities_spatiotemporal_df['place_of_residence'] == 'Unknown'].index, inplace=True)
fatalities_spatiotemporal_df.drop(fatalities_spatiotemporal_df[fatalities_spatiotemporal_df['place_of_residence_district'] == 'Unknown'].index, inplace=True)

In [17]:
fatalities_spatiotemporal_df.shape

(11124, 16)

In [21]:
googleAPIkey = 'AIzaSyD8zRcgbCnN791CP1OFZ58vNyHrZxsplrI'
# using geopy to get latitude and longitude
geolocator = GoogleV3(api_key=googleAPIkey)
# use geolocator to get latitude and longitude for event_location
fatalities_spatiotemporal_df['event_location_latitude'] = fatalities_spatiotemporal_df['event_location'].apply(lambda x: geolocator.geocode(x).latitude if geolocator.geocode(x) else 'Not Found')
fatalities_spatiotemporal_df['event_location_longitude'] = fatalities_spatiotemporal_df['event_location'].apply(lambda x: geolocator.geocode(x).longitude if geolocator.geocode(x) else 'Not Found')

In [29]:
fatalities_spatiotemporal_df.iloc[:, -2:].describe()

Unnamed: 0,event_location_latitude,event_location_longitude
count,11124.0,11124.0
unique,340.0,340.0
top,31.501695,34.466845
freq,2232.0,2232.0


In [31]:
fatalities_spatiotemporal_df.iloc[:, -2:].isna().sum()

event_location_latitude     0
event_location_longitude    0
dtype: int64

In [36]:
fatalities_spatiotemporal_df.event_location_latitude.value_counts()['Not Found']

1477

In [37]:
fatalities_spatiotemporal_df.event_location_longitude.value_counts()['Not Found']

1477

In [40]:
print('% of rows to be dropped because geolocation data was not returned using the event_location column to infer latitude and longitude using the GoogleCloud Geolocation API: ', fatalities_spatiotemporal_df.event_location_longitude.value_counts()['Not Found'] / fatalities_spatiotemporal_df.shape[0])

% of rows to be dropped because geolocation data was not returned using the event_location column to infer latitude and longitude using the GoogleCloud Geolocation API:  0.1327759798633585


In [41]:
# Drop rows where the geolocation is 'Not Found' in both the latitude and longitude columns
fatalities_spatiotemporal_df.drop(fatalities_spatiotemporal_df[fatalities_spatiotemporal_df['event_location_latitude'] == 'Not Found'].index, inplace=True)
fatalities_spatiotemporal_df.drop(fatalities_spatiotemporal_df[fatalities_spatiotemporal_df['event_location_longitude'] == 'Not Found'].index, inplace=True)

In [42]:
fatalities_spatiotemporal_df.shape

(9647, 18)

# Exploratory Data Analysis

In [44]:
fatalities_spatiotemporal_df.head()

Unnamed: 0,name,date_of_event,age,citizenship,event_location,event_location_district,event_location_region,date_of_death,gender,took_part_in_the_hostilities,place_of_residence,place_of_residence_district,type_of_injury,ammunition,killed_by,notes,event_location_latitude,event_location_longitude
0,'Abd a-Rahman Suleiman Muhammad Abu Daghash,2023-09-24,32.0,Palestinian,Nur Shams R.C.,Tulkarm,West Bank,2023-09-24,M,,Nur Shams R.C.,Tulkarm,gunfire,live ammunition,Israeli security forces,Fatally shot by Israeli forces while standing ...,32.318484,35.059594
1,Usayed Farhan Muhammad 'Ali Abu 'Ali,2023-09-24,21.0,Palestinian,Nur Shams R.C.,Tulkarm,West Bank,2023-09-24,M,,Nur Shams R.C.,Tulkarm,gunfire,live ammunition,Israeli security forces,Fatally shot by Israeli forces while trying to...,32.318484,35.059594
2,'Abdallah 'Imad Sa'ed Abu Hassan,2023-09-22,16.0,Palestinian,Kfar Dan,Jenin,West Bank,2023-09-22,M,,al-Yamun,Jenin,gunfire,live ammunition,Israeli security forces,Fatally shot by soldiers while firing at them ...,34.011212,36.046316
3,Durgham Muhammad Yihya al-Akhras,2023-09-20,19.0,Palestinian,'Aqbat Jaber R.C.,Jericho,West Bank,2023-09-20,M,,'Aqbat Jaber R.C.,Jericho,gunfire,live ammunition,Israeli security forces,Shot in the head by Israeli forces while throw...,31.842048,35.445564
4,Raafat 'Omar Ahmad Khamaisah,2023-09-19,15.0,Palestinian,Jenin R.C.,Jenin,West Bank,2023-09-19,M,,Jenin,Jenin,gunfire,live ammunition,Israeli security forces,Wounded by soldiers’ gunfire after running awa...,32.464635,35.293859


In [None]:
import altair as alt

# Load a GeoJSON file containing the geographic boundaries of your regions.
# This could be countries, states, provinces, etc. You can find these files online or from governmental open data websites.
geojson_url = 'https://example.com/path/to/your/map.geojson'

# Basic map display
base_map = alt.Chart(alt.Data(url=geojson_url, format=alt.DataFormat(property='features',type='json'))).mark_geoshape(
    fill='lightgray',
    stroke='white'
).encode()