In [1]:
import numpy as np
import pandas as pd 
import seaborn as sns 
import matplotlib.pyplot as plt 
from scipy.stats import f_oneway 
import folium
from folium.plugins import HeatMap
from sklearn.cluster import KMeans
import warnings 
warnings.filterwarnings('ignore')

In [2]:
# Load the raw incident table. A forward slash is used as the path separator
# because it is accepted on Windows as well as POSIX systems, whereas a
# hard-coded backslash only resolves on Windows.
df = pd.read_csv('dataset/Global_terrorist.csv', encoding='ISO-8859-1')

In [3]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns of the raw frame.
df.describe()

Unnamed: 0,eventid,iyear,imonth,iday,extended,country,region,latitude,longitude,specificity,...,ransomamt,ransomamtus,ransompaid,ransompaidus,hostkidoutcome,nreleased,INT_LOG,INT_IDEO,INT_MISC,INT_ANY
count,181691.0,181691.0,181691.0,181691.0,181691.0,181691.0,181691.0,177135.0,177134.0,181685.0,...,1350.0,563.0,774.0,552.0,10991.0,10400.0,181691.0,181691.0,181691.0,181691.0
mean,200270500000.0,2002.638997,6.467277,15.505644,0.045346,131.968501,7.160938,23.498343,-458.6957,1.451452,...,3172530.0,578486.5,717943.7,240.378623,4.629242,-29.018269,-4.543731,-4.464398,0.09001,-3.945952
std,1325957000.0,13.25943,3.388303,8.814045,0.208063,112.414535,2.933408,18.569242,204779.0,0.99543,...,30211570.0,7077924.0,10143920.0,2940.967293,2.03536,65.720119,4.543547,4.637152,0.568457,4.691325
min,197000000000.0,1970.0,0.0,0.0,0.0,4.0,1.0,-53.154613,-86185900.0,1.0,...,-99.0,-99.0,-99.0,-99.0,1.0,-99.0,-9.0,-9.0,-9.0,-9.0
25%,199102100000.0,1991.0,4.0,8.0,0.0,78.0,5.0,11.510046,4.54564,1.0,...,0.0,0.0,-99.0,0.0,2.0,-99.0,-9.0,-9.0,0.0,-9.0
50%,200902200000.0,2009.0,6.0,15.0,0.0,98.0,6.0,31.467463,43.24651,1.0,...,15000.0,0.0,0.0,0.0,4.0,0.0,-9.0,-9.0,0.0,0.0
75%,201408100000.0,2014.0,9.0,23.0,0.0,160.0,10.0,34.685087,68.71033,1.0,...,400000.0,0.0,1273.412,0.0,7.0,1.0,0.0,0.0,0.0,0.0
max,201712300000.0,2017.0,12.0,31.0,1.0,1004.0,12.0,74.633553,179.3667,5.0,...,1000000000.0,132000000.0,275000000.0,48000.0,7.0,2769.0,1.0,1.0,1.0,1.0


In [4]:
# Display the raw frame; the notebook abbreviates the rendering to the first and last rows/columns.
df

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,...,,,,,PGIS,0,0,0,0,
1,197000000002,1970,0,0,,0,,130,Mexico,1,...,,,,,PGIS,0,1,1,1,
2,197001000001,1970,1,0,,0,,160,Philippines,5,...,,,,,PGIS,-9,-9,1,1,
3,197001000002,1970,1,0,,0,,78,Greece,8,...,,,,,PGIS,-9,-9,1,1,
4,197001000003,1970,1,0,,0,,101,Japan,4,...,,,,,PGIS,-9,-9,1,1,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
181686,201712310022,2017,12,31,,0,,182,Somalia,11,...,,"""Somalia: Al-Shabaab Militants Attack Army Che...","""Highlights: Somalia Daily Media Highlights 2 ...","""Highlights: Somalia Daily Media Highlights 1 ...",START Primary Collection,0,0,0,0,
181687,201712310029,2017,12,31,,0,,200,Syria,10,...,,"""Putin's 'victory' in Syria has turned into a ...","""Two Russian soldiers killed at Hmeymim base i...","""Two Russian servicemen killed in Syria mortar...",START Primary Collection,-9,-9,1,1,
181688,201712310030,2017,12,31,,0,,160,Philippines,5,...,,"""Maguindanao clashes trap tribe members,"" Phil...",,,START Primary Collection,0,0,0,0,
181689,201712310031,2017,12,31,,0,,92,India,6,...,,"""Trader escapes grenade attack in Imphal,"" Bus...",,,START Primary Collection,-9,-9,0,-9,


In [5]:
# Select rows that satisfy all four conditions at once. Every condition is
# evaluated against the full frame and the masks are AND-ed together, which
# keeps exactly the rows that survive applying the filters one after another.
in_mena = df['region_txt'] == 'Middle East & North Africa'
since_2000 = df['iyear'] >= 2000
is_bombing = df['attacktype1_txt'] == 'Bombing/Explosion'
nkill_at_least_10 = df['nkill'] >= 10  # rows with a missing nkill compare False and are dropped
filtered_df = df[in_mena & since_2000 & is_bombing & nkill_at_least_10]

print(f"Filtered dataset size: {len(filtered_df)} rows")

Filtered dataset size: 1737 rows


In [6]:
# Count how many of the filtered incidents share each exact (latitude, longitude) pair,
# then flatten the grouped result back into a regular three-column frame.
incidents_per_location = filtered_df.groupby(['latitude', 'longitude']).size()
aggregated_df = incidents_per_location.reset_index(name='incident_count')
print(aggregated_df.head())

    latitude  longitude  incident_count
0  12.743795  44.843678               1
1  12.760069  44.890266               1
2  12.825647  44.846416              12
3  12.832581  45.027071               1
4  12.867085  44.981911               6


In [7]:
# Display the per-location incident counts built in the previous cell.
aggregated_df

Unnamed: 0,latitude,longitude,incident_count
0,12.743795,44.843678,1
1,12.760069,44.890266,1
2,12.825647,44.846416,12
3,12.832581,45.027071,1
4,12.867085,44.981911,6
...,...,...,...
517,38.720489,35.482597,1
518,39.798509,32.805613,1
519,39.916839,44.044637,1
520,39.930771,32.767540,3


<h1> Insight 1: How many terrorist incidents occurred in the Middle East & North Africa from 2000 onward? </h1>

In [8]:
# All incidents recorded in the Middle East & North Africa from 2000 onward.
filtered_df = df[(df['region_txt'] == 'Middle East & North Africa') & (df['iyear'] >= 2000)]
total_incidents = filtered_df.shape[0]
print(f"Total number of terrorist incidents in the Middle East & North Africa from 2000 onward: {total_incidents}")

# Incidents per exact coordinate pair. groupby drops rows whose latitude or
# longitude is missing (pandas default dropna=True), so every row kept here
# can be plotted.
aggregated_df = filtered_df.groupby(['latitude', 'longitude']).size().reset_index(name='incident_count')

# Scale the counts by the maximum so weights fall in (0, 1].
# Min-max scaling would assign weight 0 to every location that has the minimum
# count, removing those locations from the heatmap entirely, and would divide
# by zero when all locations have the same count.
max_incident_count = aggregated_df['incident_count'].max()
aggregated_df['normalized_weight'] = aggregated_df['incident_count'] / max_incident_count

# HeatMap takes [lat, lon, weight] triples.
heat_data = aggregated_df[['latitude', 'longitude', 'normalized_weight']].values.tolist()
m = folium.Map(location=[30, 40], zoom_start=4)
HeatMap(heat_data, radius=10).add_to(m)
m.save("mena_terrorism_heatmap.html")
m

Total number of terrorist incidents in the Middle East & North Africa from 2000 onward: 41078


<h1> Insight 2: Which of the focus countries (Iraq, Egypt, Syria) is the safest versus the most dangerous? </h1>

In [14]:
# Restrict the analysis to three countries inside the Middle East & North Africa region.
focus_countries = ['Iraq', 'Egypt', 'Syria']
in_region = df['region_txt'] == 'Middle East & North Africa'
in_focus_country = df['country_txt'].isin(focus_countries)
filtered_df = df[in_region & in_focus_country]

# Frequency of each attack type; the most frequent one drives the rest of this cell.
attack_counts = filtered_df['attacktype1_txt'].value_counts()
top_attack_type = attack_counts.idxmax()

print(f"Top attack types in {', '.join(focus_countries)}:\n")
print(attack_counts)

# Incidents of the most frequent attack type that also have both coordinates present.
is_top_attack = filtered_df['attacktype1_txt'] == top_attack_type
has_coordinates = filtered_df['latitude'].notna() & filtered_df['longitude'].notna()
top_attack_df = filtered_df[is_top_attack & has_coordinates]

country_counts = top_attack_df['country_txt'].value_counts()

print("\nNumber of incidents of the most common attack type by country:")
print(country_counts)

# "Safest" / "most dangerous" here means fewest / most incidents of that one attack type.
most_dangerous_country = country_counts.idxmax()
safest_country = country_counts.idxmin()

print(f"\nSafest country (based on this attack type): {safest_country}")
print(f"Most dangerous country: {most_dangerous_country}")

# Unweighted heatmap: one [lat, lon] point per incident.
heat_data = top_attack_df[['latitude', 'longitude']].values.tolist()
m = folium.Map(location=[33, 43], zoom_start=5, tiles='CartoDB positron')
HeatMap(heat_data, radius=10, blur=15).add_to(m)
m.save("mena_specific_region_heatmap.html")
m

Top attack types in Iraq, Egypt, Syria:

attacktype1_txt
Bombing/Explosion                      20959
Armed Assault                           4144
Assassination                           1630
Hostage Taking (Kidnapping)             1282
Unknown                                  978
Facility/Infrastructure Attack           249
Hostage Taking (Barricade Incident)       34
Hijacking                                 30
Unarmed Assault                           10
Name: count, dtype: int64

Number of incidents of the most common attack type by country:
country_txt
Iraq     18232
Syria     1480
Egypt     1177
Name: count, dtype: int64

Safest country (based on this attack type): Egypt
Most dangerous country: Iraq


<h1> Insight 3: What do the incident counts for bombing/explosion attacks in Iraq, Syria, and Egypt reveal about the distribution of this attack type across different countries?</h1>

In [10]:
# Recap of the dominant attack type. This cell reads filtered_df, top_attack_type
# and top_attack_df, which must already exist in the kernel when it runs.
attack_counts = filtered_df['attacktype1_txt'].value_counts()
top_attack_incidents = attack_counts.max()
print(f"Most Frequent Attack Type: {top_attack_type}")
print(f"Incidents Count: {top_attack_incidents}")

# Per-country breakdown of the dominant attack type.
country_counts = top_attack_df['country_txt'].value_counts()

print("\nNumber of incidents of the most common attack type by country:")
print(country_counts)

most_dangerous_country = country_counts.idxmax()
safest_country = country_counts.idxmin()

# Unweighted heatmap: one [lat, lon] point per incident, on the default tile layer.
heat_data = top_attack_df[['latitude', 'longitude']].values.tolist()
m = folium.Map(location=[33, 43], zoom_start=5)
HeatMap(heat_data, radius=10, blur=15).add_to(m)
m

Most Frequent Attack Type: Bombing/Explosion
Incidents Count: 20959

Number of incidents of the most common attack type by country:
country_txt
Iraq     18232
Syria     1480
Egypt     1177
Name: count, dtype: int64


<h1> Insight 4: What does the high clustering of incidents around Iraq and Syria suggest about the concentration of attack incidents in these areas, based on latitude and longitude data?</h1>

In [16]:
# Keep only incidents with both coordinates and take an explicit copy.
# The frame was produced by boolean-indexing another DataFrame, so adding the
# 'cluster' column below without .copy() is a chained assignment (pandas raises
# SettingWithCopyWarning for it, which the notebook's global warnings filter hides).
filtered_df = filtered_df.dropna(subset=['latitude', 'longitude']).copy()
coordinates = filtered_df[['latitude', 'longitude']].values

print("The results indicate that there is a clustering of incidents at specific geographic locations, particularly in areas of Iraq and Syria.")
print("The repeated coordinates (latitude and longitude) suggest that incidents tend to occur in close proximity to each other,")
print("especially in regions such as Iraq and Syria. This clustering pattern indicates that these locations may be more prone to high-frequency incidents,")
print(" likely due to ongoing conflicts or heightened security concerns in these areas.")

# Partition the incidents into two groups using raw (latitude, longitude) pairs;
# random_state is fixed so the assignment is repeatable.
kmeans = KMeans(n_clusters=2, random_state=42)
filtered_df['cluster'] = kmeans.fit_predict(coordinates)

m = folium.Map(location=[33, 43], zoom_start=5, tiles='CartoDB dark_matter')
HeatMap(filtered_df[['latitude', 'longitude']].values.tolist(), radius=10, blur=15).add_to(m)

# Mark each cluster centre; the columns are in the order the model was fitted on,
# i.e. index 0 is latitude and index 1 is longitude.
for cluster_center in kmeans.cluster_centers_:
    folium.Marker(
        location=[cluster_center[0], cluster_center[1]],
        popup=f"Cluster center: {cluster_center}",
        icon=folium.Icon(color='red', icon='info-sign')
    ).add_to(m)

print(filtered_df[['latitude', 'longitude', 'cluster']].head())
m


The results indicate that there is a clustering of incidents at specific geographic locations, particularly in areas of Iraq and Syria.
The repeated coordinates (latitude and longitude) suggest that incidents tend to occur in close proximity to each other,
especially in regions such as Iraq and Syria. This clustering pattern indicates that these locations may be more prone to high-frequency incidents,
 likely due to ongoing conflicts or heightened security concerns in these areas.
       latitude  longitude  cluster
133   31.198056  29.919167        1
1092  30.084629  31.334314        1
1181  30.084629  31.334314        1
2512  36.201721  37.143520        1
2540  36.201721  37.143520        1


<h1> Insight 5: How does the presence of 'Unknown' attack type incidents in Southeast Asia reflect the challenges in data collection and event classification in the region?</h1>

In [21]:
# Incidents in Southeast Asia whose primary attack type is recorded as 'Unknown',
# limited to the listed countries.
region_name = 'Southeast Asia'
focus_countries = ['Brunei', 'Cambodia', 'Indonesia', 'Laos', 'Malaysia', 'Myanmar', 'Philippines', 'Singapore', 'Thailand', 'Vietnam']
filtered_df = df[
    (df['region_txt'] == region_name) &
    (df['attacktype1_txt'] == 'Unknown') &
    (df['country_txt'].isin(focus_countries))
]

# Only rows with both coordinates can be placed on the map.
filtered_df = filtered_df[filtered_df['latitude'].notna() & filtered_df['longitude'].notna()]

heat_data = filtered_df[['latitude', 'longitude']].values.tolist()

# Centre the map on Southeast Asia and zoom out one level so the whole region
# is visible. The previous centre of [25, 80] is in northern India, which left
# much of the plotted data outside the initial view.
m = folium.Map(location=[8, 115], zoom_start=4, tiles='CartoDB positron')

HeatMap(heat_data, radius=10, blur=15).add_to(m)

# Counted after the coordinate filter, so incidents that lack a latitude or
# longitude are not included in this number.
unknown_attack_count = filtered_df.shape[0]
print(f"The presence of {unknown_attack_count} 'Unknown' attack type incidents in {region_name} reflects underlying challenges in data collection and event classification within the region. These challenges may include limited access to conflict zones, inconsistent reporting standards, or insufficient investigative resources. As a result, many incidents cannot be clearly categorized, which affects the accuracy of regional security assessments and may obscure the true scale and nature of terrorist activities in Southeast Asia.") 
m

The presence of 368 'Unknown' attack type incidents in Southeast Asia reflects underlying challenges in data collection and event classification within the region. These challenges may include limited access to conflict zones, inconsistent reporting standards, or insufficient investigative resources. As a result, many incidents cannot be clearly categorized, which affects the accuracy of regional security assessments and may obscure the true scale and nature of terrorist activities in Southeast Asia.
