# Terrorism hotspots

### Preparation of dataset

In [72]:
# Import libraries
import pandas as pd 
import plotly.express as px
import plotly.io as pio

In [73]:
#Import dataset
main_df = pd.read_csv("globalterrorismdb_0718dist.tar.bz2", compression="bz2", low_memory=False)

In [74]:
# Basic stats
def basic_stats(dataframe):
    print(f"Dataframe shape : {dataframe.shape[0]} rows, {dataframe.shape[1]} columns")
    print()
    print('Count of duplicates rows :', len(dataframe)-len(dataframe.drop_duplicates()))
    print()
    
    print("Display of dataset: ")
    display(dataframe.head())
    
    print("Basics statistics: ")
    display(dataframe.describe(include="all"))

    print("Percentage of missing values: ")
    display(100 * dataframe.isnull().sum() / dataframe.shape[0])

    print('Type for each columns')
    display(dataframe.dtypes)

basic_stats(main_df)

Dataframe shape : 181691 rows, 136 columns

Count of duplicates rows : 0

Display of dataset: 


Unnamed: 0.1,Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,0,197000000001,1970,7,2,,0,,58,Dominican Republic,...,,,,,PGIS,0,0,0,0,
1,1,197000000002,1970,0,0,,0,,130,Mexico,...,,,,,PGIS,0,1,1,1,
2,2,197001000001,1970,1,0,,0,,160,Philippines,...,,,,,PGIS,-9,-9,1,1,
3,3,197001000002,1970,1,0,,0,,78,Greece,...,,,,,PGIS,-9,-9,1,1,
4,4,197001000003,1970,1,0,,0,,101,Japan,...,,,,,PGIS,-9,-9,1,1,


Basics statistics: 


Unnamed: 0.1,Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
count,181691.0,181691.0,181691.0,181691.0,181691.0,9239,181691.0,2220,181691.0,181691,...,28289,115500,76933,43516,181691,181691.0,181691.0,181691.0,181691.0,25038
unique,,,,,,2244,,1859,,205,...,15429,83988,62263,36090,26,,,,,14306
top,,,,,,"September 18-24, 2016",,8/4/1998,,Iraq,...,Casualty numbers for this incident conflict ac...,Committee on Government Operations United Stat...,"Christopher Hewitt, ""Political Violence and Te...","Christopher Hewitt, ""Political Violence and Te...",START Primary Collection,,,,,"201612010023, 201612010024, 201612010025, 2016..."
freq,,,,,,101,,18,,24636,...,1607,205,134,139,78002,,,,,80
mean,90845.0,200270500000.0,2002.638997,6.467277,15.505644,,0.045346,,131.968501,,...,,,,,,-4.543731,-4.464398,0.09001,-3.945952,
std,52449.818217,1325957000.0,13.25943,3.388303,8.814045,,0.208063,,112.414535,,...,,,,,,4.543547,4.637152,0.568457,4.691325,
min,0.0,197000000000.0,1970.0,0.0,0.0,,0.0,,4.0,,...,,,,,,-9.0,-9.0,-9.0,-9.0,
25%,45422.5,199102100000.0,1991.0,4.0,8.0,,0.0,,78.0,,...,,,,,,-9.0,-9.0,0.0,-9.0,
50%,90845.0,200902200000.0,2009.0,6.0,15.0,,0.0,,98.0,,...,,,,,,-9.0,-9.0,0.0,0.0,
75%,136267.5,201408100000.0,2014.0,9.0,23.0,,0.0,,160.0,,...,,,,,,0.0,0.0,0.0,0.0,


Percentage of missing values: 


Unnamed: 0     0.000000
eventid        0.000000
iyear          0.000000
imonth         0.000000
iday           0.000000
                ...    
INT_LOG        0.000000
INT_IDEO       0.000000
INT_MISC       0.000000
INT_ANY        0.000000
related       86.219461
Length: 136, dtype: float64

Type for each columns


Unnamed: 0     int64
eventid        int64
iyear          int64
imonth         int64
iday           int64
               ...  
INT_LOG        int64
INT_IDEO       int64
INT_MISC       int64
INT_ANY        int64
related       object
Length: 136, dtype: object

In [75]:
# Keep specific columns for analysis
to_keep = ["iyear", "imonth", "iday", "country_txt", "region_txt", "city", "latitude", "longitude", "success", "suicide", "attacktype1", "attacktype1_txt", "targtype1_txt", "targsubtype1_txt", "target1", "natlty1_txt", "gname", "gsubname", "nperps", "weaptype1_txt", "weapsubtype1_txt", "nkill"]
df = main_df.loc[:,to_keep]

# Basic stats on new dataframe
basic_stats(df)

Dataframe shape : 181691 rows, 22 columns

Count of duplicates rows : 10834

Display of dataset: 


Unnamed: 0,iyear,imonth,iday,country_txt,region_txt,city,latitude,longitude,success,suicide,...,targtype1_txt,targsubtype1_txt,target1,natlty1_txt,gname,gsubname,nperps,weaptype1_txt,weapsubtype1_txt,nkill
0,1970,7,2,Dominican Republic,Central America & Caribbean,Santo Domingo,18.456792,-69.951164,1,0,...,Private Citizens & Property,Named Civilian,Julio Guzman,Dominican Republic,MANO-D,,,Unknown,,1.0
1,1970,0,0,Mexico,North America,Mexico city,19.371887,-99.086624,1,0,...,Government (Diplomatic),"Diplomatic Personnel (outside of embassy, cons...","Nadine Chaval, daughter",Belgium,23rd of September Communist League,,7.0,Unknown,,0.0
2,1970,1,0,Philippines,Southeast Asia,Unknown,15.478598,120.599741,1,0,...,Journalists & Media,Radio Journalist/Staff/Facility,Employee,United States,Unknown,,,Unknown,,1.0
3,1970,1,0,Greece,Western Europe,Athens,37.99749,23.762728,1,0,...,Government (Diplomatic),Embassy/Consulate,U.S. Embassy,United States,Unknown,,,Explosives,Unknown Explosive Type,
4,1970,1,0,Japan,East Asia,Fukouka,33.580412,130.396361,1,0,...,Government (Diplomatic),Embassy/Consulate,U.S. Consulate,United States,Unknown,,,Incendiary,,


Basics statistics: 


Unnamed: 0,iyear,imonth,iday,country_txt,region_txt,city,latitude,longitude,success,suicide,...,targtype1_txt,targsubtype1_txt,target1,natlty1_txt,gname,gsubname,nperps,weaptype1_txt,weapsubtype1_txt,nkill
count,181691.0,181691.0,181691.0,181691,181691,181256,177135.0,177134.0,181691.0,181691.0,...,181691,171318,181053,180132,181691,5890,110576.0,181691,160923,171378.0
unique,,,,205,12,36673,,,,,...,22,112,86005,215,3537,1183,,12,30,
top,,,,Iraq,Middle East & North Africa,Unknown,,,,,...,Private Citizens & Property,Unnamed Civilian/Unspecified,Civilians,Iraq,Unknown,Militants,,Explosives,Unknown Explosive Type,
freq,,,,24636,50474,9775,,,,,...,43511,11596,6461,24113,82782,480,,92426,44980,
mean,2002.638997,6.467277,15.505644,,,,23.498343,-458.6957,0.889598,0.036507,...,,,,,,,-65.361154,,,2.403272
std,13.25943,3.388303,8.814045,,,,18.569242,204779.0,0.313391,0.187549,...,,,,,,,216.536633,,,11.545741
min,1970.0,0.0,0.0,,,,-53.154613,-86185900.0,0.0,0.0,...,,,,,,,-99.0,,,0.0
25%,1991.0,4.0,8.0,,,,11.510046,4.54564,1.0,0.0,...,,,,,,,-99.0,,,0.0
50%,2009.0,6.0,15.0,,,,31.467463,43.24651,1.0,0.0,...,,,,,,,-99.0,,,0.0
75%,2014.0,9.0,23.0,,,,34.685087,68.71033,1.0,0.0,...,,,,,,,1.0,,,2.0


Percentage of missing values: 


iyear                0.000000
imonth               0.000000
iday                 0.000000
country_txt          0.000000
region_txt           0.000000
city                 0.239417
latitude             2.507554
longitude            2.508104
success              0.000000
suicide              0.000000
attacktype1          0.000000
attacktype1_txt      0.000000
targtype1_txt        0.000000
targsubtype1_txt     5.709144
target1              0.351146
natlty1_txt          0.858050
gname                0.000000
gsubname            96.758232
nperps              39.140629
weaptype1_txt        0.000000
weapsubtype1_txt    11.430396
nkill                5.676120
dtype: float64

Type for each columns


iyear                 int64
imonth                int64
iday                  int64
country_txt          object
region_txt           object
city                 object
latitude            float64
longitude           float64
success               int64
suicide               int64
attacktype1           int64
attacktype1_txt      object
targtype1_txt        object
targsubtype1_txt     object
target1              object
natlty1_txt          object
gname                object
gsubname             object
nperps              float64
weaptype1_txt        object
weapsubtype1_txt     object
nkill               float64
dtype: object

In [76]:
# Rename elements for better visibility
df.rename(columns={"iyear": "year",
                   "imonth" : "month",
                   "country_txt": "country",
                   "region_txt": "region",
                   "attacktype1": "attacktype_id",
                   "attacktype1_txt": "attacktype",
                   "targtype1_txt": "target_type",
                   "targsubtype1_txt": "target_subtype",
                   "target1": "target",
                   "natlty1_txt": "nationality",
                   "gname": "terrorist_group",
                   "gsubname": "group_details",
                   "nperps": "group_participants",
                   "weaptype1_txt": "weapon",
                   "weapsubtype1_txt": "weapon_subtype",
                   "nkill": "deaths"
                   }, 
          inplace=True)

df['weapon'] = df['weapon'].apply(lambda x: "Vehicule" if x == "Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)" else x)

In [77]:
# Adding iso-code to countries
iso_code = pd.read_csv('https://raw.githubusercontent.com/plotly/datasets/master/2014_world_gdp_with_codes.csv')
iso_code.rename(columns={"COUNTRY": "country", "CODE": "code"}, inplace=True)

df = df.merge(iso_code, on=['country'])

df["code"].isnull().sum()

0

### 🗺️ Terrorism Hotspots 🗺️

In [78]:
# Group the count by year
region_attack = df.groupby(["latitude", "longitude", "country"]).size().reset_index(name="count")
region_attack

Unnamed: 0,latitude,longitude,country,count
0,-53.154613,-70.916943,Chile,5
1,-45.867889,-67.499911,Argentina,5
2,-45.571112,-72.068517,Chile,2
3,-45.403544,-72.686416,Chile,1
4,-43.532054,172.636225,New Zealand,4
...,...,...,...,...
47420,64.837778,-147.716389,United States,1
47421,65.012089,25.465077,Finland,1
47422,65.683368,-18.100080,Iceland,1
47423,65.825119,21.688703,Sweden,1


In [79]:
# Display heatmap to identify hotspots
fig_map = px.density_mapbox(region_attack, 
                            lat='latitude', lon='longitude',
                            z='count', zoom=1.15,
                            color_continuous_scale="oryel",
                            mapbox_style='carto-positron',
                            width=1200, height=800,
                            center={"lat": 38.603354, "lon": 25.888334})

fig_map.update_layout(
              title={
                  "text": "Terrorism hotspots from 1970 to 2017"
              },
              xaxis_title="number of attacks",
              title_x=0.5
              )

fig_map.show()

### 📅 How has the number of terrorist activities changed over the years? 📅

In [80]:
# Group the count by year
yearly_attack = df.groupby("year").size().reset_index(name="count")

In [81]:
# Display a line chart
fig = px.line(yearly_attack, x="year", y="count")

fig.update_layout(
              title={
                  "text": "Terrorism activity over the years"
              },
              xaxis_title="number of attacks"
              )

fig.show()

We can notice a major increase from 2011 to 2014, mainly explained by Boko Haram and Daesh activities.

### 🌍 Are there certain regions where this trend is different from the global average? 🌍

In [82]:
# Group the count by year and region
region_attack_year = df.groupby(["year", "region"]).size().reset_index(name="count")
region_attack_year.head()

Unnamed: 0,year,region,count
0,1970,Australasia & Oceania,1
1,1970,Central America & Caribbean,7
2,1970,East Asia,2
3,1970,Middle East & North Africa,28
4,1970,North America,472


In [83]:
# Display a line chart
fig2 = px.line(region_attack_year, x="year", y="count", color="region")

fig2.update_layout(
              title_text= "Terrorism activity across regions",
              xaxis_title="number of attacks"
              )

fig2.show()

Most of the increase is due to terrorism activites in Middle East & North Africa, and South Asia.
Let's focus on 2014 to indentify the most impacted countries.

In [84]:
# Keep rows for 2014
df_2014 = df[df["year"] == 2014]

attack_2014 = df_2014.groupby(["year", "country", "code"]).size().reset_index(name="count")
attack_2014.head()

Unnamed: 0,year,country,code,count
0,2014,Afghanistan,AFG,1824
1,2014,Albania,ALB,2
2,2014,Algeria,DZA,13
3,2014,Argentina,ARG,1
4,2014,Australia,AUS,8


In [85]:
fig_country = px.choropleth(attack_2014, 
                            locations="code",         
                            color="count",    
                            hover_name="country",    
                            color_continuous_scale="OrRd",
                            height=600
                            )

fig_country.update_geos(fitbounds="locations", visible=True)

fig_country.update_layout(
    margin={"r":0,"t":50,"l":0,"b":50},
    title_text="Terrorism activity in 2014"
)

fig_country.show()


Iraq, Pakistan and Afghanistan are the main countries with terrosist attacks in 2014.

### ↔️ Is the number of incidents and the number of victims correlated? ↔️

In [86]:
# Display occurrences and totals deaths as a scatter chart to see correlation
df_summary = df.groupby("region").agg(
    occurrences=("region", "size"),  
    total_deaths=("deaths", "sum")
).reset_index()

df_summary

Unnamed: 0,region,occurrences,total_deaths
0,Australasia & Oceania,280,150.0
1,Central America & Caribbean,10268,28697.0
2,Central Asia,563,1000.0
3,East Asia,763,1139.0
4,Eastern Europe,4627,7082.0
5,Middle East & North Africa,48233,136137.0
6,North America,3456,4916.0
7,South America,18970,28848.0
8,South Asia,44974,101319.0
9,Southeast Asia,11928,14267.0


In [87]:
fig_victim = px.scatter(df_summary, x="occurrences", y="total_deaths", 
                        color="region", trendline="ols", trendline_scope="overall")

fig_victim.update_layout(
    showlegend=False,
    title_text="Correlation between attacks occurrences and number of deaths")

fig_victim.show()


There may be a correlation between these two elements, but we can see that two regions move away from the trendline : Western Europe and Sub-Saharan Africa. For the same number of attacks, Western Europe has almost 12 times fewer victims.

### What are the most common methods of attacks? 

In [88]:
# Group data by weapon type
df_weapon = df.groupby("weapon").size().reset_index(name="count").sort_values(by="count")

df_weapon

Unnamed: 0,weapon,count
8,Radiological,14
3,Fake Weapons,33
0,Biological,35
11,Vehicule,84
7,Other,108
9,Sabotage Equipment,139
1,Chemical,308
6,Melee,3118
5,Incendiary,10560
10,Unknown,14609


In [89]:
# Display data as bar chart
fig_weapon = px.bar(df_weapon, x="count", y="weapon")

fig_weapon.update_layout(height=800,
                         title="Weapon type repartition")

fig_weapon.show()

Explosives and firearms are the main weapons in attacks.

### Does it differ in various regions ?

In [90]:
# Count each weapon type per region
df_method = df.groupby(["region", "weapon"]).size().reset_index(name="count")
df_method

Unnamed: 0,region,weapon,count
0,Australasia & Oceania,Chemical,11
1,Australasia & Oceania,Explosives,79
2,Australasia & Oceania,Firearms,74
3,Australasia & Oceania,Incendiary,73
4,Australasia & Oceania,Melee,10
...,...,...,...
117,Western Europe,Other,19
118,Western Europe,Radiological,2
119,Western Europe,Sabotage Equipment,10
120,Western Europe,Unknown,800


In [91]:
# Display a heatmap to analyze regions vs weapon types
heatmap = px.density_heatmap(df_method, 
                         x="region", 
                         y="weapon", 
                         z="count",
                         color_continuous_scale="oryel")

heatmap.update_layout(
    title="Occurrences des types d'armes par région",
    xaxis={'categoryorder':'total descending'}, 
    yaxis={'categoryorder':'total descending'}, 
    height=800
)

heatmap.show()

There is no clear difference.

### Does it differ in time?

In [92]:
# Group years in decades for a better analysis and visibility
df["decade"] = df["year"].apply(lambda x: 1970 if x < 1980 else 
                                                  1980 if x < 1990 else 
                                                  1990 if x < 2000 else 
                                                  2000 if x < 2010 else 2010)

df_date = df.groupby(["decade", "weapon"]).size().reset_index(name='count')

df_date.head()

Unnamed: 0,decade,weapon,count
0,1970,Chemical,26
1,1970,Explosives,4277
2,1970,Fake Weapons,3
3,1970,Firearms,2916
4,1970,Incendiary,1203


In [93]:
# Display a bar chart to see weapon type through decades
fig_date = px.bar(df_date, x="decade", y="count",
                    color="weapon",
                    barmode="group",
                    height=500, 
                    width=1000)
fig_date.show()

Firearms were pratically as used as explosives from 1970 to 1990. From 2000 explosives were the main weapon type.