In [3]:
# Imports

import numpy as np
import pandas as pd

import warnings
# Install a global 'ignore' filter: no warning of any category is shown from here on.
# NOTE(review): this also silences library warnings raised by later cells (e.g. pandas
# dtype or copy warnings) — consider narrowing the filter to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# Load the raw CSV into a pandas DataFrame and preview the first rows

# The file is read with the ISO-8859-1 codec given below.
csv_path = 'globalterrorismdb_0718dist.csv'
terrorism_df = pd.read_csv(csv_path, encoding='ISO-8859-1')
terrorism_df.head()

Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,...,,,,,PGIS,0,0,0,0,
1,197000000002,1970,0,0,,0,,130,Mexico,1,...,,,,,PGIS,0,1,1,1,
2,197001000001,1970,1,0,,0,,160,Philippines,5,...,,,,,PGIS,-9,-9,1,1,
3,197001000002,1970,1,0,,0,,78,Greece,8,...,,,,,PGIS,-9,-9,1,1,
4,197001000003,1970,1,0,,0,,101,Japan,4,...,,,,,PGIS,-9,-9,1,1,


In [5]:
# Take a subset of the dataset and give the kept columns readable names

# Raw column -> display name. The dict order is the column order of the result.
# 'summary' is included because the outputs of this and later cells show a
# Summary column; without it the cell cannot reproduce its own output.
column_names = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'attacktype1_txt': 'AttackType',
    'target1': 'Target',
    'nkill': 'Killed',
    'nwound': 'Wounded',
    'summary': 'Summary',
    'gname': 'Group',
    'targtype1_txt': 'Target_type',
    'weaptype1_txt': 'Weapon_type',
    'motive': 'Motive',
}

# Select and rename in one chained expression. rename() returns a new frame, so
# nothing is modified in place on a slice of the raw data.
# NOTE: terrorism_df is rebound here, so this cell can only be run once per load;
# re-run the read cell first if it needs to be executed again.
terrorism_df = terrorism_df[list(column_names)].rename(columns=column_names)
terrorism_df.head()

Unnamed: 0,Year,Month,Day,Country,Region,AttackType,Target,Killed,Wounded,Summary,Group,Target_type,Weapon_type,Motive
0,1970,7,2,Dominican Republic,Central America & Caribbean,Assassination,Julio Guzman,1.0,0.0,,MANO-D,Private Citizens & Property,Unknown,
1,1970,0,0,Mexico,North America,Hostage Taking (Kidnapping),"Nadine Chaval, daughter",0.0,0.0,,23rd of September Communist League,Government (Diplomatic),Unknown,
2,1970,1,0,Philippines,Southeast Asia,Assassination,Employee,1.0,0.0,,Unknown,Journalists & Media,Unknown,
3,1970,1,0,Greece,Western Europe,Bombing/Explosion,U.S. Embassy,,,,Unknown,Government (Diplomatic),Explosives,
4,1970,1,0,Japan,East Asia,Facility/Infrastructure Attack,U.S. Consulate,,,,Unknown,Government (Diplomatic),Incendiary,


In [6]:
# Show the dtype of every column, followed by the number of rows in the frame

print(terrorism_df.dtypes, terrorism_df.shape[0], sep='\n')

Year             int64
Month            int64
Day              int64
Country         object
Region          object
AttackType      object
Target          object
Killed         float64
Wounded        float64
Summary         object
Group           object
Target_type     object
Weapon_type     object
Motive          object
dtype: object
181691


In [7]:
# Count missing (NaN/None) entries in each column

terrorism_df.isna().sum()

Year                0
Month               0
Day                 0
Country             0
Region              0
AttackType          0
Target            636
Killed          10313
Wounded         16311
Summary         66129
Group               0
Target_type         0
Weapon_type         0
Motive         131130
dtype: int64

In [8]:
# Summary statistics for the numerical (float, integer) columns

# Selecting the numeric columns explicitly mirrors what describe() does by
# default on a frame that mixes numeric and object columns.
terrorism_df.select_dtypes(include='number').describe()

Unnamed: 0,Year,Month,Day,Killed,Wounded
count,181691.0,181691.0,181691.0,171378.0,165380.0
mean,2002.638997,6.467277,15.505644,2.403272,3.167668
std,13.25943,3.388303,8.814045,11.545741,35.949392
min,1970.0,0.0,0.0,0.0,0.0
25%,1991.0,4.0,8.0,0.0,0.0
50%,2009.0,6.0,15.0,0.0,0.0
75%,2014.0,9.0,23.0,2.0,2.0
max,2017.0,12.0,31.0,1570.0,8191.0


In [33]:
# Number of recorded attacks per region, most frequent first

region_series = terrorism_df['Region']
region_series.value_counts()

Middle East & North Africa     50474
South Asia                     44974
South America                  18978
Sub-Saharan Africa             17550
Western Europe                 16639
Southeast Asia                 12485
Central America & Caribbean    10344
Eastern Europe                  5144
North America                   3456
East Asia                        802
Central Asia                     563
Australasia & Oceania            282
Name: Region, dtype: int64

In [12]:
# Statistics for categorical columns: the most and least frequent value of each

# One pass per column replaces the copy-pasted sections. The previous version
# repeated the year section and labelled its lowest value as "Highest".
for position, column in enumerate(['Country', 'Region', 'Year']):
    # value_counts() sorts by frequency in descending order, so the first index
    # entry is the most frequent value and the last one the least frequent.
    # With ties at the bottom, the last entry is just one of the tied values.
    counts = terrorism_df[column].value_counts()

    # Every section after the first is separated by a blank line.
    header = 'Statistics by ' + column.lower() + ':'
    print(header if position == 0 else '\n' + header)
    print(column + ' with Highest Terrorist Attacks:', counts.index[0])
    print(column + ' with lowest Terrorist Attacks:', counts.index[-1])

Statistics by country:
Country with Highest Terrorist Attacks: Iraq
Country with lowest Terrorist Attacks: Antigua and Barbuda

Statistics by region:
Region with Highest Terrorist Attacks: Middle East & North Africa
Region with lowest Terrorist Attacks: Australasia & Oceania

Statistics by year:
Year with Highest Terrorist Attacks: 2014
Year with lowest Terrorist Attacks: 1971


In [18]:
# Basic Analysis: find the single attack with the most casualties (killed + wounded)

# Casualties per attack. A missing value in either Killed or Wounded makes the
# sum NaN, and max()/idxmax() skip NaN, so such attacks are not considered below.
terrorism_df['casuality'] = terrorism_df['Killed'] + terrorism_df['Wounded']

# Locate the worst attack once and reuse its row, instead of re-running
# idxmax() and the row lookup for every field that is printed.
worst_attack = terrorism_df.loc[terrorism_df['casuality'].idxmax()]

print('The terrorist attack with highest casualty of', worst_attack['casuality'],
      'people took place in', worst_attack['Country'], 'in the year',
      worst_attack['Year'])

The terrorist attack with highest casualty of 9574.0 people took place in United States in the year 2001
