In [None]:
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
import seaborn as sns

# Notebook-wide pandas display settings: do not truncate the column list and
# do not wrap wide frames across several blocks of output.
for option_name, option_value in (
    ('display.max_columns', None),
    ('display.expand_frame_repr', False),
):
    pd.set_option(option_name, option_value)

## Load the Data

In [None]:
# Read the raw CSV into a single frame; the file is decoded as ISO-8859-1.
dataset = pd.read_csv(
    filepath_or_buffer='Raw datasets/globalterrorismdb.csv',
    encoding='ISO-8859-1',
)

## Preliminary Analysis
Questions:
1. What is the size of my dataset and what are the variable data types?
2. What does my data look like?
3. Are there any missing values?

In [None]:
# (rows, columns) of the frame as loaded from the CSV
dataset.shape

In [None]:
# Preview the first few rows to see what the raw columns look like
dataset.head()

In [None]:
# Map the abbreviated raw column names to readable ones (one pair per line so
# the mapping is easy to scan and extend).
readable_column_names = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country_txt': 'Country',
    'region_txt': 'Region',
    'attacktype1_txt': 'Attack_type',
    'targtype1_txt': 'Target_type',
    'target1': 'Target',
    'weaptype1_txt': 'WeaponType',
    'nkill': 'Killed',
    'nwound': 'Wounded',
    'gname': 'Group',
}
dataset.rename(columns=readable_column_names, inplace=True)

In [None]:
# Total casualties per incident = killed + wounded.
# NOTE(review): if either count is missing the sum is NaN for that row —
# confirm whether missing counts should instead be treated as 0.
dataset['casualties'] = dataset['Killed'] + dataset['Wounded']

# The raw frame has far more columns than the analysis needs; keep only the
# ones used below, in display order. 'Killed' and 'Wounded' are retained
# because the per-country chart later in the notebook aggregates 'Killed'
# (dropping it here makes that cell fail with a KeyError), and keeping them
# also lets this cell be re-run without error.
analysis_columns = [
    'Year', 'Month', 'Day',
    'Country', 'Region', 'city', 'latitude', 'longitude',
    'Attack_type', 'Target_type', 'Target', 'WeaponType', 'motive',
    'Killed', 'Wounded', 'casualties',
    'property', 'Group', 'success', 'summary',
]
dataset = dataset[analysis_columns]
dataset.head()

In [None]:
# Data type pandas inferred for each retained column
dataset.dtypes

### Missing values

In [None]:
# Number of missing entries in each column
dataset.isna().sum()

## General Insights

In [None]:
# Pairwise correlation between the numeric columns only. The frame also holds
# text columns (e.g. 'Country', 'summary'); on pandas >= 2.0 calling .corr()
# on them raises instead of silently skipping them, so restrict explicitly.
corr_matrix = dataset.select_dtypes(include='number').corr()

plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, cmap='coolwarm', annot=True)
plt.title('Correlation Matrix')
plt.show()

In [None]:
# Ignore correlation between wounded

In [None]:
# One bar per year, bar height = number of recorded incidents in that year.
year_fig, year_ax = plt.subplots(figsize=(12, 6))
sns.countplot(data=dataset, x='Year', palette='ch:.25', ax=year_ax)
year_ax.set_title('Number of Terrorist Attacks by Year')
year_ax.set_xlabel('Year')
year_ax.set_ylabel('Number of Attacks')
# Year labels are dense, so turn them vertical to keep them legible.
plt.xticks(rotation=90)
plt.show()

In [None]:
# Share of each attack type among all incidents.
attack_type_counts = dataset['Attack_type'].value_counts()
# One colour per slice, drawn from the reversed cubehelix palette.
colors = sns.color_palette("ch:.25_r", n_colors=attack_type_counts.size)

plt.figure(figsize=(8, 8))
plt.pie(
    attack_type_counts,
    labels=attack_type_counts.index,
    colors=colors,
    autopct='%1.1f%%',
    startangle=140,
)
plt.title('Distribution of Attack Types')
plt.show()

In [None]:
# Number of incidents per target type, most frequent first.
# The column is passed via data=/x= keywords: seaborn >= 0.12 treats the first
# positional argument as `data`, so passing the Series positionally no longer
# produces a per-category count.
target_order = dataset['Target_type'].value_counts().index

target_fig, target_ax = plt.subplots(figsize=(15, 6))
sns.countplot(
    data=dataset,
    x='Target_type',
    order=target_order,
    palette='ch:.25_r',
    ax=target_ax,
)
target_ax.set_title('Attacks by Targets')
target_ax.set_xlabel('Target type')
target_ax.set_ylabel('Number of Attacks')
# Category names are long; rotate them so they do not overlap.
plt.xticks(rotation=90)
plt.show()

## Geographical Analysis

In [None]:

# Only incidents with both coordinates can be placed on the map.
df = dataset.dropna(subset=['latitude', 'longitude'])

# Coordinates are plain longitude/latitude, so the same Plate Carree CRS is
# used both for the map projection and as the transform of the points.
projection = ccrs.PlateCarree()

# scatter plot of attack locations
plt.figure(figsize=(10, 6))
ax = plt.axes(projection=projection)
ax.scatter(df['longitude'], df['latitude'], s=5, color='red', transform=projection)

# Base-map layers. These come from the public `cartopy.feature` module rather
# than being reached through `ccrs.cartopy`, which only works as a side effect
# of how cartopy's own modules import each other.
ax.coastlines()
ax.add_feature(cfeature.BORDERS, linestyle=':')
ax.add_feature(cfeature.LAND, edgecolor='black')
ax.set_title('Terrorist Attack Locations')

plt.show()

In [None]:
# value_counts() sorts by frequency (descending), so the first index label is
# the most frequently attacked country / region.
most_attacked_country = dataset['Country'].value_counts().index[0]
most_attacked_region = dataset['Region'].value_counts().index[0]

print('Country with Highest Terrorist Attacks:', most_attacked_country)
print('Region with Highest Terrorist Attacks:', most_attacked_region)

In [None]:
# Attacks vs. fatalities for the 15 countries with the most incidents.
# NOTE(review): requires the 'Killed' column to still be present in `dataset`.
coun_terror = dataset['Country'].value_counts().head(15).to_frame('Attacks')
coun_kill = dataset.groupby('Country')[['Killed']].sum()

# Left join on the country index keeps exactly the 15 countries selected above
# and attaches their total fatalities as a second bar series.
coun_terror.join(coun_kill, how='left').plot.bar(width=0.9)
fig = plt.gcf()
fig.set_size_inches(18, 6)
plt.show()

In [None]:
# Attack counts per group, ranked by frequency. The most frequent label
# (position 0) is skipped — presumably the catch-all 'Unknown' group;
# TODO confirm — leaving the next 14 groups.
top_group_counts = dataset['Group'].value_counts().iloc[1:15]

# x/y are passed as keywords: seaborn >= 0.12 accepts only `data`
# positionally, so positional x/y raises a TypeError there.
sns.barplot(
    x=top_group_counts.values,
    y=top_group_counts.index,
    palette='ch:.25_r',
)
plt.xticks(rotation=90)
fig = plt.gcf()
fig.set_size_inches(10, 8)
plt.title('Terrorist Groups with Highest Terror Attacks')
plt.show()

## Terrorism in Kenya

In [None]:
# Restrict the analysis to incidents recorded in Kenya.
terror_kenya = dataset[dataset['Country'] == 'Kenya']

# Labels ranked 2nd-11th by incident count; the most frequent label is
# skipped — presumably the catch-all 'Unknown' group; TODO confirm.
ke_group_names = terror_kenya['Group'].value_counts().index[1:11]
ke_groups = terror_kenya[terror_kenya['Group'].isin(ke_group_names)]

# Two side-by-side panels: most active groups (left), attack types (right).
f, ax = plt.subplots(1, 2, figsize=(25, 12))
groups_ax, types_ax = ax

sns.countplot(y='Group', data=ke_groups, ax=groups_ax)
groups_ax.set_title('Top Terrorist Groups')

sns.countplot(y='Attack_type', data=terror_kenya, ax=types_ax)
types_ax.set_title('Common Attack Types')

# Larger tick labels on both panels, with extra horizontal room between them
# for the long category names.
for panel in ax:
    panel.tick_params(labelsize=15)
plt.subplots_adjust(hspace=0.3, wspace=0.6)
plt.show()