# The Spark Foundation
## By: Karan 
## Task-4 Global Terrorism
### Problem statement
#### ● Perform ‘Exploratory Data Analysis’ on dataset ‘Global Terrorism’
#### ● As a security/defense analyst, try to find out the hot zone of terrorism.
#### ● What all security issues and insights you can derive by EDA?
#### ● dataset: https://bit.ly/2TK5Xn5

# Importing libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import folium
import folium.plugins
import io
from mpl_toolkits.basemap import Basemap
from matplotlib import animation,rc
import base64

ModuleNotFoundError: No module named 'mpl_toolkits.basemap'

# Reading csv

In [None]:
# Load the raw CSV export. The file is read as ISO-8859-1 (Latin-1) rather
# than pandas' default UTF-8.
data = pd.read_csv("globalterrorismdb.csv", encoding="ISO-8859-1")
# Bare expression: rendered as the cell's output in the notebook.
data

# Keeping only the columns that have at least 160000 non-null records (sparser columns are dropped)


In [None]:
# `thresh` is the minimum number of NON-null values a column needs in order
# to be kept; columns with fewer than 160000 populated cells are discarded.
data_new = data.dropna(axis="columns", thresh=160000)


In [None]:
# Show the column-filtered frame as the cell output.
data_new

In [None]:
# (rows, columns) remaining after the sparse columns were dropped.
data_new.shape

In [None]:
# Count of missing values in each remaining column.
data_new.isnull().sum()

# EDA

In [None]:
# Casualties per attack = killed + wounded.
# A plain `nkill + nwound` is NaN as soon as EITHER count is missing, which
# silently removes partially recorded attacks from every later casualty sum.
# `Series.add(..., fill_value=0)` treats a single missing side as 0 and only
# leaves NaN when both counts are missing.
data_new['casualities'] = data_new['nkill'].add(data_new['nwound'], fill_value=0)

# value_counts() is sorted by descending frequency, so idxmax() returns the
# most frequent label (the same value as `.index[0]`).
top_country = data_new['country_txt'].value_counts().idxmax()
top_region = data_new['region_txt'].value_counts().idxmax()

# Row label of the single deadliest attack, used to look up where it happened.
deadliest_idx = data_new['nkill'].idxmax()

print('Country with Highest Terrorist Attacks:', top_country)
print('Regions with Highest Terrorist Attacks:', top_region)
print('Maximum people killed in an attack are:', data_new['nkill'].max(),
      'that took place in', data_new.loc[deadliest_idx, 'country_txt'])
print("Year with the most attacks:", data_new['iyear'].value_counts().idxmax())
print("Month with the most attacks:", data_new['imonth'].value_counts().idxmax())
print("Most Attack Types:", data_new['attacktype1_txt'].value_counts().idxmax())

# Terrorist Groups with most attacks

In [None]:
# Attack counts per group, most active first. Position 0 is skipped on
# purpose by the slice (presumably the 'Unknown' placeholder group -- confirm
# against the data), leaving the next nine groups.
top_groups = data_new['gname'].value_counts().iloc[1:10]

# x/y are passed by keyword: seaborn >= 0.12 no longer accepts them
# positionally, while the keyword form works on every seaborn version.
sns.barplot(x=top_groups.values, y=top_groups.index, palette='Set1')
plt.xticks(rotation=90)
fig = plt.gcf()
fig.set_size_inches(10, 8)
plt.title('Terrorist Groups with Highest Terror Attacks')
plt.show()


# Countries with highest terrorist attacks

In [None]:
# Attacks per country, sorted by descending count. Computed once and reused
# for both the printed ranking and the chart.
country_counts = data_new['country_txt'].value_counts()

# Both the name and the count now come from the same `country_txt` series
# (the count was previously read from the separate `country` column).
print(f"The highest terrorist attacks were commited in {country_counts.index[0]} with {country_counts.iloc[0]} attacks")

print('\nThe other 9 countries with highest terrorist attacks are:')
# Ranks 2..10. `.iloc` is used because the series is indexed by country name,
# so integer access must be explicitly positional; slicing also copes with
# fewer than ten countries instead of raising IndexError.
for rank, (country, n_attacks) in enumerate(country_counts.iloc[1:10].items(), start=2):
    print(f"{rank}. {country} with {n_attacks} attacks")

#Visualization
top_ten = country_counts.iloc[:10]
plt.subplots(figsize=(15, 6))
# Keyword x/y: positional data vectors are rejected by seaborn >= 0.12.
sns.barplot(x=top_ten.index, y=top_ten.values, palette='Set1')
plt.title('Top Countries Affected')
plt.xlabel('Countries')
plt.ylabel('Count')
plt.xticks(rotation=90)
plt.show()

# Number Of Terrorist Activities per Year

In [None]:
# Distribution of attacks over the years, in 20 bins.
f, ax = plt.subplots(figsize=(10, 7))
ax.set_title('Number Of Terrorist Activities per Year')
# Remove the top and right spines of the figure's axes.
sns.despine(fig=f)
# NOTE(review): `distplot` is deprecated in recent seaborn releases; it is
# kept here so the plot stays exactly as before.
sns.distplot(data_new['iyear'], bins=20, color="g", ax=ax)

## Here we can observe that the number of terrorist activities has gone up sharply after 2010.

# Terrorist Activities by Region each Year

In [None]:
# Year x region table of attack counts, drawn as a stacked area chart
# (one band per region).
attacks_by_region = pd.crosstab(index=data_new['iyear'], columns=data_new['region_txt'])
attacks_by_region.plot.area(figsize=(15, 6))
plt.title('Terrorist Activities by Region each Year')
plt.ylabel('Number of Attacks')
plt.show()


# Number Of Casualities Each Year

In [None]:
plt.subplots(figsize=(10, 7))
# Total casualties per year as a two-column frame ('Year', 'Casualities').
# sum() skips NaN, so attacks whose casualty figure is missing add nothing.
year_casual = (
    data_new.groupby('iyear')['casualities']
    .sum()
    .rename('Casualities')
    .rename_axis('Year')
    .reset_index()
)
plt.title('Number Of Casualities Each Year')
# `palette` only applies when a `hue` variable is given; without one seaborn
# ignores it (and warns), so only the explicit line colour is passed.
sns.lineplot(x='Year', y='Casualities', data=year_casual, color="g")

# Most common target

In [None]:
# Bars ordered from the most to the least frequently attacked target type.
target_order = data_new['targtype1_txt'].value_counts().index

plt.subplots(figsize=(15, 6))
# The series is passed as `x=`: seaborn >= 0.12 treats the first positional
# argument as `data`, so a positional Series no longer means "x".
sns.countplot(x=data_new['targtype1_txt'], palette='Set1', order=target_order)
plt.xticks(rotation=90)
plt.title('Most common target')
plt.show()

# World-wide map of Terrorism

# Attacks are drawn with four marker sizes and three colors. Both the size and the color depend on the number of people killed in each attack. You can check this logic in the code below:

In [None]:
# Rows without coordinates cannot be placed on a map; dropna() returns a new
# frame, so `data_new` itself is left untouched.
terror_fol = data_new.dropna(subset=['latitude', 'longitude'])

# Only the first 8000 geolocated rows are plotted (presumably to keep the
# rendered map light -- confirm). All series below share that row index.
_first_rows = terror_fol.iloc[:8000]
location_fol = _first_rows[['latitude', 'longitude']]
country_fol = _first_rows['country_txt']
city_fol = _first_rows['city']
killed_fol = _first_rows['nkill']
wound_fol = _first_rows['nwound']
def color_point(x):
    """Return the marker colour for an attack with ``x`` people killed.

    * ``'red'``    -- 30 or more killed
    * ``'blue'``   -- more than 0 and fewer than 30 killed
    * ``'orange'`` -- everything else (0, negative, or NaN, since every
      comparison with NaN is false)
    """
    if x >= 30:
        return 'red'
    if 0 < x < 30:
        return 'blue'
    return 'orange'
def point_size(x):
    """Return the marker radius for an attack with ``x`` people killed.

    * ``16``  -- 500 or more killed
    * ``8``   -- 100 to 499 killed
    * ``2``   -- 30 to 99 killed
    * ``0.5`` -- everything else (fewer than 30, or NaN, since every
      comparison with NaN is false)

    The lowest threshold is inclusive (``>= 30``) so that it lines up with
    ``color_point``, which colours attacks with 30 or more deaths red.
    Previously an attack with exactly 30 deaths was drawn red but with the
    smallest radius.
    """
    if x >= 500:
        return 16
    if x >= 100:
        return 8
    if x >= 30:
        return 2
    return 0.5
# World view centred at latitude 30, longitude 0.
map2 = folium.Map(location=[30, 0], tiles='cartodbpositron', zoom_start=2)

# One circle per attack. `point` is a row label shared by all the *_fol
# objects, so each lookup below refers to the same attack.
for point in location_fol.index:
    killed = killed_fol[point]
    info = (
        f'<b>Country: </b>{country_fol[point]!s}'
        f'<br><b>City: </b>: {city_fol[point]!s}'
        f'<br><b>Killed </b>: {killed!s}'
        f'<br><b>Wounded</b> : {wound_fol[point]!s}'
    )
    iframe = folium.IFrame(html=info, width=200, height=200)
    # Radius and colour both derive from the number killed.
    marker = folium.CircleMarker(
        list(location_fol.loc[point].values),
        popup=folium.Popup(iframe),
        radius=point_size(killed),
        color=color_point(killed),
    )
    marker.add_to(map2)

# Bare expression: renders the map as the cell output.
map2

# Click on markers for more information.
## Focussing on India
## From this part on, we do the same activities and plots for India.

# Terror Activities in India

In [None]:
# All attacks recorded for India.
is_india = data_new['country_txt'] == 'India'
terror_india = data_new[is_india]

# Separate frame holding only the rows that have coordinates;
# `terror_india` itself keeps every row.
terror_india_fol = terror_india.dropna(subset=['latitude', 'longitude'])

# Only the first 5000 geolocated rows are plotted; every series below shares
# that row index.
_first_india = terror_india_fol.iloc[:5000]
location_ind = _first_india[['latitude', 'longitude']]
city_ind = _first_india['city']
killed_ind = _first_india['nkill']
wound_ind = _first_india['nwound']
target_ind = _first_india['targtype1_txt']

# Map centred at latitude 20.59, longitude 78.96.
map4 = folium.Map(location=[20.59, 78.96], tiles='cartodbpositron', zoom_start=4.5)

# One filled circle per attack; radius and colour both come from the number
# killed. `point` is a row label shared by all the *_ind objects.
for point in location_ind.index:
    killed = killed_ind[point]
    shade = color_point(killed)
    popup_html = (
        f'<b>City: </b>{city_ind[point]!s}'
        f'<br><b>Killed: </b>{killed!s}'
        f'<br><b>Injured: </b>{wound_ind[point]!s}'
        f'<br><b>Target: </b>{target_ind[point]!s}'
    )
    folium.CircleMarker(
        list(location_ind.loc[point].values),
        popup=popup_html,
        radius=point_size(killed),
        color=shade,
        fill_color=shade,
    ).add_to(map4)

# Bare expression: renders the map as the cell output.
map4

# How did terrorism spread in India