**Dief Mohamed Dief**

Data Science and Business Analytics Intern @ The Sparks Foundation

Topic : Exploratory Data Analysis (EDA) - Terrorism

Dataset : globalterrorismdb_0718dist.csv [https://bit.ly/2TK5Xn5]

In [None]:
#import the required libraries
import numpy as np
import matplotlib.pyplot as plt 
import pandas as pd
import seaborn as sns
%matplotlib inline

In [None]:
# Load the dataset from CSV, decoding it as ISO-8859-1.
# NOTE(review): the path looks like a Colab upload location - confirm before running elsewhere.
df = pd.read_csv(
    "/content/globalterrorismdb_0718dist.csv",
    encoding='ISO-8859-1',
)
# Preview the first rows as the cell output.
df.head()

In [None]:
# Preview the last rows of the loaded frame.
df.tail()

In [None]:
# List the column labels of the loaded frame as an array.
df.columns.values

In [None]:
# Replace the raw column codes with the readable names used by the rest of the notebook.
df = df.rename(
    columns={
        'iyear': 'Year',
        'imonth': 'Month',
        'iday': 'Day',
        'gname': 'Group',
        'country_txt': 'Country',
        'region_txt': 'Region',
        'provstate': 'State',
        'city': 'City',
        'latitude': 'Latitude',
        'longitude': 'Longitude',
        'summary': 'summary',
        'attacktype1_txt': 'Attacktype',
        'targtype1_txt': 'Targettype',
        'weaptype1_txt': 'Weapon',
        'nkill': 'kill',
        'nwound': 'Wound',
    }
)

# Show the column labels after renaming.
df.columns.values

In [None]:
# Keep only the columns used by the analysis below.
# .copy() makes the subset an independent frame, so the later column
# assignments (Wound, kill, human_damages) do not raise SettingWithCopyWarning.
df = df[['Year','Month','Day','Country','State','Region','City','Latitude','Longitude',"Attacktype",'kill',
               'Wound','target1','summary','Group','Targettype','Weapon','motive']].copy()

In [None]:
# Preview the first rows of the reduced frame.
df.head()

In [None]:
# Remove the duplicated rows.
# drop_duplicates() returns a new frame instead of modifying df, so the result
# must be assigned back; otherwise the duplicates stay in df.
df = df.drop_duplicates()
# Show the remaining (rows, columns) so the effect of the de-duplication is visible.
df.shape

In [None]:
# Count the missing values in every column.
df.isna().sum()

In [None]:
# Treat missing wounded / killed counts as zero.
for casualty_col in ('Wound', 'kill'):
    df[casualty_col] = df[casualty_col].fillna(0)

# Add a combined casualty column: wounded plus killed per row.
df["human_damages"] = df['Wound'].add(df['kill'])

In [None]:
# Print the frame summary (column dtypes, non-null counts, memory usage).
df.info()

In [None]:
# Show descriptive statistics for the columns that describe() summarizes by default.
df.describe()

In [None]:
# Count the distinct values in every column.
df.nunique()

In [None]:
# Visualize the number of attacks in each year.
# The counts are sorted by year so the bars run chronologically. The x values
# are taken from the same Series as the heights, so every year label is
# guaranteed to sit under its own count (unique() returns years in order of
# appearance, which need not match the sorted counts).
years_count = df['Year'].value_counts(dropna=False).sort_index()
year = years_count.index.to_numpy()
plt.figure(figsize=(18, 10))
sns.barplot(x=year,
            y=years_count.to_numpy(),
            palette="tab10")
plt.xticks(rotation=50)
plt.xlabel('Attacking Year', fontsize=20)
plt.ylabel('Number of Attacks Each Year', fontsize=20)
plt.title('Attacks In Years', fontsize=30)
plt.show()



In [None]:
# Visualize the number of attacks by region in each year.
# The crosstab counts rows per (Year, Region) pair; each region is drawn as an
# overlapping (unstacked) area.
pd.crosstab(df["Year"], df["Region"]).plot.area(stacked=False, figsize=(20, 10))
plt.xlabel("Year", fontsize=20)
plt.ylabel('Number of Attacks', fontsize=20)
plt.title('Terrorist Activities By Region In Each Year', fontsize=25)
plt.show()

In [None]:
# Attack counts for the 20 countries with the most recorded attacks
# (value_counts() sorts in descending order).
attacks = df["Country"].value_counts().head(20)
attacks

In [None]:
# Visualize the number of attacks in each country.
# Only the top 10 countries are plotted, matching the chart title.

# Compute the counts once and reuse the same Series for both the labels and the heights.
attacks = df["Country"].value_counts()[ :10]
plt.figure(figsize = (18 ,10))
sns.barplot(x = attacks.index,
           y = attacks,
           palette = "Paired")
plt.title("Top 10 Countries Affected " , fontsize = 30)
plt.xlabel("Country" , fontsize = 20)
plt.ylabel("Number of Attacks" , fontsize = 20)
plt.show()

In [None]:
# Visualize the total number of people killed in each year.
# groupby() is called without the `axis` keyword: row-wise grouping is the
# default and the keyword is deprecated in recent pandas releases.
df[['Year','kill']].groupby(["Year"]).sum().plot(kind='bar',figsize=(20,10),color=['darkslateblue'])
plt.xticks(rotation=50)
plt.title('Number of killed Each Year',fontsize=20)
plt.ylabel('Number of people',fontsize=15)
plt.xlabel('Year',fontsize=15)
plt.show()


In [None]:
# Visualize the most affected cities.
# value_counts() already returns the counts in descending order, so the first
# ten rows are the top ten. The frame is deliberately not re-sorted by column
# label: the name of the counts column differs between pandas versions
# ('City' before 2.0, 'count' from 2.0), so sorting by 'City' can raise KeyError.
# NOTE(review): placeholder values such as 'Unknown' are not filtered here,
# unlike the group charts - confirm whether they should be excluded.
df['City'].value_counts().head(10).to_frame().plot(kind='bar',figsize=(20,10),color='blue')
plt.title("The Top 10 Affected Cities",fontsize=20)
plt.xlabel("City" ,fontsize=15)
plt.ylabel("Number of Attack",fontsize=15)
plt.show()

In [None]:
# Visualize how many attacks were recorded for each attack type.
df["Attacktype"].value_counts().plot.bar(figsize=(18, 10), color="blue")
plt.xlabel("Attacktype", fontsize=15)
plt.ylabel("Number of Attack", fontsize=15)
plt.title("Name of Attacktype", fontsize=20)
plt.show()

In [None]:
# Visualize the number of people killed for each attack type.
# groupby() is called without the `axis` keyword: row-wise grouping is the
# default and the keyword is deprecated in recent pandas releases.
df[["Attacktype" , "kill"]].groupby(["Attacktype"]).sum().plot(kind="bar" , figsize = (18,10) , color ="red")
plt.xticks(rotation = 50)
plt.title("Number of Killed people vs The Attacktype",fontsize=20)
plt.xlabel("Attacktype",fontsize=15)
plt.ylabel("Number of Killed People",fontsize=15)
plt.show()

In [None]:
# Visualize the number of people wounded for each attack type.
# groupby() is called without the `axis` keyword: row-wise grouping is the
# default and the keyword is deprecated in recent pandas releases.
df[["Attacktype" , "Wound"]].groupby(["Attacktype"]).sum().plot(kind="bar" , figsize = (18,10) , color ="orange")
plt.xticks(rotation = 50)
plt.title("Number of Wounded people vs The Attacktype",fontsize=20)
plt.xlabel("Attacktype",fontsize=15)
plt.ylabel("Number of Wounded People",fontsize=15)
plt.show()

In [None]:
# Visualize the number of attacks per target type, most frequent first.
plt.subplots(figsize=(20,10))
# The column is passed as `x=` explicitly: in recent seaborn releases the first
# positional argument is `data`, so a positional Series is no longer read as x.
sns.countplot(x=df["Targettype"],order=df['Targettype'].value_counts().index,palette="gist_heat",edgecolor=sns.color_palette("mako"))
plt.xticks(rotation=90)
# Labels describe what is actually plotted (target types, not attack types or years).
plt.xlabel("Targettype",fontsize=15)
plt.ylabel("count",fontsize=15)
plt.title("Attacks per Target Type",fontsize=20)
plt.show()

In [None]:
# Visualize the ten groups with the most recorded attacks.
# The 'Unknown' entry is removed before taking the top ten.
(
    df['Group']
    .value_counts()
    .to_frame()
    .drop('Unknown')
    .head(10)
    .plot.bar(color='yellow', figsize=(20, 10))
)
plt.xlabel("terrorist group name", fontsize=15)
plt.ylabel("Attack number", fontsize=15)
plt.title("Top 10 terrorist group attack", fontsize=20)
plt.show()

In [None]:
# Visualize the ten groups responsible for the most deaths.
# The 'Unknown' entry is removed before ranking. groupby() is called without the
# `axis` keyword: row-wise grouping is the default and the keyword is deprecated
# in recent pandas releases.
df[['Group','kill']].groupby(['Group']).sum().drop('Unknown').sort_values('kill',ascending=False).head(10).plot(kind='bar',color='red',figsize=(20,10))
plt.title("Top 10 terrorist group attack",fontsize=20)
plt.xlabel("terrorist group name",fontsize=15)
plt.ylabel("No of killed people",fontsize=15)
plt.show()

In [None]:
# Top ten (group, country) pairs by total number of people killed.
df1 = df[['Group', 'Country', 'kill']]
df1 = (
    # Row-wise grouping is the default; the deprecated `axis` keyword is not passed.
    df1.groupby(['Group', 'Country'])
    .sum()
    # Drop the 'Unknown' group by naming the index level explicitly, and do it
    # before sorting so the drop runs on the still-sorted group index.
    .drop('Unknown', level='Group')
    .sort_values('kill', ascending=False)
    .reset_index()
    .head(10)
)
df1

In [None]:
# Total number of people killed across all recorded attacks.
kill = df.loc[:,'kill']
# Series.sum() skips missing values by default and is vectorised, so it replaces
# the Python-level sum() over kill.dropna().
print('Number of people killed by terror attack:', int(kill.sum()))

In [None]:
# One-row table: total number of people killed, with one column per attack type.
nkilled_due_to_attacktype = df.pivot_table(
    values='kill',
    columns='Attacktype',
    aggfunc='sum',
)
nkilled_due_to_attacktype

In [None]:
# One-row table: total number of people killed, with one column per country.
nkilled_at_each_country = df.pivot_table(
    values='kill',
    columns='Country',
    aggfunc='sum',
)
nkilled_at_each_country

**Conclusion and Results :**
      

1.   Country with the most attacks is : **Iraq**
2.   City with the most attacks is : **Baghdad**
3.   Region with the most attacks is: **South Asia**
4.   Year with the most attacks is : **2011**
5.   Month with the most attacks is : **5** 
6.   Group with the most attacks is: **Shining Path (SL)**
7.   Most common attack type is: **Bombing/Explosion**












