# EXPLORATORY DATA ANALYSIS

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import folium
from folium.plugins import MarkerCluster
# Read the globalterrorismdb CSV export into a DataFrame, decoding it as Latin-1,
# then preview the first rows
df = pd.read_csv(
    r'E:\globalterrorism.csv\globalterrorismdb_0718dist.csv',
    encoding='latin1',
)
df.head()

## SUMMARY OF DATAFRAME

In [None]:
# Print pandas' concise summary of the DataFrame
df.info()

## ANALYSING COMPLETE DATASET AND COLUMNS

In [None]:
# Descriptive statistics of the DataFrame's columns, shown as the cell output
df.describe()

## FINDING CORRELATIONS BETWEEN COLUMNS

In [None]:
# Pairwise correlation between the numeric (and boolean) columns only.
# The frame also holds text columns (e.g. city, country_txt, attacktype1_txt);
# from pandas 2.0 on, calling corr() on such a frame raises ValueError instead
# of silently skipping them, so they are filtered out explicitly first.
df.select_dtypes(include=['number', 'bool']).corr()

## COLUMNS IN DATAFRAME

In [None]:
# Show the column labels of the DataFrame
df.columns

## UNITED STATES TERRORIST ATTACKS AND DEATH INJURIES

In [None]:
# Appearance settings common to both line plots
sharedStyle = dict(linewidth=2, alpha=0.8, grid=True, figsize=(20, 20), fontsize=20)

# Both series are drawn against the row index; the second call has no explicit
# kind and therefore also produces a line plot
df['nkillus'].plot(kind='line', color='green', linestyle=':',
                   label='TOTAL DEATHS IN United States', **sharedStyle)
df['nwoundus'].plot(color="orange", linestyle='-.',
                    label='TOTAL NON-FATAL INJURIES', **sharedStyle)

# Title, axis captions and legend for the combined figure
plt.title('TOTAL DEATHS AND NON-FATAL INJURIES IN UNITED STATES')
plt.xlabel('Data Index', fontsize=20)
plt.ylabel('TOTAL DEATHS OR INJURIES', fontsize=20)
plt.legend(loc='upper right')
plt.show()

#### CONCLUSION: Given that the data is sorted by date, attacks on US citizens appear to be very rare within a certain date range. After that range, however, terrorist acts against US citizens become increasingly frequent. Finding the date at which the increase starts would make it easier to identify the factors behind it, by taking into account the changes and developments in the country after that date.

## TOTAL DEATHS AND INJURIES OF ALL TIME

In [None]:
# One point per row: nkill on the x axis, nwound on the y axis
df.plot.scatter(x='nkill', y='nwound', color='red', alpha=0.7,
                figsize=(20, 20), fontsize=17)
plt.title('SCATTER PLOT: KILLS & WOUNDS')
plt.xlabel('Kills', fontsize=17)
plt.ylabel('Woundings', fontsize=17)
plt.show()

## TERRORISM ACTION IN SPECIFIC REGION

In [None]:
# Keep the rows whose region code equals 10 (the region named in the title below)
isRegionTen = df['region'] == 10
middleEastData = df[isRegionTen]

# Histogram of the iyear values of those rows, split into 32 bins
middleEastData['iyear'].plot(kind='hist', bins=32, color='green',
                             figsize=(20, 20), fontsize=17)
plt.title('FREQUENCY OF TERRORISM ACTION W.R.T. YEARS IN Middle East & North Africa')
plt.xlabel('Year', fontsize=15)
plt.ylabel('Frequency', fontsize=15)
plt.show()

### ----------------- TERRORIST ATTACKS IN 1970 AND LOCATIONS AFFECTED BY THEM ---------------------

In [None]:
# Boolean mask marking the rows whose iyear is 1970, then the matching rows
filterYear = df['iyear'].eq(1970)
filterData = df.loc[filterYear]

In [None]:

# Take the columns from 'city' through 'longitude' and drop every row that has
# a missing value in any of them, so each remaining row can become a marker
reqFilterData = filterData.loc[:, 'city':'longitude'].dropna()
reqFilterDataList = reqFilterData.values.tolist()

# Named attackMap rather than map so the builtin map() is not shadowed for the
# rest of the notebook session
attackMap = folium.Map(location=[0, 30], tiles='CartoDB positron', zoom_start=2)

# All markers are added to one cluster layer instead of directly to the map
markerCluster = MarkerCluster().add_to(attackMap)
for row in reqFilterDataList:
    # Positions 0, 1, 2 are used as popup text, latitude and longitude
    # (assumes the column slice starts with city, latitude, longitude in that
    # order -- verify against df.columns)
    city, latitude, longitude = row[:3]
    folium.Marker(location=[latitude, longitude], popup=city).add_to(markerCluster)

# Last expression of the cell: renders the map in the notebook
attackMap

#### 84% of the terrorist attacks in 1970 were carried out on the American continent. The Middle East and North Africa, currently the centre of terrorism, faced only one terrorist attack in 1970.

## TOTAL CASUALTIES IN TERRORIST ATTACK

In [None]:
# df was already loaded in the first cell and has not been modified since, so
# the CSV is not parsed a second time here
killData = df['nkill']

# Series.sum() skips missing values by default and runs vectorised, replacing
# the element-by-element builtin sum() over killData.dropna()
print('TOTAL CASUALTIES IN TERRORIST ATTACK:', int(killData.sum()))

#### TOTAL CASUALTIES IN TERRORIST ATTACK : 411868

## Data types of column attributes

In [None]:
# Columns from 'attacktype1' through 'attacktype1_txt', for every row
attackData = df.loc[:, 'attacktype1':'attacktype1_txt']

# Put the nkill column next to them (both share df's row index)
typeKillData = pd.concat([attackData, killData], axis='columns')

# Sum nkill for each attacktype1_txt value; the labels become the columns
typeKillFormatData = pd.pivot_table(
    typeKillData,
    values='nkill',
    columns='attacktype1_txt',
    aggfunc='sum',
)
typeKillFormatData

In [None]:
# Print pandas' concise summary of the pivoted nkill totals
typeKillFormatData.info()


## REASONS BEHIND THESE DEATHS

In [None]:

# The nkill column on its own
killData = df['nkill']

# Columns from 'attacktype1' through 'attacktype1_txt'
attackData = df.loc[:, 'attacktype1':'attacktype1_txt']

# Attack-type columns and nkill combined column-wise into one frame,
# displayed as the cell output
typeKillData = pd.concat([attackData, killData], axis='columns')
typeKillData

In [None]:
# Total nkill per attacktype1_txt value, laid out with one column per value
typeKillFormatData = pd.pivot_table(
    typeKillData,
    values='nkill',
    columns='attacktype1_txt',
    aggfunc='sum',
)
typeKillFormatData

In [None]:
# Select the columns from 'country' through 'country_txt' for every row and
# display the result
countryData = df.loc[:,'country':'country_txt']
countryData

## PLOTTING KILLED PEOPLE VS COUNTRIES

In [None]:

# Combine the country columns with nkill, then total nkill per country_txt
# value; the result has one column per country
countryKillData = pd.concat([countryData, killData], axis='columns')
countryKillFormatData = pd.pivot_table(
    countryKillData,
    values='nkill',
    columns='country_txt',
    aggfunc='sum',
)

# Default figure size of 25 x 25 inches, set before the figure is created
plt.rcParams["figure.figsize"] = [25, 25]

# First 50 columns of the pivot table: 50 bars give a readable chart
labels = countryKillFormatData.columns.tolist()[:50]
index = np.arange(len(labels))
# First-row totals of those columns, converted from float to int
values = [int(total) for total in countryKillFormatData.iloc[0, :50]]
# Colour list handed to the bar chart
colors = ['orange', 'blue', 'green', 'yellow', 'brown', 'gray', 'black', 'red', 'magenta', 'green']

fig, ax = plt.subplots(1, 1)
ax.yaxis.grid(True)
plt.bar(index, values, color=colors, width=1.0)
plt.title('Number of people Deaths caused by countries')
plt.ylabel('Dead People', fontsize=23)
plt.xticks(index, labels, fontsize=20, rotation=90)
plt.yticks(fontsize=20, rotation=90)
plt.show()

In [None]:

# Column positions 50 up to (not including) 101 of the per-country totals
countryRange = slice(50, 101)
labels = countryKillFormatData.columns.tolist()[countryRange]
index = np.arange(len(labels))
# First-row totals of those columns as ints
values = [int(total) for total in countryKillFormatData.iloc[0, countryRange]]
colors = ['red', 'green', 'blue', 'purple', 'yellow', 'brown', 'black', 'gray', 'magenta', 'orange']

fig, ax = plt.subplots(1, 1)
ax.yaxis.grid(True)
# Set after the figure above already exists, so this only affects figures
# created later
plt.rcParams["figure.figsize"] = [25, 25]

plt.bar(index, values, color=colors, width=1.0)
plt.title('Number of people Death caused by countries')
plt.ylabel('Killed People', fontsize=15)
plt.xticks(index, labels, fontsize=15, rotation=88)
plt.yticks(fontsize=20, rotation=90)
plt.show()

In [None]:
# The two preceding chart cells cover column positions 0-49 and 50-100 of
# countryKillFormatData. This cell used to start at position 152, so the
# countries at positions 101-151 were never charted. Both remaining ranges are
# drawn here, one figure each; after the loop, labels/values/index/fig/ax refer
# to the 152-205 chart exactly as before.
plt.rcParams["figure.figsize"] = [25, 25]  # applied before the figures are created
colors = ['red', 'green', 'blue', 'purple', 'yellow', 'brown', 'black', 'gray', 'magenta', 'orange']
allLabels = countryKillFormatData.columns.tolist()

for start, stop in [(101, 152), (152, 206)]:
    labels = allLabels[start:stop]
    index = np.arange(len(labels))
    # First-row totals for this range, converted from float to int
    values = [int(total) for total in countryKillFormatData.iloc[0, start:stop]]

    fig, ax = plt.subplots(1, 1)
    ax.yaxis.grid(True)
    plt.bar(index, values, color=colors, width=0.8)
    plt.ylabel('Killed People', fontsize=15)
    plt.xticks(index, labels, fontsize=15, rotation=90)
    plt.title('Number of people killed by countries')
    plt.yticks(fontsize=20, rotation=90)
    plt.show()

## CONCLUSION

### MOST ATTACKED REGION: Middle East & North Africa

### MOST ATTACKED CITY: Baghdad

### MOST ATTACKED COUNTRY: Iraq

### Year with the most attacks: 2014

### Most common attack type: Bombing/Explosion

### Group with the most attacks: Taliban

### Month with the most attacks: 5 (May)