# **GLOBAL TERRORISM ANALYSIS**




## **PART 2 : DATA PREPROCESSING**

**Author**: Samarpan Das



---



---

Importing necessary libraries

In [20]:
import time
import matplotlib.pyplot as plt 
import matplotlib.ticker as ticker
from matplotlib import animation
import numpy as np
import pandas as pd
import seaborn as sns

Connecting Colab to Google Drive

In [21]:
# Mount Google Drive into this runtime at /content/drive; the cells below read
# and write CSV files under '/content/drive/My Drive/'.
# NOTE(review): google.colab is presumably only importable inside a Colab
# runtime — confirm before running this notebook elsewhere.
from google.colab import drive 
drive.mount('/content/drive')

Mounted at /content/drive


Importing data from Google Drive

In [22]:
# BaseForAnalysis.csv was uploaded to Google Drive beforehand.
# The file is comma-separated and is decoded as ISO-8859-1.
primary_df = pd.read_csv(
    '/content/drive/My Drive/BaseForAnalysis.csv',
    sep=',',
    encoding="ISO-8859-1",
)

Initial layout of the data

In [23]:
# Preview the first 10 rows of the freshly loaded frame (original column names).
primary_df.head(10)

Unnamed: 0,eventid,iyear,imonth,iday,extended,country_txt,region_txt,city,latitude,longitude,vicinity,crit1,multiple,success,suicide,attacktype1_txt,targtype1_txt,natlty1_txt,gname,nperps,claimed,weaptype1_txt,nkill,nkillter,nwound,propextent_txt,ishostkid,ransom,nreleased
0,197000000002,1970,0,0,0,Mexico,North America,Mexico city,19.371887,-99.086624,0,1,0,1,0,Hostage Taking (Kidnapping),Government (Diplomatic),Belgium,23rd of September Communist League,7.0,,Unknown,0.0,,0.0,,1.0,1.0,
1,197001000001,1970,1,0,0,Philippines,Southeast Asia,Unknown,15.478598,120.599741,0,1,0,1,0,Assassination,Journalists & Media,United States,Unknown,,,Unknown,1.0,,0.0,,0.0,0.0,
2,197001000002,1970,1,0,0,Greece,Western Europe,Athens,37.99749,23.762728,0,1,0,1,0,Bombing/Explosion,Government (Diplomatic),United States,Unknown,,,Explosives,,,,,0.0,0.0,
3,197001000003,1970,1,0,0,Japan,East Asia,Fukouka,33.580412,130.396361,0,1,0,1,0,Facility/Infrastructure Attack,Government (Diplomatic),United States,Unknown,,,Incendiary,,,,,0.0,0.0,
4,197001010002,1970,1,1,0,United States,North America,Cairo,37.005105,-89.176269,0,1,0,1,0,Armed Assault,Police,United States,Black Nationalists,-99.0,0.0,Firearms,0.0,0.0,0.0,Minor (likely < $1 million),0.0,0.0,
5,197001050001,1970,1,1,0,United States,North America,Baraboo,43.4685,-89.744299,0,1,0,0,0,Bombing/Explosion,Military,United States,"Weather Underground, Weathermen",,,Explosives,0.0,,0.0,Minor (likely < $1 million),0.0,0.0,
6,197001020001,1970,1,2,0,Uruguay,South America,Montevideo,-34.891151,-56.187214,0,1,0,0,0,Assassination,Police,Uruguay,Tupamaros (Uruguay),3.0,,Firearms,0.0,,0.0,,0.0,0.0,
7,197001020002,1970,1,2,0,United States,North America,Oakland,37.791927,-122.225906,0,1,0,1,0,Bombing/Explosion,Utilities,United States,Unknown,-99.0,0.0,Explosives,0.0,0.0,0.0,Minor (likely < $1 million),0.0,0.0,
8,197001020003,1970,1,2,0,United States,North America,Madison,43.076592,-89.412488,0,1,0,1,0,Facility/Infrastructure Attack,Military,United States,New Year's Gang,1.0,1.0,Incendiary,0.0,0.0,0.0,Minor (likely < $1 million),0.0,0.0,
9,197001030001,1970,1,3,0,United States,North America,Madison,43.07295,-89.386694,0,1,0,1,0,Facility/Infrastructure Attack,Government (General),United States,New Year's Gang,1.0,0.0,Incendiary,0.0,0.0,0.0,Minor (likely < $1 million),0.0,0.0,


In [24]:
# Report the (rows, columns) shape of the loaded frame.
print ('dataframe shape: ', primary_df.shape)

dataframe shape:  (201183, 29)


In [29]:
# Missing-value overview. The boolean null mask is built once and reused:
# first to check whether any cell is missing at all, then to count every
# missing cell across all columns.
null_mask = primary_df.isnull()
print('Existance of nul values', null_mask.values.any())
print('Total number of null values in entire dataframe: ', null_mask.sum().sum())

Existance of nul values True
Total number of null values in entire dataframe:  688534


In [30]:
# Missing-value count broken down per column (prints a Series indexed by column name).
print ('Number of null values in named col of dataframe: ', primary_df.isnull().sum())

Number of null values in named col of dataframe:  eventid                 0
iyear                   0
imonth                  0
iday                    0
extended                0
country_txt             0
region_txt              0
city                  426
latitude             4627
longitude            4628
vicinity                0
crit1                   0
multiple                0
success                 0
suicide                 0
attacktype1_txt         0
targtype1_txt           0
natlty1_txt          1850
gname                   0
nperps              71095
claimed             66094
weaptype1_txt           0
nkill               11950
nkillter            67867
nwound              18924
propextent_txt     130685
ishostkid             178
ransom             121622
nreleased          188588
dtype: int64


### Changing the content and features of the data

Renaming certain columns to better identifiable names

In [36]:
# Original column name -> more readable name.
column_renames = {
    'iyear': 'year',
    'imonth': 'month',
    'iday': 'day',
    'country_txt': 'country',
    'region_txt': 'region',
    'crit1': 'crit',
    'attacktype1_txt': 'attacktype',
    'targtype1_txt': 'targettype',
    'natlty1_txt': 'nationalityofvic',
    'gname': 'organisation',
    'claimed': 'claimedresp',
    'weaptype1_txt': 'weapontype',
    'nkill': 'nkilled',
    'nkillter': 'nkillonlyter',
    'nwound': 'nwounded',
    'propextent_txt': 'propdamageextent',
    'ishostkid': 'victimkidnapped',
    'ransom': 'ransomdemanded',
}

# Rename in place; columns that are not keys of the mapping keep their names.
primary_df.rename(columns=column_renames, inplace=True)

In [39]:
# Add column ncasualties (number of dead + injured people) from nkilled and nwounded.
# A plain `+` yields NaN whenever either count is missing, which throws away the
# count that IS known. Series.add(fill_value=0) treats a single missing count as 0,
# so the known count is kept; rows where BOTH counts are missing still give NaN.
# For rows with one unknown count the result is therefore a lower bound.
primary_df['ncasualties'] = primary_df['nkilled'].add(primary_df['nwounded'], fill_value=0)

In [43]:
# Limit long strings: swap verbose category labels for short ones.
# Values that are not keys of a mapping are left unchanged.
weapon_short_labels = {
    'Vehicle (not to include vehicle-borne explosives, i.e., car or truck bombs)': 'Vehicle',
}
primary_df['weapontype'] = primary_df['weapontype'].replace(weapon_short_labels)

# Property-damage labels: keep only the leading severity word.
damage_short_labels = {
    'Minor (likely < $1 million)': 'Minor',
    'Major (likely > $1 million but < $1 billion)': 'Major',
    'Catastrophic (likely > $1 billion)': 'Catastrophic',
}
primary_df['propdamageextent'] = primary_df['propdamageextent'].replace(damage_short_labels)

.....

....

### Glimpse of the final preprocessed data

In [53]:
# Preview the first 10 rows after preprocessing (renamed columns, shortened labels, ncasualties).
primary_df.head(10)

Unnamed: 0,eventid,year,month,day,extended,country,region,city,latitude,longitude,vicinity,crit,multiple,success,suicide,attacktype,targettype,nationality,organisation,nperps,claimedresp,weapontype,nkilled,nkillonlyter,nwounded,propdamageextent,victimkidnapped,ransomdemanded,nreleased,ncasualties
0,197000000002,1970,0,0,0,Mexico,North America,Mexico city,19.371887,-99.086624,0,1,0,1,0,Hostage Taking (Kidnapping),Government (Diplomatic),Belgium,23rd of September Communist League,7.0,,Unknown,0.0,,0.0,,1.0,1.0,,0.0
1,197001000001,1970,1,0,0,Philippines,Southeast Asia,Unknown,15.478598,120.599741,0,1,0,1,0,Assassination,Journalists & Media,United States,Unknown,,,Unknown,1.0,,0.0,,0.0,0.0,,1.0
2,197001000002,1970,1,0,0,Greece,Western Europe,Athens,37.99749,23.762728,0,1,0,1,0,Bombing/Explosion,Government (Diplomatic),United States,Unknown,,,Explosives,,,,,0.0,0.0,,
3,197001000003,1970,1,0,0,Japan,East Asia,Fukouka,33.580412,130.396361,0,1,0,1,0,Facility/Infrastructure Attack,Government (Diplomatic),United States,Unknown,,,Incendiary,,,,,0.0,0.0,,
4,197001010002,1970,1,1,0,United States,North America,Cairo,37.005105,-89.176269,0,1,0,1,0,Armed Assault,Police,United States,Black Nationalists,-99.0,0.0,Firearms,0.0,0.0,0.0,Minor,0.0,0.0,,0.0
5,197001050001,1970,1,1,0,United States,North America,Baraboo,43.4685,-89.744299,0,1,0,0,0,Bombing/Explosion,Military,United States,"Weather Underground, Weathermen",,,Explosives,0.0,,0.0,Minor,0.0,0.0,,0.0
6,197001020001,1970,1,2,0,Uruguay,South America,Montevideo,-34.891151,-56.187214,0,1,0,0,0,Assassination,Police,Uruguay,Tupamaros (Uruguay),3.0,,Firearms,0.0,,0.0,,0.0,0.0,,0.0
7,197001020002,1970,1,2,0,United States,North America,Oakland,37.791927,-122.225906,0,1,0,1,0,Bombing/Explosion,Utilities,United States,Unknown,-99.0,0.0,Explosives,0.0,0.0,0.0,Minor,0.0,0.0,,0.0
8,197001020003,1970,1,2,0,United States,North America,Madison,43.076592,-89.412488,0,1,0,1,0,Facility/Infrastructure Attack,Military,United States,New Year's Gang,1.0,1.0,Incendiary,0.0,0.0,0.0,Minor,0.0,0.0,,0.0
9,197001030001,1970,1,3,0,United States,North America,Madison,43.07295,-89.386694,0,1,0,1,0,Facility/Infrastructure Attack,Government (General),United States,New Year's Gang,1.0,0.0,Incendiary,0.0,0.0,0.0,Minor,0.0,0.0,,0.0


In [56]:
# Report the (rows, columns) shape of the preprocessed frame.
print ('final dataframe shape: ', primary_df.shape)

final dataframe shape:  (201183, 30)


In [54]:
# Check general information: per-column non-null counts and dtypes.
primary_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 201183 entries, 0 to 201182
Data columns (total 30 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   eventid           201183 non-null  int64  
 1   year              201183 non-null  int64  
 2   month             201183 non-null  int64  
 3   day               201183 non-null  int64  
 4   extended          201183 non-null  int64  
 5   country           201183 non-null  object 
 6   region            201183 non-null  object 
 7   city              200757 non-null  object 
 8   latitude          196556 non-null  float64
 9   longitude         196555 non-null  float64
 10  vicinity          201183 non-null  int64  
 11  crit              201183 non-null  int64  
 12  multiple          201183 non-null  int64  
 13  success           201183 non-null  int64  
 14  suicide           201183 non-null  int64  
 15  attacktype        201183 non-null  object 
 16  targettype        20

In [57]:
# Save the preprocessed frame to Google Drive as BaseForAnalysis_Version2.csv.
# NOTE(review): to_csv writes the row index by default, so the file gains an
# unnamed leading column — confirm the downstream reader expects that
# (otherwise pass index=False here).
primary_df.to_csv(
    "/content/drive/My Drive/BaseForAnalysis_Version2.csv",
    sep=",",
)

.....

### End of data preprocessing 


