# Global Terrorism Attacks - Predicting The Responsible Group

**Table of Contents**
<div id="toc"></div>

## Load data

In [1]:
%matplotlib inline

from collections import Counter
import pandas as pd
import csv
from sklearn import preprocessing
import os.path

import ConfigParser
config = ConfigParser.ConfigParser()
config.read('config.ini')
gtdConvertedFilename = 'gtd_converted.csv'

if not os.path.isfile(gtdConvertedFilename):
    %run CreateCSVFile.py

gtd = pd.read_csv(gtdConvertedFilename, encoding='latin1', low_memory=False)
gtd.tail(3)

ModuleNotFoundError: No module named 'ConfigParser'

## Examine Data

In [4]:
target = gtd['gname']
gcount = Counter(target)
print('No. of groups = {}'.format(len(gcount)))
g1 = gcount.most_common(1)[0]
print('Most common group, {} = {:.4f}%'.format(g1[0], 100 * g1[1] / target.size))
gcount.most_common(5)

No. of groups = 3454
Most common group, Unknown = 45.0000%


[(u'Unknown', 78306),
 (u'Taliban', 6575),
 (u'Shining Path (SL)', 4551),
 (u'Islamic State of Iraq and the Levant (ISIL)', 4287),
 (u'Farabundo Marti National Liberation Front (FMLN)', 3351)]

## Preprocessing

### Filter Groups

In [6]:
from collections import Counter

group_count = Counter(gtd['gname'])
# Remove groups with 3 attacks or less
filtered_groups = [group for group, counter in group_count.items() if counter > 3]
# Remove 'Unknown'
filtered_groups.remove('Unknown')

gtd = gtd[gtd['gname'].isin(filtered_groups)]

gtd.shape

(88657, 136)

### Define Features

In [7]:
if config.getboolean('Booleans', 'UseLessFeatures') == True:
    columns_to_keep = ['gname', 'iyear', 'country', 'attacktype1', 'weaptype1', 'targtype1']
else:
    columns_to_keep = ['gname', 'iyear', 'country', 'crit1', 'crit2', 'crit3', 'attacktype1', 'targtype1',
                       'targsubtype1','weaptype1', 'weapsubtype1', 'ransom']
    
gtd = gtd[columns_to_keep]
gtd.tail(3)

Unnamed: 0,gname,iyear,country,attacktype1,weaptype1,targtype1
170345,Boko Haram,2016,146,9,13,4
170347,Lord's Resistance Army (LRA),2016,229,6,13,14
170349,Sudan Liberation Movement,2016,195,2,5,14


### Transform Target

In [8]:
gtd.apply(preprocessing.LabelEncoder().fit_transform);

### Fill NaNs

In [9]:
gtd=gtd.fillna(0) #TODO: 0?

gtd.tail(3)

Unnamed: 0,gname,iyear,country,attacktype1,weaptype1,targtype1
170345,Boko Haram,2016,146,9,13,4
170347,Lord's Resistance Army (LRA),2016,229,6,13,14
170349,Sudan Liberation Movement,2016,195,2,5,14


## Save

In [10]:
if config.getboolean('Booleans', 'UseLessFeatures') == True:
    csvFileName = 'gtd_processed_5features.csv'
else:
    csvFileName = 'gtd_processed_11features.csv'

In [7]:
gtd.to_csv(csvFileName, encoding='utf-8', index=False)