## Global Terrorism Index
&emsp;A country’s annual GTI score is based on a unique scoring system to account for the relative impact of incidents in the year. 

&emsp;The four factors counted in each country’s
yearly score are:
- total number of terrorist incidents in a given year
- total number of fatalities caused by terrorists in a given year
- total number of injuries caused by terrorists in a given year
- a measure of the total property damage from terrorist incidents in a given year.

&emsp;Each of these factors are then are weighted accordingly to obtain the raw score and then cumulated with previous year raw scores with different weightage. The Final score is then log transformed to obtain the GTI score    

> Note: the datset used is a subset of the GTD data which is taken by using sqlite3 in DB Browser. Hope you can reproduce it

### Loading libraries and data

In [1]:
import pandas as pd # data manupulation library
import numpy as np # contains high-level mathematical functions to operate arrays

In [2]:
df = pd.read_csv("./country_scores2.csv") # loading the subset of the data  
df.head() # printing the 1st few rows

Unnamed: 0,country_txt,iyear,nkill,eventid,nwound,property,propextent
0,United Kingdom,1993,0.0,199301000000.0,2.0,0,
1,Egypt,1993,0.0,199301000000.0,0.0,1,3.0
2,Sri Lanka,1993,2.0,199301000000.0,0.0,-9,4.0
3,Israel,1993,1.0,199301000000.0,0.0,0,
4,Israel,1993,0.0,199301000000.0,0.0,1,3.0


In [5]:
df.fillna(0, inplace=True) # filling the NAN values with zeroes

In [6]:
df.isnull().sum() # checking for NUlls

country_txt    0
iyear          0
nkill          0
eventid        0
nwound         0
property       0
propextent     0
dtype: int64

In [7]:
# Grouping and aggregating the data
tmp = df.groupby(["country_txt", "iyear"]).agg({ # groupby country, year and initialize the aggregate    
    "nkill":"sum", # summing the fatalities
    "eventid":"count", # counting the no of attacks
    "nwound":"sum", # summing the injuries
    "property":"count", # counting the property damage
    "propextent": lambda x: np.median(tuple(x)) #determinine the meadian property extent
})

tmp = tmp.reset_index(level=0).reset_index(level=0) # reset the index twice
tmp # printing the dataframe

Unnamed: 0,iyear,country_txt,nkill,eventid,nwound,property,propextent
0,1973,Afghanistan,0.0,1,1.0,1,0.0
1,1979,Afghanistan,53.0,3,1.0,3,0.0
2,1987,Afghanistan,0.0,1,2.0,1,0.0
3,1988,Afghanistan,128.0,11,106.0,11,2.0
4,1989,Afghanistan,10.0,10,84.0,10,0.0
...,...,...,...,...,...,...,...
4033,2013,Zimbabwe,1.0,3,1.0,3,3.0
4034,2014,Zimbabwe,0.0,1,0.0,1,3.0
4035,2017,Zimbabwe,0.0,3,1.0,3,3.0
4036,2018,Zimbabwe,2.0,2,47.0,2,1.5


### Raw Score

In [8]:
## a function to obtain the raw scores
def get_raw_score(z): 
    return z["eventid"] + z["nkill"] * 3 + z["nwound"] * 0.5 + (z["property"] * z["propextent"]) # returns the score 

tmp["raw_scores"] = tmp.apply(lambda z: get_raw_score(z), 1) # obtaining the required raw scores

In [9]:
tmp = tmp.sort_values(by=["country_txt", "iyear"], ascending=False) # sorting the scores by country and year
tmp = tmp.reset_index(level=0).drop("index", 1) # reset the index

In [10]:
tmp2 = tmp.iloc[:,0:2].values # subsetting the raw score data

In [11]:
## obtaining the GTI scores
GTI_scores = [] # initializing the empty list

# iterating through each value in the subset 
for x,y in tmp2:
    if x<=1974: continue # ignoring data before 1974
        
    a = float(tmp[(tmp["iyear"]==x) & (tmp["country_txt"]==y)]["raw_scores"]) # reference year score
    
    try:
        b =  float(tmp[(tmp["iyear"]==x-1) & (tmp["country_txt"]==y)]["raw_scores"]) # previous year score
    except:
        b=0 # setting the score to 0
        
    try:
        c =  float(tmp[(tmp["iyear"]==x-2) & (tmp["country_txt"]==y)]["raw_scores"]) # 2years ago score
    except:
        c=0 # setting the score to 0
    
    try:
        d =  float(tmp[(tmp["iyear"]==x-3) & (tmp["country_txt"]==y)]["raw_scores"]) # 3 years ago  score
    except:
        d=0 # setting the score to 0
    
    try:
        e =  float(tmp[(tmp["iyear"]==x-4) & (tmp["country_txt"]==y)]["raw_scores"]) # 4 years ago score
    except:
        e=0 #  setting the score to 0  
    
    GTI_scores.append((x, y, (a*16 + b*8 + c*4 + d*2 + e)/31)) # adding the score to the list of GTI scores

In [12]:
tmp3 = pd.DataFrame(GTI_scores, columns=["iyear", "country_txt", "GTI_score"]) # creating the GTI scores dataframe

In [13]:
finDf = tmp.merge(tmp3, on=["iyear", "country_txt"]) # merging the dataframes

## GTI scores

In [19]:
## obtaining the required params
min_score = finDf["GTI_score"].min() # minimum raw score
max_score = finDf["GTI_score"].max() # maximum raw score

low, high = 0, 10 # initialize 2 variables
root = 2*(high-low) # determining the root

score_range = 2*(max_score-min_score) # calculating the range

cut_off = (score_range)**(1/root) # obtaining the cut off

In [45]:
## calculating the bands
bands = [] # initializing an empty list

x, i, y = low, low,0 # initializing some variables
while(x!=high): # loop until x becomes high

    bands.append((i, x, y)) # append the values of i, x, y in  bands list
    
    x += 0.5  # increaing x by 0.5
    
    if i == 0: y += cut_off # increasing y by cut off value
    else: y *= cut_off # multiplying y by cut off
    
    i += 1 # incrementing i by 1

In [88]:
# creating a bands dataframe
bandsDf = pd.DataFrame(bands, columns=["BandNumber", "Bands", "BandCutoff"]) 

In [101]:
# creating the band dictionary mapper
bands_dct = {} # intializing empty dictionary
for i in range(20): # intialize a loop
    
    bands_dct[i] = (bands[i][1], bands[i][2]) # assigning band values to dictionary

In [117]:
bands_dct # printing th band dictionary

{0: (0, 0),
 1: (0.5, 1.765428170549937),
 2: (1.0, 3.116736625371298),
 3: (1.5, 5.502374638615235),
 4: (2.0, 9.714047191930865),
 5: (2.5, 17.14945256268626),
 6: (3.0, 30.276126663676134),
 7: (3.5, 53.45032690719193),
 8: (4.0, 94.36271284705992),
 9: (4.5, 166.59059150971405),
 10: (5.0, 294.10372319982633),
 11: (5.5, 519.2189980005945),
 12: (6.0, 916.643845754961),
 13: (6.5, 1618.2688676570394),
 14: (7.0, 2856.9374464856855),
 15: (7.5, 5043.717849524833),
 16: (8.0, 8904.321575856688),
 17: (8.5, 15719.940149653004),
 18: (9.0, 27752.42517955641),
 19: (9.5, 48994.91321306828)}

In [145]:
## function to obtain the band value
def get_bands(y): 
    res = [] # intializing a list
    
    for key,value in bands_dct.items(): # iterate through key, value pairs
        if key==0:continue # ignoring the first key
        
        if bands_dct[key][1] > y and y >= bands_dct[key-1][1]: # checking the condition
            return "-".join([str(bands_dct[key-1][0]), str(bands_dct[key][0])]) # returning the final value

finDf["band_bin"] = finDf["GTI_score"].apply(lambda x: get_bands(x)) # obtaining the required band values

In [146]:
finDf # printing the final dataframe

Unnamed: 0,iyear,country_txt,nkill,eventid,nwound,property,propextent,raw_scores,GTI_score,band_bin
0,2019,Zimbabwe,0.0,2,0.0,2,3.0,8.0,14.645161,2.0-2.5
1,2018,Zimbabwe,2.0,2,47.0,2,1.5,34.5,21.161290,2.5-3.0
2,2017,Zimbabwe,0.0,3,1.0,3,3.0,12.5,7.209677,1.5-2.0
3,2014,Zimbabwe,0.0,1,0.0,1,3.0,4.0,6.274194,1.5-2.0
4,2013,Zimbabwe,1.0,3,1.0,3,3.0,15.5,8.419355,1.5-2.0
...,...,...,...,...,...,...,...,...,...,...
3837,1990,Afghanistan,12.0,2,122.0,2,0.0,99.0,133.032258,4.0-4.5
3838,1989,Afghanistan,10.0,10,84.0,10,0.0,82.0,163.870968,4.0-4.5
3839,1988,Afghanistan,128.0,11,106.0,11,2.0,470.0,243.096774,4.5-5.0
3840,1987,Afghanistan,0.0,1,2.0,1,0.0,2.0,1.032258,0-0.5


####  Log-Tranformatiom

In [166]:
finDf["band_val"] = finDf["GTI_score"].apply(lambda x:abs(np.log10(x))*2) # log tranforming the band values

In [167]:
finDf # printing the transformed dataframe

Unnamed: 0,iyear,country_txt,nkill,eventid,nwound,property,propextent,raw_scores,GTI_score,band_bin,band_val
0,2019,Zimbabwe,0.0,2,0.0,2,3.0,8.0,14.645161,2.0-2.5,2.331388
1,2018,Zimbabwe,2.0,2,47.0,2,1.5,34.5,21.161290,2.5-3.0,2.651084
2,2017,Zimbabwe,0.0,3,1.0,3,3.0,12.5,7.209677,1.5-2.0,1.715832
3,2014,Zimbabwe,0.0,1,0.0,1,3.0,4.0,6.274194,1.5-2.0,1.595116
4,2013,Zimbabwe,1.0,3,1.0,3,3.0,15.5,8.419355,1.5-2.0,1.850558
...,...,...,...,...,...,...,...,...,...,...,...
3837,1990,Afghanistan,12.0,2,122.0,2,0.0,99.0,133.032258,4.0-4.5,4.247914
3838,1989,Afghanistan,10.0,10,84.0,10,0.0,82.0,163.870968,4.0-4.5,4.429004
3839,1988,Afghanistan,128.0,11,106.0,11,2.0,470.0,243.096774,4.5-5.0,4.771558
3840,1987,Afghanistan,0.0,1,2.0,1,0.0,2.0,1.032258,0-0.5,0.027577


In [168]:
finDf.to_csv("gti_scores.csv") # saving the data 