# M.Tech Dissertation Project
## Terrorist threat & link prediction system: Analysis & prediction of terrorist attack patterns and unrevealed hidden links within terrorist networks

##### Importing Libraries 

In [1]:
#Importing the required libraries for EDA (Exploratory data analysis)
import os
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import seaborn as sns                       #visualisation
import matplotlib.pyplot as plt             #visualisation
plt.style.use('fivethirtyeight')
%matplotlib inline     
sns.set(color_codes=True)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn import preprocessing

##### Converting .xlsx file to .csv file

In [2]:
# Data were taken from the GTD (Global Terrorism Database) under its
# “Individual Use” terms, which include scholarly, educational and research purposes,
# including classwork, theses, dissertations, and academic publication.
#
# Convert the .xlsx file to a .csv file.
# The conversion is only needed once, so it is skipped when the CSV already
# exists; this replaces the manual comment/uncomment step on later runs.
xlsx_path = "globalterrorismdb_0221dist.xlsx"
csv_path = "globalterrorismdb_0221dist.csv"

if not os.path.exists(csv_path):
    read_file = pd.read_excel(xlsx_path)
    # Write the dataframe object into the csv file (no index column, with header row).
    read_file.to_csv(csv_path, index=None, header=True)
    print ("Xsls file converted to csv sucessfully.")
else:
    print("CSV file already exists; skipping the .xlsx conversion.")

Xsls file converted to csv sucessfully.


##### Loading the data into the data frame.

In [3]:
# Read the converted CSV into the working DataFrame.
df = pd.read_csv("globalterrorismdb_0221dist.csv")
# Preview the first five rows (five is the default for head()).
df.head()

In [4]:
# Preview the last five rows (five is the default for tail()).
df.tail()

##### Checking Column Headings

In [5]:
# List the column labels of the raw frame.
df.columns

Index(['eventid', 'iyear', 'imonth', 'iday', 'approxdate', 'extended',
       'resolution', 'country', 'country_txt', 'region',
       ...
       'addnotes', 'scite1', 'scite2', 'scite3', 'dbsource', 'INT_LOG',
       'INT_IDEO', 'INT_MISC', 'INT_ANY', 'related'],
      dtype='object', length=135)

##### Checking the types of data

In [6]:
# Show the dtype pandas inferred for each column of the raw frame.
df.dtypes

eventid        int64
iyear          int64
imonth         int64
iday           int64
approxdate    object
               ...  
INT_LOG        int64
INT_IDEO       int64
INT_MISC       int64
INT_ANY        int64
related       object
Length: 135, dtype: object

##### Renaming the columns

In [7]:
# Map the original dataset column names to the descriptive names used by the
# following cells. Columns not listed here keep their original names.
column_renames = {
    'iyear': 'Year',
    'imonth': 'Month',
    'iday': 'Day',
    'country': 'Country_Code',
    'country_txt': 'Country_Name',
    'region': 'Region_Code',
    'region_txt': 'Region_Name',
    'provstate': 'State',
    'city': 'City',
    'latitude': 'Latitude',
    'longitude': 'Longitude',
    'attacktype1': 'AttackType_Code',
    'attacktype1_txt': 'AttackType_Name',
    'targtype1': 'Targtype_Code',
    'targtype1_txt': 'Targtype_Name',
    'targsubtype1': 'Targsubtype_code',
    'targsubtype1_txt': 'Targsubtype_Occupation',
    'gname': 'Group_Name',
    'weaptype1': 'Weaptype1_Code',
    'weaptype1_txt': 'Weaptype1_Name',
    'weapsubtype1': 'Weapsubtype1_Code',
    'weapsubtype1_txt': 'Weapsubtype1_Name',
    'nkill': 'Killed',
    'nwound': 'Wounded',
}
# Rename in place so the existing `df` object carries the new labels.
df.rename(columns=column_renames, inplace=True)

##### Dropping irrelevant columns

In [8]:
# Keep only the columns used for the analysis; every other column is dropped.
selected_columns = [
    'Year', 'Month', 'Day',
    'Country_Code', 'Country_Name',
    'Region_Code', 'Region_Name',
    'State', 'City', 'Latitude', 'Longitude',
    'AttackType_Code', 'AttackType_Name',
    'Targtype_Code', 'Targtype_Name',
    'Targsubtype_code', 'Targsubtype_Occupation',
    'Group_Name',
    'Weaptype1_Code', 'Weaptype1_Name',
    'Weapsubtype1_Code', 'Weapsubtype1_Name',
    'Killed', 'Wounded',
]
# .copy() makes the subset an independent frame rather than a slice of the
# full dataset, so the in-place value replacements performed later in the
# notebook write to `df` itself without view-versus-copy ambiguity.
df = df[selected_columns].copy()

##### Again Checking for Column Names

In [9]:
# Confirm that only the renamed, selected columns remain.
df.columns

Index(['Year', 'Month', 'Day', 'Country_Code', 'Country_Name', 'Region_Code',
       'Region_Name', 'State', 'City', 'Latitude', 'Longitude',
       'AttackType_Code', 'AttackType_Name', 'Targtype_Code', 'Targtype_Name',
       'Targsubtype_code', 'Targsubtype_Occupation', 'Group_Name',
       'Weaptype1_Code', 'Weaptype1_Name', 'Weapsubtype1_Code',
       'Weapsubtype1_Name', 'Killed', 'Wounded'],
      dtype='object')

##### Again Checking for Data Type

In [10]:
# Show the dtype of each remaining column after the rename and column selection.
df.dtypes

Year                        int64
Month                       int64
Day                         int64
Country_Code                int64
Country_Name               object
Region_Code                 int64
Region_Name                object
State                      object
City                       object
Latitude                  float64
Longitude                 float64
AttackType_Code             int64
AttackType_Name            object
Targtype_Code               int64
Targtype_Name              object
Targsubtype_code          float64
Targsubtype_Occupation     object
Group_Name                 object
Weaptype1_Code              int64
Weaptype1_Name             object
Weapsubtype1_Code         float64
Weapsubtype1_Name          object
Killed                    float64
Wounded                   float64
dtype: object

In [11]:
# Preview the first ten rows of the trimmed frame.
df.head(10)

Unnamed: 0,Year,Month,Day,Country_Code,Country_Name,Region_Code,Region_Name,State,City,Latitude,...,Targtype_Name,Targsubtype_code,Targsubtype_Occupation,Group_Name,Weaptype1_Code,Weaptype1_Name,Weapsubtype1_Code,Weapsubtype1_Name,Killed,Wounded
0,1970,7,2,58,Dominican Republic,2,Central America & Caribbean,National,Santo Domingo,18.456792,...,Private Citizens & Property,68.0,Named Civilian,MANO-D,13,Unknown,,,1.0,0.0
1,1970,0,0,130,Mexico,1,North America,Federal,Mexico city,19.371887,...,Government (Diplomatic),45.0,"Diplomatic Personnel (outside of embassy, cons...",23rd of September Communist League,13,Unknown,,,0.0,0.0
2,1970,1,0,160,Philippines,5,Southeast Asia,Tarlac,Unknown,15.478598,...,Journalists & Media,54.0,Radio Journalist/Staff/Facility,Unknown,13,Unknown,,,1.0,0.0
3,1970,1,0,78,Greece,8,Western Europe,Attica,Athens,37.99749,...,Government (Diplomatic),46.0,Embassy/Consulate,Unknown,6,Explosives,16.0,Unknown Explosive Type,,
4,1970,1,0,101,Japan,4,East Asia,Fukouka,Fukouka,33.580412,...,Government (Diplomatic),46.0,Embassy/Consulate,Unknown,8,Incendiary,,,,
5,1970,1,1,217,United States,1,North America,Illinois,Cairo,37.005105,...,Police,22.0,"Police Building (headquarters, station, school)",Black Nationalists,5,Firearms,5.0,Unknown Gun Type,0.0,0.0
6,1970,1,2,218,Uruguay,3,South America,Montevideo,Montevideo,-34.891151,...,Police,25.0,Police Security Forces/Officers,Tupamaros (Uruguay),5,Firearms,2.0,Automatic or Semi-Automatic Rifle,0.0,0.0
7,1970,1,2,217,United States,1,North America,California,Oakland,37.791927,...,Utilities,107.0,Electricity,Unknown,6,Explosives,16.0,Unknown Explosive Type,0.0,0.0
8,1970,1,2,217,United States,1,North America,Wisconsin,Madison,43.076592,...,Military,28.0,Military Recruiting Station/Academy,New Year's Gang,8,Incendiary,19.0,Molotov Cocktail/Petrol Bomb,0.0,0.0
9,1970,1,3,217,United States,1,North America,Wisconsin,Madison,43.07295,...,Government (General),21.0,Government Building/Facility/Office,New Year's Gang,8,Incendiary,20.0,Gasoline or Alcohol,0.0,0.0


###### Data Preprocessing

In [12]:
# Snapshot the trimmed frame to disk: header row included, no index column.
df.to_csv(
    'newout.csv',
    header=True,
    index=None,
)

In [13]:
# (rows, columns) of the frame before any 'Unknown' values are replaced.
df.shape

(201183, 24)

In [14]:
#Replace unknown Cities, Attack Types and Target Types with the most frequent
#known City, Attack Type and Target Type in that specific Country.
#NO NEED TO RUN EVERY TIME IF already RUN
def replace(i,key):
    """Return the value that should replace 'Unknown' in column ``key`` of row ``i``.

    The replacement is the most frequent value of ``key`` among all rows of the
    global ``df`` that share row ``i``'s ``Country_Code``, ignoring 'Unknown'
    and missing entries. Ties on frequency are broken in favour of the value
    that compares largest.

    If the country has no known value for ``key`` at all, the row's
    ``Country_Name`` is returned as a fallback (for every column, not only
    'City').

    Parameters
    ----------
    i : index label of the row in ``df``.
    key : str
        Name of the column being filled.

    Returns
    -------
    The replacement value for ``df.at[i, key]``.
    """
    search = df.at[i, 'Country_Code']
    in_country = df.loc[df['Country_Code'] == search, key]
    # value_counts() drops missing values; 'Unknown' is filtered out explicitly.
    counts = in_country[in_country != 'Unknown'].value_counts()
    if counts.empty:
        # Nothing known for this country: fall back to the country name
        # instead of failing on max() of an empty sequence.
        return df.at[i, 'Country_Name']
    # Highest count wins; the (count, value) tuple ordering breaks ties by value.
    return max(zip(counts.values, counts.index))[1]

create = ['City','AttackType_Name','Targtype_Name']

for j in create:
    # The replacement is the same for every row of a country, so compute it
    # once per country instead of re-filtering the whole frame per row.
    replacement_by_country = {}
    for i in df.index[df[j] == 'Unknown']:
        country_code = df.at[i, 'Country_Code']
        if country_code not in replacement_by_country:
            replacement_by_country[country_code] = replace(i, j)
        # Write through .at: chained indexing (df[j][i] = ...) is not
        # guaranteed to modify df, and the resulting warning would be hidden
        # by the global warnings filter.
        df.at[i, j] = replacement_by_country[country_code]

In [20]:
# Save the frame after the per-country replacement of unknown cities,
# attack types and target types: header row included, no index column.
df.to_csv(
    'newout_After_Repl_City_Attack_Type_Target_Type.csv',
    header=True,
    index=None,
)

In [15]:
# (rows, columns) after the per-country replacement; values are overwritten, no rows are dropped.
df.shape

(201183, 24)

In [16]:
#Replace unknown Group Names and Weapon Types with the most frequent known
#Group Name and Weapon Type in that specific Region.
#NO NEED TO RUN EVERY TIME IF already RUN
# NOTE(review): this attributes unattributed attacks to the region's most
# active group - confirm this imputation is intended for the later modelling.
def replace1(i,key):
    """Return the value that should replace 'Unknown' in column ``key`` of row ``i``.

    The replacement is the most frequent value of ``key`` among all rows of the
    global ``df`` that share row ``i``'s ``Region_Code``, ignoring 'Unknown'
    and missing entries. Ties on frequency are broken in favour of the value
    that compares largest.

    If the region has no known value for ``key`` at all, 'Unknown' is returned
    so the cell is left unchanged.

    Parameters
    ----------
    i : index label of the row in ``df``.
    key : str
        Name of the column being filled.

    Returns
    -------
    The replacement value for ``df.at[i, key]``.
    """
    search = df.at[i, 'Region_Code']
    in_region = df.loc[df['Region_Code'] == search, key]
    # value_counts() drops missing values; 'Unknown' is filtered out explicitly.
    counts = in_region[in_region != 'Unknown'].value_counts()
    if counts.empty:
        # Nothing known for this region: keep the placeholder instead of
        # failing on max() of an empty sequence.
        return 'Unknown'
    # Highest count wins; the (count, value) tuple ordering breaks ties by value.
    return max(zip(counts.values, counts.index))[1]

create = ['Group_Name','Weaptype1_Name']
for j in create:
    # The replacement is the same for every row of a region, so compute it
    # once per region instead of re-filtering the whole frame per row.
    replacement_by_region = {}
    for i in df.index[df[j] == 'Unknown']:
        region_code = df.at[i, 'Region_Code']
        if region_code not in replacement_by_region:
            replacement_by_region[region_code] = replace1(i, j)
        # Write through .at: chained indexing (df[j][i] = ...) is not
        # guaranteed to modify df, and the resulting warning would be hidden
        # by the global warnings filter.
        df.at[i, j] = replacement_by_region[region_code]
#df.to_csv('newout.csv')

In [17]:
# Save the frame after the per-region replacement of unknown group names and
# weapon types: header row included, no index column.
# NOTE(review): the file name mentions removed null/repeated rows, but none of
# the preceding cells drop rows and the row count is unchanged - confirm.
df.to_csv(
    'newout_After_Group_Name_Wepon_Type_without_Null_Repeat_Row_Removed.csv',
    header=True,
    index=None,
)

In [19]:
# (rows, columns) after the per-region replacement; values are overwritten, no rows are dropped.
df.shape

(201183, 24)