# Pyspark

In [2]:
# Must be included at the beginning of each new notebook.
import findspark
findspark.init('/home/ubuntu/spark-2.1.1-bin-hadoop2.7')
import pyspark
from pyspark.sql import SparkSession
spark = SparkSession.builder.appName('GlobalTerrorism').getOrCreate()

# Importing Dataset

In [3]:
df1 = spark.read.csv('Dataset_2.csv', header=True, inferSchema=True)
df2 = spark.read.csv('Dataset_1.csv', header=True, inferSchema=True)
df1.show()
df2.show()

+-----+------+-----+-------+-----------+------+--------------------+-----------------+----------+-----+-----+-----+-------+-----------+--------------------+---------+--------------------+-------+-----------------+--------------------+----------+---------+-------------+--------------------+-----+---------+------+
|iyear|imonth|idate|country|country_txt|region|          region_txt|        provstate|      city|crit1|crit2|crit3|success|attacktype1|     attacktype1_txt|targtype1|       targtype1_txt|natlty1|      natlty1_txt|               gname|individual|weaptype1|weaptype1_txt|          weapdetail|nkill|ishostkid|ransom|
+-----+------+-----+-------+-----------+------+--------------------+-----------------+----------+-----+-----+-----+-------+-----------+--------------------+---------+--------------------+-------+-----------------+--------------------+----------+---------+-------------+--------------------+-----+---------+------+
| 2000|     1|    1|    139|    Namibia|    11|  Sub-Sahar

# Size of Dataframe 1

In [4]:
len(df1.columns)

27

In [5]:
df1.count()

12273

# Size of Dataframe 2

In [6]:
len(df2.columns)

27

In [7]:
df2.count()

99583

# Merging two datasets

In [8]:
terrorismdf = df1.unionAll(df2)

In [9]:
len(terrorismdf.columns)

27

In [10]:
terrorismdf.count()

111856

# First row of dataframe 1

In [11]:
df1.head()
for item in df1.head():
    print(item)

2000
1
1
139
Namibia
11
Sub-Saharan Africa
Kavango
Muitjiku
1
1
1
1
2
Armed Assault
1
Business
139
Namibia
National Union for the Total Independence of Angola (UNITA)
0
5
Firearms
None
0
0
None


# First row of dataframe 2

In [12]:
df2.head()
for item in df2.head():
    print(item)

2007
1
1
155
West Bank and Gaza Strip
10
Middle East & North Africa
Gaza Strip
Unknown
1
1
1
1
6
Hostage Taking (Kidnapping)
10
Journalists & Media
159
Peru
Unknown
0
5
Firearms
None
0
1
0


# First row of Merged dataframe

In [13]:
terrorismdf.head()
for item in terrorismdf.head():
    print(item)

2000
1
1
139
Namibia
11
Sub-Saharan Africa
Kavango
Muitjiku
1
1
1
1
2
Armed Assault
1
Business
139
Namibia
National Union for the Total Independence of Angola (UNITA)
0
5
Firearms
None
0
0
None


# Schema

In [14]:
#idea of what the data looks like
terrorismdf.printSchema()

root
 |-- iyear: integer (nullable = true)
 |-- imonth: integer (nullable = true)
 |-- idate: integer (nullable = true)
 |-- country: integer (nullable = true)
 |-- country_txt: string (nullable = true)
 |-- region: integer (nullable = true)
 |-- region_txt: string (nullable = true)
 |-- provstate: string (nullable = true)
 |-- city: string (nullable = true)
 |-- crit1: integer (nullable = true)
 |-- crit2: integer (nullable = true)
 |-- crit3: integer (nullable = true)
 |-- success: integer (nullable = true)
 |-- attacktype1: integer (nullable = true)
 |-- attacktype1_txt: string (nullable = true)
 |-- targtype1: integer (nullable = true)
 |-- targtype1_txt: string (nullable = true)
 |-- natlty1: integer (nullable = true)
 |-- natlty1_txt: string (nullable = true)
 |-- gname: string (nullable = true)
 |-- individual: integer (nullable = true)
 |-- weaptype1: integer (nullable = true)
 |-- weaptype1_txt: string (nullable = true)
 |-- weapdetail: string (nullable = true)
 |-- nkill: str

In [17]:
terrorismdf.columns

['iyear',
 'imonth',
 'idate',
 'country',
 'country_txt',
 'region',
 'region_txt',
 'provstate',
 'city',
 'crit1',
 'crit2',
 'crit3',
 'success',
 'attacktype1',
 'attacktype1_txt',
 'targtype1',
 'targtype1_txt',
 'natlty1',
 'natlty1_txt',
 'gname',
 'individual',
 'weaptype1',
 'weaptype1_txt',
 'weapdetail',
 'nkill',
 'ishostkid',
 'ransom']

# Importing libraries for better visualisation of dataframes

In [15]:
# Import the libraries.
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import numpy as np

In [19]:
#importing datasets from pandas for visualisation
df1_pd = pd.read_csv('Dataset_2.csv')
print(df1_pd.shape)
df2_pd = pd.read_csv('Dataset_1.csv')
print(df2_pd.shape)

(12273, 27)
(99583, 27)


In [21]:
#appending the two datasets
terrorismdf_pd = df1_pd.append(df2_pd, ignore_index = True)

#printing the number of columns and rows
print(terrorismdf_pd.shape)

#printing the columns name to see how to change the names to relevant names
print(terrorismdf_pd.columns)

(111856, 27)
Index(['iyear', 'imonth', 'idate', 'country', 'country_txt', 'region',
       'region_txt', 'provstate', 'city', 'crit1', 'crit2', 'crit3', 'success',
       'attacktype1', 'attacktype1_txt', 'targtype1', 'targtype1_txt',
       'natlty1', 'natlty1_txt', 'gname', 'individual', 'weaptype1',
       'weaptype1_txt', 'weapdetail', 'nkill', 'ishostkid', 'ransom'],
      dtype='object')


In [22]:
df1_pd.head(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
0,2000,1,1,139,Namibia,11,Sub-Saharan Africa,Kavango,Muitjiku,1,...,139.0,Namibia,National Union for the Total Independence of A...,0,5,Firearms,,0.0,0.0,
1,2000,1,1,139,Namibia,11,Sub-Saharan Africa,Kavango,Muitjiku,1,...,139.0,Namibia,National Union for the Total Independence of A...,0,5,Firearms,,0.0,1.0,0.0


In [23]:
df1_pd.tail(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
12271,2006,12,31,141,Nepal,6,South Asia,Eastern,Khadgapur,1,...,141.0,Nepal,Janatantrik Terai Mukti Morcha- Jwala Singh (J...,0,9,Melee,The attackers first threw rocks at the vehicle...,0.0,0.0,
12272,2006,12,31,141,Nepal,6,South Asia,Central,Sundarpur,1,...,141.0,Nepal,Janatantrik Terai Mukti Morcha (JTMM),0,13,Unknown,,1.0,1.0,0.0


In [24]:
df2_pd.head(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
0,2007,1,1,155,West Bank and Gaza Strip,10,Middle East & North Africa,Gaza Strip,Unknown,1,...,159.0,Peru,Unknown,0,5,Firearms,,0.0,1.0,0.0
1,2007,1,1,205,Thailand,5,Southeast Asia,Bangkok ( District ),Bangkok,1,...,205.0,Thailand,Unknown,0,6,Explosives,,3.0,0.0,


In [25]:
df2_pd.tail(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
99581,2017,12,31,92,India,6,South Asia,Manipur,Imphal,1,...,92.0,India,Unknown,0,6,Explosives,A thrown grenade was used in the attack.,0.0,0.0,
99582,2017,12,31,160,Philippines,5,Southeast Asia,Maguindanao,Cotabato City,1,...,160.0,Philippines,Unknown,0,6,Explosives,An explosive device containing a detonating co...,0.0,0.0,


In [26]:
terrorismdf_pd.head(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
0,2000,1,1,139,Namibia,11,Sub-Saharan Africa,Kavango,Muitjiku,1,...,139.0,Namibia,National Union for the Total Independence of A...,0,5,Firearms,,0.0,0.0,
1,2000,1,1,139,Namibia,11,Sub-Saharan Africa,Kavango,Muitjiku,1,...,139.0,Namibia,National Union for the Total Independence of A...,0,5,Firearms,,0.0,1.0,0.0


In [28]:
terrorismdf_pd.tail(2)

Unnamed: 0,iyear,imonth,idate,country,country_txt,region,region_txt,provstate,city,crit1,...,natlty1,natlty1_txt,gname,individual,weaptype1,weaptype1_txt,weapdetail,nkill,ishostkid,ransom
111854,2017,12,31,92,India,6,South Asia,Manipur,Imphal,1,...,92.0,India,Unknown,0,6,Explosives,A thrown grenade was used in the attack.,0.0,0.0,
111855,2017,12,31,160,Philippines,5,Southeast Asia,Maguindanao,Cotabato City,1,...,160.0,Philippines,Unknown,0,6,Explosives,An explosive device containing a detonating co...,0.0,0.0,


# Initial Exploration and Visualisation 

In [18]:
#finding correlation for country
terrorismdf.corr()["country"]

TypeError: corr() missing 2 required positional arguments: 'col1' and 'col2'