## Clean up terrorism data

In [1]:
# Dependencies and Setup
import pandas as pd
import json
import numpy as np

In [2]:
# Read csv file and convert to dataframe
terrorism_file = "files/globalterrorismd.csv"

terrorism_df = pd.read_csv(terrorism_file)
terrorism_df

Unnamed: 0,year,country,region,state,city,latitude,longitude,nationality
0,1990,Lebanon,Middle East & North Africa,Beirut,Beirut,33.888523,35.503513,Lebanon
1,1990,India,South Asia,Jammu and Kashmir,Srinagar,34.083740,74.789902,India
2,1990,India,South Asia,Jammu and Kashmir,Srinagar,34.083740,74.789902,India
3,1990,India,South Asia,Jammu and Kashmir,Srinagar,34.083740,74.789902,India
4,1990,Bolivia,South America,Cochabamba,Cochabamba,-17.382789,-66.166439,United States
...,...,...,...,...,...,...,...,...
150387,2018,Afghanistan,South Asia,Zabul,Sorkhi Sang,32.445646,66.876249,Afghanistan
150388,2018,Afghanistan,South Asia,Faryab,Khwaja Sabz Posh district,36.057119,64.845477,Afghanistan
150389,2018,Afghanistan,South Asia,Jawzjan,Dik Sar,36.229290,65.460288,Afghanistan
150390,2018,Afghanistan,South Asia,Helmand,Nad Ali district,31.639085,64.243436,Afghanistan


In [3]:
# Use count function to get the number of attacks each year per country
group_country = terrorism_df.groupby(["country", "year"])["country"].count()
group_country


country      year
Afghanistan  1990     2
             1991    30
             1992    36
             1994     9
             1995     6
                     ..
Zimbabwe     2011     1
             2013     3
             2014     1
             2017     3
             2018     2
Name: country, Length: 2650, dtype: int64

In [4]:
# convert above results to a dataframe
new_data = pd.DataFrame(group_country).rename(columns={"country": "attacksCount"}).reset_index()
new_data

Unnamed: 0,country,year,attacksCount
0,Afghanistan,1990,2
1,Afghanistan,1991,30
2,Afghanistan,1992,36
3,Afghanistan,1994,9
4,Afghanistan,1995,6
...,...,...,...
2645,Zimbabwe,2011,1
2646,Zimbabwe,2013,3
2647,Zimbabwe,2014,1
2648,Zimbabwe,2017,3


In [5]:
# save file to json format
terrorism_count = new_data.to_json("files/terrorismAttacks.json", orient="records")

In [6]:
# extract country of attack and attacker nationality
comparison = terrorism_df[["country", "nationality"]] 
comparison

Unnamed: 0,country,nationality
0,Lebanon,Lebanon
1,India,India
2,India,India
3,India,India
4,Bolivia,United States
...,...,...
150387,Afghanistan,Afghanistan
150388,Afghanistan,Afghanistan
150389,Afghanistan,Afghanistan
150390,Afghanistan,Afghanistan


In [7]:
# use numpy to compare country and nationality to check number of domestic attacks vs international
comparison_column = np.where(comparison["country"] == comparison["nationality"], True, False)
# compare "country"` and `"nationality"`

print(comparison_column)


[ True  True  True ...  True  True False]


In [8]:
comparison["equal"] = comparison_column
print(comparison)

               country       nationality  equal
0              Lebanon           Lebanon   True
1                India             India   True
2                India             India   True
3                India             India   True
4              Bolivia     United States  False
...                ...               ...    ...
150387     Afghanistan       Afghanistan   True
150388     Afghanistan       Afghanistan   True
150389     Afghanistan       Afghanistan   True
150390     Afghanistan       Afghanistan   True
150391  United Kingdom  Northern Ireland  False

[150392 rows x 3 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  """Entry point for launching an IPython kernel.


In [9]:
comparison_data = pd.DataFrame(comparison)
comparison_data

Unnamed: 0,country,nationality,equal
0,Lebanon,Lebanon,True
1,India,India,True
2,India,India,True
3,India,India,True
4,Bolivia,United States,False
...,...,...,...
150387,Afghanistan,Afghanistan,True
150388,Afghanistan,Afghanistan,True
150389,Afghanistan,Afghanistan,True
150390,Afghanistan,Afghanistan,True


In [10]:
count2 = comparison_data.groupby(["country", "nationality", "equal"])["equal"].count()
count2

country      nationality    equal
Afghanistan  Afghanistan    True     12408
             Algeria        False        2
             Asian          False        3
             Australia      False        3
             Bangladesh     False        6
                                     ...  
Zimbabwe     Ethiopia       False        1
             Great Britain  False        3
             South Africa   False        1
             United States  False        1
             Zimbabwe       True        35
Name: equal, Length: 1916, dtype: int64

In [11]:
new_count = pd.DataFrame(count2).rename(columns={"equal":"xyz"}).reset_index()
new_count.head(50)

Unnamed: 0,country,nationality,equal,xyz
0,Afghanistan,Afghanistan,True,12408
1,Afghanistan,Algeria,False,2
2,Afghanistan,Asian,False,3
3,Afghanistan,Australia,False,3
4,Afghanistan,Bangladesh,False,6
5,Afghanistan,Canada,False,7
6,Afghanistan,China,False,4
7,Afghanistan,Denmark,False,1
8,Afghanistan,East Timor,False,1
9,Afghanistan,France,False,12


In [15]:
# new_count.columns
new_df = new_count[["country", "equal", "xyz"]]
new_df.head()

Unnamed: 0,country,equal,xyz
0,Afghanistan,True,12408
1,Afghanistan,False,2
2,Afghanistan,False,3
3,Afghanistan,False,3
4,Afghanistan,False,6


In [16]:
equal_country = new_df.groupby(["country", "equal"])["xyz"].sum()
equal_country

country      equal
Afghanistan  False     1238
             True     12408
Albania      False        6
             True        69
Algeria      False       91
                      ...  
Zaire        True        25
Zambia       False        2
             True        25
Zimbabwe     False        7
             True        35
Name: xyz, Length: 359, dtype: int64

In [17]:
new_df2 = pd.DataFrame(equal_country).reset_index()
new_df2.head(50)

Unnamed: 0,country,equal,xyz
0,Afghanistan,False,1238
1,Afghanistan,True,12408
2,Albania,False,6
3,Albania,True,69
4,Algeria,False,91
5,Algeria,True,2622
6,Angola,False,59
7,Angola,True,373
8,Antigua and Barbuda,True,1
9,Argentina,False,28


In [24]:
comparison_df = new_df2.to_json("files/comparisondata.json", orient="records")

In [18]:
country_list = new_data.country.unique()
country_list

array(['Afghanistan', 'Albania', 'Algeria', 'Angola',
       'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia',
       'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh',
       'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan',
       'Bolivia', 'Bosnia-Herzegovina', 'Botswana', 'Brazil', 'Brunei',
       'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
       'Canada', 'Central African Republic', 'Chad', 'Chile', 'China',
       'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus',
       'Czech Republic', 'Czechoslovakia',
       'Democratic Republic of the Congo', 'Denmark', 'Djibouti',
       'Dominica', 'Dominican Republic', 'East Germany (GDR)',
       'East Timor', 'Ecuador', 'Egypt', 'El Salvador',
       'Equatorial Guinea', 'Eritrea', 'Estonia', 'Ethiopia', 'Fiji',
       'Finland', 'France', 'French Guiana', 'French Polynesia', 'Gabon',
       'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Guadeloupe',
       'Gua

In [26]:
list_df = pd.DataFrame(country_list, columns=["country_names"])
list_df

Unnamed: 0,country_names
0,Afghanistan
1,Albania
2,Algeria
3,Angola
4,Antigua and Barbuda
...,...
188,Yemen
189,Yugoslavia
190,Zaire
191,Zambia


In [27]:
countryList = list_df.to_json("files/countryList.json", orient="records")