In [21]:
import zeit_json_to_plot as j2p
import fetch_all_articles
from datetime import datetime
import parse_terror_data as ptd
import plotly
import plotly.graph_objs as go
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
init_notebook_mode(connected=True)

In [22]:
# let's test if zeit_json_to_plot is correct
terror_json = j2p.get_json_data("zeit_terror_cleaned.json")
terror_json

[{'href': 'http://www.zeit.de/gesellschaft/zeitgeschehen/2017-02/montpellier-anti-terror-einsatz-terrorverdaechtige-festnahmen',
  'keywords': [{'name': 'Frankreich',
    'rel': 'location',
    'uri': 'http://api.zeit.de/keyword/frankreich'},
   {'name': 'Montpellier',
    'rel': 'location',
    'uri': 'http://api.zeit.de/keyword/montpellier'},
   {'name': 'Terroranschläge',
    'rel': 'subject',
    'uri': 'http://api.zeit.de/keyword/terroranschlaege'},
   {'name': 'Islamischer Staat',
    'rel': 'organization',
    'uri': 'http://api.zeit.de/keyword/islamischer-staat'},
   {'name': 'Terrorbekämpfung',
    'rel': 'subject',
    'uri': 'http://api.zeit.de/keyword/terrorbekaempfung'}],
  'release_date': '2017-02-10',
  'subtitle': 'Antiterroreinheiten haben in Südfrankreich vier Verdächtige festgenommen – unter ihnen eine 16-Jährige. Sie planten offenbar einen Sprengstoffanschlag in Paris.',
  'supertitle': 'Montpellier',
  'teaser_text': 'Antiterroreinheiten haben in Südfrankreich vier

In [23]:
# let's get the german cities
in_germany = j2p.get_german_cities(terror_json)
list(in_germany)[1:5]

['Lübeck', 'Aachen', 'Wunsiedel', 'Bitburg']

In [24]:
# now get the date-list and filter it by 2000-2015
date_list_germany = j2p.filter_data_for_mentioned_cities(terror_json, in_germany)
date_list_filtered_germany = j2p.filter_for_date_range(date_list_germany, datetime(2000,1,1), datetime(2015,12,31))

# get the df-plot
news_df_germany = j2p.generate_data_frame(date_list_filtered_germany)
news_df_germany.head()

Unnamed: 0_level_0,datetime
datetime,Unnamed: 1_level_1
2015-12-31,2015-12-31
2015-12-07,2015-12-07
2015-11-14,2015-11-14
2015-01-29,2015-01-29
2013-03-14,2013-03-14


In [25]:
# ============================================== #
# Excursion - we want relative article numbers
# so we get the number of ALL articles in zeit.de
all_news_dict = fetch_all_articles.get_article_count()
# ============================================== #

In [26]:
# group data and create dictionary
grouped_dates = news_df_germany.resample("M").count()

timestamp_dict = grouped_dates.to_dict()
timestamp_dict = timestamp_dict["datetime"]

# get relative numbers
timestamp_dict = ptd.get_relative_numbers(timestamp_dict, all_news_dict)

news_scatter_germany = j2p.create_scatter_plot_from_date_dict(timestamp_dict,"Terror-News about Germany")
layout = plotly.graph_objs.Layout(
        title="Relative Number of Articles about Terror on zeit.de in Germany since 2000",
        xaxis=dict(title="Year"),
        yaxis=dict(title="Number of article (grouped by month)"))
fig = plotly.graph_objs.Figure(data=[news_scatter_germany], layout=layout)
iplot(fig)

In [27]:
# we want western country-data too

# hacky stuff...make list of countries from zeit-api in terror_json-style
locations_in_zeit = list()
for entry in terror_json:
    for keyword in entry["keywords"]:
        if keyword["rel"] == "location":
            locations_in_zeit.append({"country" : keyword["name"]})
countries_europe_de = ptd.get_countries_in_europe_from_data(locations_in_zeit, "de")
countries_europe_de = countries_europe_de.union(in_germany)



date_list_europe = j2p.filter_data_for_mentioned_cities(terror_json, countries_europe_de)
date_list_filtered = j2p.filter_for_date_range(date_list_europe, datetime(2000,1,1), datetime(2015,12,31))
news_df_western = j2p.generate_data_frame(date_list_filtered)
# group data and create dictionary
grouped_dates = news_df_western.resample("M").count()
timestamp_dict = grouped_dates.to_dict()
timestamp_dict = timestamp_dict["datetime"]

# get relative numbers
timestamp_dict = ptd.get_relative_numbers(timestamp_dict, all_news_dict)

news_scatter_western = j2p.create_scatter_plot_from_date_dict(timestamp_dict, "Terror-News about Western countries")
layout = plotly.graph_objs.Layout(
        title="Relativ Number of Articles about Terror on zeit.de in Western Countries since 2000",
        xaxis=dict(title="Year"),
        yaxis=dict(title="Number of article (grouped by month)"))
fig = plotly.graph_objs.Figure(data=[news_scatter_western], layout=layout)
iplot(fig)

In [28]:
# we want global data too
date_list = j2p.json_to_date_list(terror_json)
date_list_filtered = j2p.filter_for_date_range(date_list, datetime(2000,1,1), datetime(2015,12,31))
news_df_global = j2p.generate_data_frame(date_list_filtered)
# group data and create dictionary
grouped_dates = news_df_global.resample("M").count()
timestamp_dict = grouped_dates.to_dict()
timestamp_dict = timestamp_dict["datetime"]

# get relative numbers
timestamp_dict = ptd.get_relative_numbers(timestamp_dict, all_news_dict)

news_scatter_global = j2p.create_scatter_plot_from_date_dict(timestamp_dict,"Any Terror-News")
layout = plotly.graph_objs.Layout(
        title="Number of Articles on zeit.de about Terror in the World since 2000",
        xaxis=dict(title="Year"),
        yaxis=dict(title="Number of article (grouped by month)"))
fig = plotly.graph_objs.Figure(data=[news_scatter_global], layout=layout)
iplot(fig)

## Here we get the terror-date to plot

In [29]:
# parse file for relevant data
terror_attacks = ptd.filter_csv_for_date_range("globalterrorismdb_0616dist.csv", 
                                               datetime(2000,1,1), 
                                               datetime(2015,12,31))

terror_df_global = ptd.generate_data_frame(terror_attacks)

terror_grouped_global = terror_df_global.resample("M").sum()
terror_grouped_global = terror_grouped_global.fillna(0)
terror_grouped_global

Unnamed: 0_level_0,casulties
datetime,Unnamed: 1_level_1
2000-01-31,833.0
2000-02-29,627.0
2000-03-31,797.0
2000-04-30,292.0
2000-05-31,1100.0
2000-06-30,489.0
2000-07-31,889.0
2000-08-31,813.0
2000-09-30,1197.0
2000-10-31,1067.0


In [30]:
terror_timestamp_dict = terror_grouped_global.to_dict()
terror_timestamp_dict = terror_timestamp_dict["casulties"]
terror_timestamp_dict

{Timestamp('2000-01-31 00:00:00', freq='M'): 833.0,
 Timestamp('2000-02-29 00:00:00', freq='M'): 627.0,
 Timestamp('2000-03-31 00:00:00', freq='M'): 797.0,
 Timestamp('2000-04-30 00:00:00', freq='M'): 292.0,
 Timestamp('2000-05-31 00:00:00', freq='M'): 1100.0,
 Timestamp('2000-06-30 00:00:00', freq='M'): 489.0,
 Timestamp('2000-07-31 00:00:00', freq='M'): 889.0,
 Timestamp('2000-08-31 00:00:00', freq='M'): 813.0,
 Timestamp('2000-09-30 00:00:00', freq='M'): 1197.0,
 Timestamp('2000-10-31 00:00:00', freq='M'): 1066.999999999,
 Timestamp('2000-11-30 00:00:00', freq='M'): 1011.0,
 Timestamp('2000-12-31 00:00:00', freq='M'): 1174.0,
 Timestamp('2001-01-31 00:00:00', freq='M'): 855.0,
 Timestamp('2001-02-28 00:00:00', freq='M'): 575.0,
 Timestamp('2001-03-31 00:00:00', freq='M'): 850.0,
 Timestamp('2001-04-30 00:00:00', freq='M'): 1221.0,
 Timestamp('2001-05-31 00:00:00', freq='M'): 777.0,
 Timestamp('2001-06-30 00:00:00', freq='M'): 792.0,
 Timestamp('2001-07-31 00:00:00', freq='M'): 478.0

In [31]:
terror_scatter_global = j2p.create_scatter_plot_from_date_dict(terror_timestamp_dict, "Terror Attacks in the World")
layout = plotly.graph_objs.Layout(
        title="Number of casulties in terror attack around the globe",
        xaxis=dict(title="Year"),
        yaxis=dict(title="Number of casulties (grouped by month)"))
fig = plotly.graph_objs.Figure(data=[terror_scatter_global], layout=layout)
iplot(fig)

In [32]:
# we want attacks on western countries too...let's look what the data looks like
regions = set()
for attack in terror_attacks:
    regions.add(attack["region"])
regions

{'Australasia & Oceania',
 'Central America & Caribbean',
 'Central Asia',
 'East Asia',
 'Eastern Europe',
 'Middle East & North Africa',
 'North America',
 'South America',
 'South Asia',
 'Southeast Asia',
 'Sub-Saharan Africa',
 'Western Europe'}

In [33]:
# Western countries is Western Europe + North America
terror_attacks_on_western = list()
for attack in terror_attacks:
        if attack["region"] == 'Western Europe' or attack["region"] == 'North America':
            terror_attacks_on_western.append(attack)
    

In [34]:
#terror_attacks_on_western = ptd.filter_by_country_list(terror_attacks, europe_country_set)
terror_df_western = ptd.generate_data_frame(terror_attacks_on_western)

terror_grouped_western = terror_df_western.resample("M").sum()
terror_grouped_western = terror_grouped_western.fillna(0)
terror_timestamp_dict = terror_grouped_western.to_dict()
terror_timestamp_dict = terror_timestamp_dict["casulties"]

terror_scatter_western = j2p.create_scatter_plot_from_date_dict(terror_timestamp_dict, "Terror Attacks in Western Countries")


layout = plotly.graph_objs.Layout(
        title="Number of casulties in terror attack in western countries",
        xaxis=dict(title="Year"),
        yaxis=dict(range=[0,200],title="Number of casulties (grouped by month)"))
fig = go.Figure(data=[terror_scatter_western], layout=layout)
iplot(fig, filename="simple-inset")

In [35]:
terror_attacks_on_western = ptd.filter_by_country_list(terror_attacks, ["Germany"])
terror_df_western = ptd.generate_data_frame(terror_attacks_on_western)

terror_grouped_western = terror_df_western.resample("M").sum()
terror_grouped_western = terror_grouped_western.fillna(0)
terror_timestamp_dict = terror_grouped_western.to_dict()
terror_timestamp_dict = terror_timestamp_dict["casulties"]

terror_scatter_germany = j2p.create_scatter_plot_from_date_dict(terror_timestamp_dict, "Terror Attacks in Germany")


layout = plotly.graph_objs.Layout(
        title="Number of Casulties in Terror Attack in Germany",
        xaxis=dict(title="Year"),
        yaxis=dict(title="Number of Casulties (Grouped by Month)"),
        )
fig = go.Figure(data=[terror_scatter_germany], layout=layout)
iplot(fig, filename="simple-inset")

# This was the code....now we just make plots....

In [36]:
layout = go.Layout(
    title='Global Terror attacks and news about it in zeit.de between 2000-2015',
    xaxis=dict(
        title="Year"),
    yaxis=dict(
        title="Relative Number of articles about terror in % (grouped by month)",
        rangemode = "nonnegative"),
    yaxis2=dict(
        title='Number of terror-attacks (grouped by month)',
        overlaying='y',
        side='right',
        rangemode = "nonnegative"
    )
)
terror_scatter_global.update({"yaxis" : "y2"})
fig = go.Figure(data=[terror_scatter_global,news_scatter_global], layout=layout)
iplot(fig)

nodes to above diagramm:
  * number of articles doesnt grow as fast as Terror attacks
  * 

In [37]:
layout = go.Layout(
    title='Terror attacks and news about Western countries in zeit.de between 2000-2015',
    xaxis=dict(
        title="Year"),
    yaxis=dict(
        title="Relative Number of articles about terror in % (grouped by month)",
        rangemode = "nonnegative"),
    yaxis2=dict(
        title='Number of terror-attacks (grouped by month)',
        overlaying='y',
        side='right',
        range=[0,200]
    )
)
terror_scatter_western.update({"yaxis" : "y2"})
fig = go.Figure(data=[terror_scatter_western,news_scatter_western], layout=layout)
iplot(fig)

In [38]:
layout = go.Layout(
    title='Terror attacks and news about Germany in zeit.de between 2000-2015',
    xaxis=dict(
        title="Year"),
    yaxis=dict(
        title="Relative Number of articles about terror in % (grouped by month)",
        rangemode = "nonnegative"),
    yaxis2=dict(
        title='Number of terror-attacks (grouped by month)',
        overlaying='y',
        side='right',
        rangemode = "nonnegative"
    )
)
terror_scatter_germany.update({"yaxis" : "y2"})
fig = go.Figure(data=[terror_scatter_germany,news_scatter_germany], layout=layout)
iplot(fig)