In [62]:
from pathlib import Path
# Working folder where this notebook stores its data and generated files.
BASE = Path("/content") / "Assignment3"
# Safe to re-run: missing parents are created and an existing folder is accepted.
BASE.mkdir(exist_ok=True, parents=True)
print(f"Working folder: {BASE}")

Working folder: /content/Assignment3


In [2]:
from google.colab import files
# Upload the dataset CSV through Colab. The result is kept in `up`; the next
# cell reads its keys as the names of the uploaded files.
up = files.upload()

Saving region_02.csv to region_02.csv


In [3]:
import shutil, os, pandas as pd
# Fail early with a clear message when nothing was uploaded
# (indexing an empty upload would otherwise raise a bare IndexError).
if not up:
    raise ValueError("No file uploaded - run the upload cell again.")
csv_uploaded_name = next(iter(up))
dest = BASE / "region_02.csv"
# The upload is addressed by its bare file name. It is moved only while it is
# still there, so re-running this cell after the first move reuses the copy
# already stored at `dest` instead of failing with FileNotFoundError.
if os.path.exists(csv_uploaded_name):
    shutil.move(csv_uploaded_name, dest)
print("Saved:", dest)

# The whole file is read (not only `use_cols`) so the columns the later cells
# rely on can be checked against what the CSV actually contains.
use_cols = ["country_txt","iyear","gname","attacktype1_txt","targtype1_txt","nkill","nwound"]
df = pd.read_csv(dest, low_memory=False)
missing = [c for c in use_cols if c not in df.columns]
print("Columns present:", sorted(df.columns.tolist())[:18], "...")
print("Missing (if any):", missing)

# Light cleaning: coerce the year and casualty columns to numbers;
# values that cannot be parsed become NaN.
for c in ["nkill","nwound","iyear"]:
    if c in df.columns:
        df[c] = pd.to_numeric(df[c], errors="coerce")

print("Rows:", len(df))
df.head(3)


Saved: /content/Assignment3/region_02.csv
Columns present: ['INT_ANY', 'INT_IDEO', 'INT_LOG', 'INT_MISC', 'addnotes', 'alternative', 'alternative_txt', 'approxdate', 'attacktype1', 'attacktype1_txt', 'attacktype2', 'attacktype2_txt', 'attacktype3', 'attacktype3_txt', 'city', 'claim2', 'claim3', 'claimed'] ...
Missing (if any): []
Rows: 10386


Unnamed: 0,eventid,iyear,imonth,iday,approxdate,extended,resolution,country,country_txt,region,...,addnotes,scite1,scite2,scite3,dbsource,INT_LOG,INT_IDEO,INT_MISC,INT_ANY,related
0,197000000001,1970,7,2,,0,,58,Dominican Republic,2,...,,,,,PGIS,0,0,0,0,
1,197001200001,1970,1,20,,0,,83,Guatemala,2,...,,,,,PGIS,-9,-9,1,1,
2,197003060001,1970,3,6,,1,1970-03-08,83,Guatemala,2,...,,,,,PGIS,0,1,1,1,


#D3

In [6]:
#Precomputing tiny helper datasets
#Two small JSON files are written to BASE:
# 1. agg_by_country_year.json for the choropleth + slider
# 2. top_groups_by_year.json for the top-10 groups chart

import json

#Choropleth: incidents per country per year -> {year: {country: count}}
choropleth_data = {}
if {"country_txt","iyear"} <= set(df.columns):
    ctry_year = (
        df.groupby(["iyear","country_txt"], dropna=True)
          .size()
          .reset_index(name="count")
          .sort_values(["iyear","count"], ascending=[True, False])
    )
    for year_key, year_rows in ctry_year.groupby("iyear"):
        per_country = {}
        for country, cnt in zip(year_rows["country_txt"], year_rows["count"]):
            per_country[str(country)] = int(cnt)
        choropleth_data[int(year_key)] = per_country

(BASE/"agg_by_country_year.json").write_text(json.dumps(choropleth_data))

#Groups per year -> {year: [{"gname": ..., "count": ...}, ...]}, largest count first.
#Every group of a year is stored here; no top-10 cut is applied in this cell.
groups_by_year = {}
if {"gname","iyear"} <= set(df.columns):
    grp_year = (
        df.groupby(["iyear","gname"], dropna=True)
          .size()
          .reset_index(name="count")
    )
    for year_key, year_rows in grp_year.groupby("iyear"):
        ranked = year_rows.sort_values("count", ascending=False)
        groups_by_year[int(year_key)] = [
            {"gname": name, "count": int(cnt)}
            for name, cnt in zip(ranked["gname"], ranked["count"])
        ]

(BASE/"top_groups_by_year.json").write_text(json.dumps(groups_by_year))

print("Wrote:",
      BASE/"agg_by_country_year.json",
      BASE/"top_groups_by_year.json")

Wrote: /content/Assignment3/agg_by_country_year.json /content/Assignment3/top_groups_by_year.json


In [7]:
#Fetch the world GeoJSON used as the base map and store it in the working folder
import requests
world_url = "https://raw.githubusercontent.com/holtzy/D3-graph-gallery/master/DATA/world.geojson"
geo_path = BASE / "world.geojson"
r = requests.get(url=world_url, timeout=60)
#Stop on an HTTP error status rather than saving the error response as the map
r.raise_for_status()
with open(geo_path, "wb") as fh:
    fh.write(r.content)
print("Saved:", geo_path, "bytes:", geo_path.stat().st_size)


Saved: /content/Assignment3/world.geojson bytes: 252487


##Global Terrorism Heatmap


The cell below inlines `world.geojson` and `agg_by_country_year.json` (both created above) into a single HTML page, so the map renders entirely in the browser.

In [26]:
#Build a self-contained D3 choropleth page (world map + year slider) and save it
#as d3_heatmap.html in BASE.
import json

geo_path = BASE / "world.geojson"
byyear_path = BASE / "agg_by_country_year.json"

#Both inputs are parsed here and re-serialized with json.dumps further down, so the
#page carries its data as the JS constants WORLD and BYYEAR; the only external
#resource the page references is the d3 script.
WORLD = json.loads(geo_path.read_text(encoding="utf-8"))
BYYEAR = json.loads(byyear_path.read_text(encoding="utf-8"))

#The HTML template is split into three string pieces around the two json.dumps(...)
#calls that inline the data.
#NOTE(review): the dumped JSON is placed inside the <script> element unescaped; a
#value containing "</script>" would end the script early - confirm the inputs
#cannot contain it.
choropleth = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>D3 Choropleth — Incidents by Country & Year</title>
<script src="https://d3js.org/d3.v7.min.js"></script>
<style>
body { font-family: system-ui, -apple-system, Segoe UI, Roboto, Arial; }
#wrap { width: 980px; margin: 20px auto; }
#header { display:flex; justify-content:space-between; align-items:center; }
.tooltip {
  position: absolute; padding: 6px 8px; background: rgba(0,0,0,0.85);
  color: #fff; border-radius: 4px; pointer-events: none; font-size: 12px;
}
.slider { width: 960px; margin: 8px 0 16px 0; }
.country { stroke: #bbb; stroke-width: 0.5px; }
#legend { font-size: 12px; margin-top: 6px; }
</style>
</head>
<body>
<div id="wrap">
  <div id="header">
    <h2>Incidents by Country</h2>
    <div id="yearLabel"></div>
  </div>
  <input type="range" class="slider" id="yearSlider" min="1970" max="2025" step="1" value="1970"/>
  <div id="map"></div>
  <div id="legend"></div>
</div>

<script>
const WORLD = """ + json.dumps(WORLD) + """;
const BYYEAR = """ + json.dumps(BYYEAR) + """;
const ALIAS = new Map([
  ["United States", "United States of America"],
  ["Russia", "Russian Federation"],
  ["Congo, Dem. Rep.", "Democratic Republic of the Congo"],
  ["Congo, Rep.", "Republic of the Congo"],
  ["Czech Republic", "Czechia"],
  ["Swaziland", "Eswatini"],
  ["Burma", "Myanmar"],
  ["South Korea", "Republic of Korea"],
  ["North Korea", "Dem. People's Republic of Korea"]
]);

const width = 960, height = 520;
const svg = d3.select("#map").append("svg")
  .attr("width", width)
  .attr("height", height);

const tip = d3.select("body").append("div").attr("class", "tooltip").style("opacity", 0);
const projection = d3.geoNaturalEarth1().scale(170).translate([width/2, height/2]);
const path = d3.geoPath().projection(projection);
const color = d3.scaleSequential(d3.interpolateReds).domain([0, 1]);

const legendW = 220, legendH = 10;
const legendSvg = d3.select("#legend").append("svg").attr("width", legendW+40).attr("height", 40);
const gradId = "grad";
const defs = legendSvg.append("defs");
const grad = defs.append("linearGradient").attr("id", gradId);
grad.append("stop").attr("offset", "0%").attr("stop-color", d3.interpolateReds(0));
grad.append("stop").attr("offset", "100%").attr("stop-color", d3.interpolateReds(1));
legendSvg.append("rect").attr("x", 10).attr("y", 10).attr("width", legendW).attr("height", legendH).style("fill", `url(#${gradId})`);
const legendScale = d3.scaleLinear().range([10, 10+legendW]);
const legendAxis = d3.axisBottom(legendScale).ticks(4).tickSize(3);
const legendAxisG = legendSvg.append("g").attr("class","legendAxis").attr("transform","translate(0, 22)");

const yearLabel = d3.select("#yearLabel");

const years = Object.keys(BYYEAR).map(d => +d).sort((a,b)=>a-b);
const slider = d3.select("#yearSlider")
  .attr("min", d3.min(years) ?? 1970)
  .attr("max", d3.max(years) ?? 1970)
  .on("input", (ev) => update(+ev.target.value));

const countries = svg.append("g").selectAll("path")
  .data(WORLD.features)
  .join("path")
  .attr("class", "country")
  .attr("d", path)
  .on("mousemove", (ev, d) => {
    const geoName = d.properties.name;
    const alt = [...ALIAS.entries()].find(([k,v]) => v === geoName);
    const key = (alt ? alt[0] : geoName);
    const cnt = currentYearData[key] || currentYearData[geoName] || 0;
    tip.style("opacity", 1)
       .html(`<b>${geoName}</b><br/>Incidents: ${cnt}`)
       .style("left", (ev.pageX+12)+"px")
       .style("top", (ev.pageY-20)+"px");
  })
  .on("mouseout", () => tip.style("opacity", 0));

let currentYearData = {};

function update(year) {
  yearLabel.text(year);
  currentYearData = BYYEAR[year] || {};
  const values = Object.values(currentYearData);
  const maxV = d3.max(values) || 1;
  color.domain([0, maxV]);
  legendScale.domain([0, maxV]);
  legendAxisG.call(legendAxis);

  countries.transition().duration(400)
    .attr("fill", d => {
      const geoName = d.properties.name;
      const alt = [...ALIAS.entries()].find(([k,v]) => v === geoName);
      const key = (alt ? alt[0] : geoName);
      return color(currentYearData[key] || currentYearData[geoName] || 0);
    });
}

const start = years[0] ?? 1970;
slider.property("value", start);
update(start);
</script>
</body>
</html>
"""


#Write the finished page next to the other generated files
( BASE / "d3_heatmap.html" ).write_text(choropleth, encoding="utf-8")
print("Wrote:", BASE/"d3_heatmap.html")


Wrote: /content/Assignment3/d3_heatmap.html


In [27]:
from IPython.display import IFrame
import shutil
from google.colab import files

# Copy the page into /content, preview it inline, then offer the copy as a download.
heatmap_copy = "/content/d3_heatmap.html"
shutil.copy(str(BASE/"d3_heatmap.html"), heatmap_copy)
heatmap_frame = IFrame(src="/files/d3_heatmap.html", width=1024, height=640)
display(heatmap_frame)

files.download(heatmap_copy)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

##Terrorist groups bar chart

In [60]:
#Build a self-contained D3 bar chart page (groups per year, with a year selector)
#and save it as d3_top_groups.html in BASE.
import json
data_path = BASE / "top_groups_by_year.json"
#Parsed here and re-serialized with json.dumps below, where it becomes the JS constant RAW.
GROUPS_BY_YEAR = json.loads(data_path.read_text(encoding="utf-8"))

#The template is split into two string pieces around the json.dumps(...) call.
#The embedded script computes the yearly total from all entries of a year and
#then keeps the 10 largest for the bars.
#NOTE(review): the dumped JSON is placed inside the <script> element unescaped; a
#group name containing "</script>" would end the script early - confirm the data
#cannot contain it.
top10_html = """
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8"/>
<title>D3 — Top 10 Groups by Year</title>
<script src="https://d3js.org/d3.v7.min.js"></script>
<style>
  body { font-family: system-ui, -apple-system, Segoe UI, Roboto, Arial; }
  #wrap { width: 900px; margin: 20px auto; }
  .axis text { font-size: 11px; }
  #meta { margin: 6px 0 12px 0; }
</style>
</head>
<body>
<div id="wrap">
  <h2>Top 10 Terrorist Groups</h2>
  <div id="meta">
    <label for="yearSelect">Year: </label>
    <select id="yearSelect"></select>
    <span id="totalYear"></span>
  </div>
  <div id="chart"></div>
</div>

<script>
const RAW = """ + json.dumps(GROUPS_BY_YEAR) + """;

const margin = {top:20,right:30,bottom:40,left:300},
      width=860-margin.left-margin.right,
      height=480-margin.top-margin.bottom;

const svg=d3.select("#chart").append("svg")
  .attr("width",width+margin.left+margin.right)
  .attr("height",height+margin.top+margin.bottom)
  .append("g").attr("transform",`translate(${margin.left},${margin.top})`);

const x=d3.scaleLinear().range([0,width]),
      y=d3.scaleBand().range([0,height]).padding(0.18);

// NEW: color scale based on incident count (light -> dark)
const color = d3.scaleLinear().range(["#c7d2fe", "#1e3a8a"]);

const xAxisG=svg.append("g").attr("transform",`translate(0,${height})`);
const yAxisG=svg.append("g");
const barsG=svg.append("g");
const labelsG=svg.append("g");
const totalYear=d3.select("#totalYear");

const years=Object.keys(RAW).map(d=>+d).sort((a,b)=>a-b);
const sel=d3.select("#yearSelect");
sel.selectAll("option").data(years).join("option")
  .attr("value",d=>d).text(d=>d);
sel.on("change",ev=>update(+ev.target.value));

function update(year){
  const data=(RAW[year]||[]).map(d=>({gname:d.gname,count:+d.count||0}));
  const total=d3.sum(data,d=>d.count);
  totalYear.text(` | Total incidents in ${year}: ${total}`);

  const top10=data.sort((a,b)=>d3.descending(a.count,b.count)).slice(0,10);

  const maxV = d3.max(top10,d=>d.count) || 1;
  x.domain([0, maxV]);
  y.domain(top10.map(d=>d.gname));
  color.domain([0, maxV]); // NEW: set color scale domain each year

  xAxisG.call(d3.axisBottom(x).ticks(6).tickSizeOuter(0));
  yAxisG.call(d3.axisLeft(y).tickSizeOuter(0));

  const bars=barsG.selectAll("rect").data(top10,d=>d.gname);

  bars.exit().remove();

  const enter = bars.enter().append("rect")
      .attr("x",0)
      .attr("y",d=>y(d.gname))
      .attr("height",y.bandwidth())
      .attr("width",0)
      .attr("fill", d => color(d.count))
      .on("mouseenter", function(){ d3.select(this).attr("stroke","#0f172a").attr("stroke-width",1); })  // hover highlight
      .on("mouseleave", function(){ d3.select(this).attr("stroke","none"); });

  enter.merge(bars)
      .transition().duration(450)
      .attr("y",d=>y(d.gname))
      .attr("height",y.bandwidth())
      .attr("width",d=>Math.max(3,x(d.count)))
      .attr("fill", d => color(d.count));  // NEW: fill from color scale

  const labels=labelsG.selectAll("text").data(top10,d=>d.gname);
  labels.exit().remove();
  labels.enter().append("text")
      .attr("class","label")
      .style("font-size","11px")
    .merge(labels)
      .transition().duration(450)
      .attr("y",d=>y(d.gname)+y.bandwidth()/2+4)
      .attr("x",d=>x(d.count)+6)
      .style("fill","#111827")
      .text(d=>d.count);
}
const start=years[0]??1970;
sel.property("value",start);
update(start);
</script>
</body>
</html>
"""


#Write the finished page next to the other generated files
(BASE / "d3_top_groups.html").write_text(top10_html, encoding="utf-8")
print("Wrote:", BASE/"d3_top_groups.html")


Wrote: /content/Assignment3/d3_top_groups.html


In [61]:
from IPython.display import IFrame
import shutil
from google.colab import files

# Copy the page into /content, preview it inline, then offer the copy as a download.
groups_copy = "/content/d3_top_groups.html"
shutil.copy(str(BASE/"d3_top_groups.html"), groups_copy)
groups_frame = IFrame(src="/files/d3_top_groups.html", width=980, height=560)
display(groups_frame)

files.download(groups_copy)

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Plotly

In [44]:
#preparing attack type counts per year
import pandas as pd

# `wide` is always defined (empty when the columns are missing), so the
# plotting cell's `len(wide) > 0` check works instead of raising NameError.
wide = pd.DataFrame()
if {"iyear","attacktype1_txt"}.issubset(df.columns):
    # One row per (year, attack type) with the number of incidents
    attack_year = (
        df.groupby(["iyear", "attacktype1_txt"])
          .size()
          .reset_index(name="count")
    )
    # Pivot so each attack type becomes a column; year/type pairs without
    # incidents are filled with 0 and rows are ordered by year.
    wide = attack_year.pivot(index="iyear", columns="attacktype1_txt", values="count").fillna(0).sort_index()
    print("Data prepared. Shape:", wide.shape)
else:
    print("Missing required columns in your dataset.")

# Kept as the last top-level expression of the cell so the preview is actually
# rendered; an expression nested inside the `if` block is evaluated but never shown.
wide.head()


Data prepared. Shape: (49, 9)


In [49]:
import plotly.graph_objects as go
import numpy as np

# Stacked-area chart of incidents per attack type and year, with buttons that
# switch between stacked counts, overlaid lines and 100%-normalized shares.
fig = go.Figure()

if len(wide) > 0:
    years = wide.index.tolist()
    attack_types = wide.columns.tolist()

    def _hover(atk, value_line):
        """Hover text for one trace; `value_line` labels the y value."""
        return "Year: %{x}<br>Attack Type: " + str(atk) + "<br>" + value_line + "<extra></extra>"

    # One template per trace and mode. The share template is needed because the
    # "100% Stacked" button replaces y with percentages, which must not be
    # labelled "Count".
    count_hover = [_hover(atk, "Count: %{y}") for atk in attack_types]
    share_hover = [_hover(atk, "Share: %{y:.1f}%") for atk in attack_types]

    #Adding a line (area) for each attack type
    for atk, hover in zip(attack_types, count_hover):
        fig.add_trace(go.Scatter(
            x=years, y=wide[atk],
            mode="lines",
            name=str(atk),
            stackgroup="one",  # enables stacking
            hovertemplate=hover
        ))

    #100% normalized version (years with a zero total become NaN instead of dividing by zero)
    totals = wide.sum(axis=1).replace(0, np.nan)
    wide_pct = wide.divide(totals, axis=0) * 100

    counts_y = [wide[c].values for c in attack_types]
    shares_y = [wide_pct[c].values for c in attack_types]
    n_traces = len(fig.data)

    #Buttons for modes: each restyles every trace (list entry i goes to trace i)
    #and updates the y-axis title to match.
    buttons = [
        dict(label="Stacked",
             method="update",
             args=[{"stackgroup": ["one"] * n_traces,
                    "y": counts_y,
                    "hovertemplate": count_hover},
                   {"yaxis": {"title": "Number of Incidents"}}]),
        dict(label="Overlay",
             method="update",
             args=[{"stackgroup": [None] * n_traces,
                    "y": counts_y,
                    "hovertemplate": count_hover},
                   {"yaxis": {"title": "Number of Incidents"}}]),
        dict(label="100% Stacked",
             method="update",
             args=[{"stackgroup": ["one"] * n_traces,
                    "y": shares_y,
                    "hovertemplate": share_hover},
                   {"yaxis": {"title": "Percentage (%)"}}]),
    ]

    fig.update_layout(
        title=dict(
            text="Attack Types Over Time",
            x=0.5,                #center title
            y=0.97,               #placing above buttons
            xanchor="center",
            yanchor="top",
            font=dict(size=20, color="#111827", family="Arial Black")
        ),
        xaxis_title="Year",
        yaxis_title="Number of Incidents",
        legend_title="Attack Type",
        updatemenus=[dict(
            type="buttons",
            direction="right",
            x=0.0,
            y=1.08,               #buttons below title, above chart
            pad={"r": 10, "t": 0},
            buttons=buttons
        )],
        margin=dict(t=120, l=60, r=40, b=60)
    )

    fig.show()
else:
    print("No data to plot.")


In [50]:
# Save the Plotly figure built above as an HTML page in the working folder.
# include_plotlyjs="cdn" is passed - presumably so the page links to plotly.js
# instead of embedding it (confirm if the file must work offline).
fig.write_html("/content/Assignment3/attack_types_plot.html", include_plotlyjs="cdn")

In [51]:
# Offer the saved page as a download; `files` is the google.colab module
# imported in earlier cells, so one of those cells must have run first.
files.download("/content/Assignment3/attack_types_plot.html")

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

#Bokeh

In [59]:
import numpy as np, pandas as pd
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models import ColumnDataSource, HoverTool, RangeSlider, CustomJS, CategoricalColorMapper
from bokeh.layouts import column
from bokeh.palettes import Category10, Category20, Turbo256
from bokeh.resources import CDN
from bokeh.embed import file_html
from google.colab import files
from bokeh.layouts import row

#Route Bokeh output to the notebook
output_notebook()

#Columns the scatter plot needs; stop with a readable message if any is absent
needed = {"targtype1_txt","nkill","nwound","iyear"}
absent = needed - set(df.columns)
assert not absent, f"Missing columns: {absent}"

def prep_df(_df):
    """Build the per-incident table behind the casualties scatter plot.

    Parameters
    ----------
    _df : pandas.DataFrame
        Must contain "targtype1_txt", "nkill", "nwound" and "iyear".
        It is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy restricted to those four columns, without the rows whose
        year is missing, plus two derived columns:

        * "casualties": nkill + nwound with missing counts treated as 0.
          This is the true total, so hover tooltips report real numbers.
        * "size": marker size between 6 and 30, scaled linearly on the
          casualties capped at their 99th percentile so that a few extreme
          incidents do not shrink every other marker.
    """
    # Explicit list: fixed column order and no dependency on notebook-level state.
    cols = ["targtype1_txt", "nkill", "nwound", "iyear"]
    d = _df[cols].copy()
    d["nkill"] = pd.to_numeric(d["nkill"], errors="coerce").fillna(0)
    d["nwound"] = pd.to_numeric(d["nwound"], errors="coerce").fillna(0)
    d["casualties"] = d["nkill"] + d["nwound"]
    d = d.dropna(subset=["iyear"]).copy()

    # The cap is applied to a separate series used only for sizing; the
    # "casualties" column itself keeps the uncapped totals.
    capped = d["casualties"]
    if not capped.empty:  # a percentile of no rows is undefined
        capped = capped.clip(upper=np.nanpercentile(capped, 99))

    #size scale
    cmin, cmax = float(capped.min()), float(capped.max())
    span = (cmax - cmin) if cmax > cmin else 1.0  # avoid dividing by zero when all values match
    d["size"] = 6 + 24 * (capped - cmin) / span
    d["targtype1_txt"] = d["targtype1_txt"].fillna("Unknown")
    return d

def pick_palette(n):
    """Return `n` colors, one per category.

    The first `n` entries of the smallest sufficient list among
    Category10[3], Category10[10] and Category20[20] are used; for more
    than 20 categories, `n` evenly spaced entries of Turbo256 are sampled.
    """
    for limit, palette in ((3, Category10[3]), (10, Category10[10]), (20, Category20[20])):
        if n <= limit:
            return palette[:n]
    positions = np.linspace(0, 255, n, dtype=int)
    return [Turbo256[pos] for pos in positions]
#Displaying in Colab: scatter of killed vs. wounded, colored by target type,
#with a year-range slider that filters the plotted rows in the browser.
d_disp = prep_df(df)
cats_disp = sorted(set(d_disp["targtype1_txt"]))
pal_disp = pick_palette(len(cats_disp))
mapper_disp = CategoricalColorMapper(palette=pal_disp, factors=cats_disp)

#src_all_disp keeps every row; src_view_disp is what the glyphs draw and is
#overwritten by the slider callback.
src_all_disp, src_view_disp = ColumnDataSource(d_disp), ColumnDataSource(d_disp)

p_disp = figure(
    width=900, height=520,
    title="Target Types and Casualties",
    x_axis_label="Number Killed",
    y_axis_label="Number Wounded",
    tools="pan,wheel_zoom,box_zoom,reset,save",
)

p_disp.scatter(
    source=src_view_disp,
    x="nkill", y="nwound",
    size="size",
    marker="circle",
    fill_color=dict(field="targtype1_txt", transform=mapper_disp),
    fill_alpha=0.6,
    line_color=None,
    legend_field="targtype1_txt",
)

hover_disp = HoverTool(tooltips=[
    ("Target Type", "@targtype1_txt"),
    ("Killed", "@nkill"),
    ("Wounded", "@nwound"),
    ("Casualties", "@casualties"),
    ("Year", "@iyear"),
])
p_disp.add_tools(hover_disp)
p_disp.legend.location = "top_left"
p_disp.legend.click_policy = "hide"

yr_min_d = int(d_disp["iyear"].min())
yr_max_d = int(d_disp["iyear"].max())
slider_disp = RangeSlider(
    title="Year range",
    start=yr_min_d, end=yr_max_d,
    value=(yr_min_d, yr_max_d),
    step=1,
)

#Browser-side filter: copy the rows of the full source whose year lies inside
#the slider range into the source the glyphs are drawn from.
callback_disp = CustomJS(args=dict(src=src_all_disp, view=src_view_disp, slider=slider_disp), code="""
    const [lo, hi] = slider.value;
    const data = src.data;
    const out = {};
    for (const k in data) out[k] = [];
    for (let i = 0; i < data['iyear'].length; i++) {
        const y = data['iyear'][i];
        if (y >= lo && y <= hi) {
            for (const k in data) out[k].push(data[k][i]);
        }
    }
    view.data = out;
    view.change.emit();
""")
slider_disp.js_on_change("value", callback_disp)

show(column(slider_disp, p_disp))

#Export: a second, separate set of Bokeh objects is built for the standalone
#HTML file (presumably because the objects shown above already belong to the
#notebook output - confirm).
d_exp = prep_df(df)
cats_exp = sorted(d_exp["targtype1_txt"].unique().tolist())
pal_exp = pick_palette(len(cats_exp))
mapper_exp = CategoricalColorMapper(factors=cats_exp, palette=pal_exp)

#src_all_exp keeps every row; src_view_exp is what the glyphs draw and is
#overwritten by the slider callback.
src_all_exp  = ColumnDataSource(d_exp)
src_view_exp = ColumnDataSource(d_exp)

p_exp = figure(
    title="Target Types and Casualties",
    x_axis_label="Number Killed",
    y_axis_label="Number Wounded",
    width=900, height=520,
    tools="pan,wheel_zoom,box_zoom,reset,save"
)
p_exp.scatter(
    x="nkill", y="nwound",
    size="size",
    fill_color={"field":"targtype1_txt", "transform": mapper_exp},
    fill_alpha=0.6, line_color=None, marker="circle",
    source=src_view_exp, legend_field="targtype1_txt"
)
p_exp.add_tools(HoverTool(tooltips=[
    ("Target Type", "@targtype1_txt"),
    ("Killed", "@nkill"),
    ("Wounded", "@nwound"),
    ("Casualties", "@casualties"),
    ("Year", "@iyear")
]))
#Legend styling for the exported figure. These settings target p_exp: p_disp
#has already been shown above, and p_exp is the figure written to the HTML
#file, so styling p_disp here would leave the export with default legend
#behavior (no click-to-hide).
#NOTE(review): "center" draws the legend over the middle of the plot area -
#confirm that placement is intended.
p_exp.legend.location = "center"
p_exp.legend.orientation = "vertical"
p_exp.legend.label_text_font_size = "10px"
p_exp.legend.spacing = 4
p_exp.legend.glyph_width = 15
p_exp.legend.click_policy = "hide"

#Adjusting margins to give more room
p_exp.margin = (20, 100, 20, 40)

yr_min_e, yr_max_e = int(d_exp["iyear"].min()), int(d_exp["iyear"].max())
slider_exp = RangeSlider(start=yr_min_e, end=yr_max_e, value=(yr_min_e, yr_max_e), step=1, title="Year range")

#Browser-side filter: copy the rows of the full source whose year lies inside
#the slider range into the source the glyphs are drawn from.
callback_exp = CustomJS(args=dict(src=src_all_exp, view=src_view_exp, slider=slider_exp), code="""
    const [lo, hi] = slider.value;
    const data = src.data;
    const out = {};
    for (const k in data) out[k] = [];
    for (let i = 0; i < data['iyear'].length; i++) {
        const y = data['iyear'][i];
        if (y >= lo && y <= hi) {
            for (const k in data) out[k].push(data[k][i]);
        }
    }
    view.data = out;
    view.change.emit();
""")
slider_exp.js_on_change("value", callback_exp)

layout_export = column(slider_exp, p_exp)

#Render the layout to a standalone HTML document using the CDN resources
#imported above, save it, and offer it as a download.
html = file_html(layout_export, CDN, "Target Types and Casualties")
out_path = "/content/Assignment3/bokeh_target_casualties.html"
with open(out_path, "w", encoding="utf-8") as f:
    f.write(html)

print("Saved interactive HTML at:", out_path)
files.download(out_path)


Saved interactive HTML at: /content/Assignment3/bokeh_target_casualties.html


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>