---
### Reducing the number of edges: find a rational rule

1. Set a window: confine the lifespan of an event, probably to 1-2 months. This is the easiest way, and it is also crucial for time-series analysis.  
One thing to consider is that there are two types of events: powerful and negligible.

2. It looks like a power-law function. $f = p^{-t}$ is its basic form; we may need to modify it.

3. Use collective attention: how long an event-related hashtag survives. However, it is hard to find event-specific hashtags, and most events are not mentioned on Twitter.

1 and 2 seem plausible (and can probably work at the same time), but 3 seems infeasible.

---

In [2]:
import math
import numpy as np
import pandas as pd
import networkx as nx
import seaborn as sns
import matplotlib.pyplot as plt
import numbers

In [None]:
# Load the raw event table; the first CSV column becomes the row index.
raw_events_path = "6before-network.csv"
df = pd.read_csv(raw_events_path, index_col=0)
# Preview the first ten rows.
df.head(10)

In [None]:
# Log-transform the casualty counts with log(1 + x), so a count of zero maps
# to zero while large counts are compressed.
# The transform is vectorised: there are no per-row label lookups, so it does
# not rely on the row index being exactly 0..len(df)-1.
df['log_n_killed'] = np.log1p(df['n_killed'])
df['log_n_injured'] = np.log1p(df['n_injured'])

In [None]:
# Combine the two log-scaled casualty columns into one weighted score 'p':
# weight 0.7 on deaths and 0.3 on injuries.
weight_killed, weight_injured = 0.7, 0.3
killed_part = weight_killed * df['log_n_killed']
injured_part = weight_injured * df['log_n_injured']
df['p'] = killed_part + injured_part

In [None]:
## Remove the 14 columns at positions 11 through 24 (columns, not rows).
## The selection is positional, so this cell is not idempotent: running it
## again on the result targets whatever columns then sit at those positions.
unused_positions = list(range(11, 25))
unused_columns = df.columns[unused_positions]
df = df.drop(columns=unused_columns)

In [None]:
# Persist the transformed table without writing the row index.
transformed_path = "7logarithm_transformation.csv"
df.to_csv(transformed_path, index=False)

### read data

In [3]:
# Reload the saved table; with no index_col the rows get a default 0..n-1 index.
transformed_path = "7logarithm_transformation.csv"
df = pd.read_csv(transformed_path)

### Tomorrow model

In [36]:
# Build the 1-day ("tomorrow") graph: one node per row of df, and a directed
# edge i -> j when event j has a larger 'ind' than event i and its 'ind_t' is
# later by more than 0 and at most 1 (days, per the section heading).
G1d = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G1d.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes (quadratic in the number of events).
for i, src in G1d.nodes(data=True):
    for j, dst in G1d.nodes(data=True):
        if 0 < dst['ind_t'] - src['ind_t'] <= 1 and dst['ind'] > src['ind']:
            G1d.add_edge(i, j)

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G1d.number_of_nodes()
n_edges = G1d.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G1d.name, kind=type(G1d).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 3681
Average in degree:   1.7638
Average out degree:   1.7638


In [37]:
# Export the 1-day graph as a GEXF file.
g1d_path = "graph/G1d.gexf"
nx.write_gexf(G1d, g1d_path)

### 7 days

In [38]:
# Build the 7-day graph: one node per row of df, and a directed edge i -> j
# when event j has a larger 'ind' than event i and its 'ind_t' is later by
# more than 0 and at most 7 (days, per the section heading).
G7d = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G7d.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes (quadratic in the number of events).
for i, src in G7d.nodes(data=True):
    for j, dst in G7d.nodes(data=True):
        if 0 < dst['ind_t'] - src['ind_t'] <= 7 and dst['ind'] > src['ind']:
            G7d.add_edge(i, j)

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G7d.number_of_nodes()
n_edges = G7d.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G7d.name, kind=type(G7d).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 25600
Average in degree:  12.2664
Average out degree:  12.2664


In [39]:
# Export the 7-day graph as a GEXF file.
g7d_path = "graph/G7d.gexf"
nx.write_gexf(G7d, g7d_path)

### 14 days

In [40]:
# Build the 14-day graph: one node per row of df, and a directed edge i -> j
# when event j has a larger 'ind' than event i and its 'ind_t' is later by
# more than 0 and at most 14 (days, per the section heading).
G14d = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G14d.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes (quadratic in the number of events).
for i, src in G14d.nodes(data=True):
    for j, dst in G14d.nodes(data=True):
        if 0 < dst['ind_t'] - src['ind_t'] <= 14 and dst['ind'] > src['ind']:
            G14d.add_edge(i, j)

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G14d.number_of_nodes()
n_edges = G14d.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G14d.name, kind=type(G14d).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 50893
Average in degree:  24.3857
Average out degree:  24.3857


In [41]:
# Export the 14-day graph as a GEXF file.
g14d_path = "graph/G14d.gexf"
nx.write_gexf(G14d, g14d_path)

### 30 days

In [42]:
# Build the 30-day graph: one node per row of df, and a directed edge i -> j
# when event j has a larger 'ind' than event i and its 'ind_t' is later by
# more than 0 and at most 30 (days, per the section heading).
G30d = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G30d.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes (quadratic in the number of events).
for i, src in G30d.nodes(data=True):
    for j, dst in G30d.nodes(data=True):
        if 0 < dst['ind_t'] - src['ind_t'] <= 30 and dst['ind'] > src['ind']:
            G30d.add_edge(i, j)

# Export the graph as a GEXF file.
nx.write_gexf(G30d, "graph/G30d.gexf")

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G30d.number_of_nodes()
n_edges = G30d.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G30d.name, kind=type(G30d).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 108446
Average in degree:  51.9626
Average out degree:  51.9626


### Haversine distance

In [43]:
class get_dist():
    """Great-circle distance helpers based on the haversine formula."""

    @staticmethod
    def degree2radius(degree):
        """Convert an angle in degrees to radians (the name says 'radius', the result is radians)."""
        return degree * (math.pi/180)

    @staticmethod
    def distance(x1, y1, x2, y2, round_decimal_digits = 5):
        """Return the haversine distance between two points on a sphere of radius 6371.

        Parameters
        ----------
        x1, y1 : number or None
            Longitude (-180..180) and latitude (-90..90) of the first point, in degrees.
        x2, y2 : number or None
            Longitude and latitude of the second point, in degrees.
        round_decimal_digits : int
            Number of decimal digits the result is rounded to.

        Returns
        -------
        float or None
            The rounded distance, in the unit of the radius (6371, i.e. kilometres
            for the Earth's mean radius), or None if any coordinate is None.

        Raises
        ------
        AssertionError
            If a coordinate is not a number or lies outside its valid range
            (NaN fails the range check as well).
        """
        if x1 is None or y1 is None or x2 is None or y2 is None:
            return None
        assert isinstance(x1, numbers.Number) and -180 <= x1 and x1 <= 180
        assert isinstance(y1, numbers.Number) and -90 <= y1 and y1 <= 90
        assert isinstance(x2, numbers.Number) and -180 <= x2 and x2 <= 180
        assert isinstance(y2, numbers.Number) and -90 <= y2 and y2 <= 90

        R = 6371
        dLon = get_dist.degree2radius(x2-x1)
        dLat = get_dist.degree2radius(y2-y1)

        # Haversine term: sin^2(dLat/2) + cos(lat1) * cos(lat2) * sin^2(dLon/2).
        a = math.sin(dLat/2) * math.sin(dLat/2)\
            + (math.cos(get_dist.degree2radius(y1))
               * math.cos(get_dist.degree2radius(y2))
               * math.sin(dLon/2) * math.sin(dLon/2))
        # Floating-point rounding can push `a` marginally outside [0, 1] for
        # (near-)antipodal points, which would make sqrt(1-a) raise ValueError.
        a = min(1.0, max(0.0, a))
        b = 2 * math.atan2(math.sqrt(a), math.sqrt(1-a))
        return round(R*b, round_decimal_digits)

### 5km without time constraints

In [46]:
# Build the 5 km graph (no time constraint): one node per row of df, and a
# directed edge i -> j when event j has a larger 'ind' than event i and the
# haversine distance between the two events is below 5.
G0_5km = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'state': 'state',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G0_5km.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes; the cheap 'ind' test runs first so the
# distance is only computed for pairs that pass it.
for i, src in G0_5km.nodes(data=True):
    for j, dst in G0_5km.nodes(data=True):
        if dst['ind'] > src['ind'] and get_dist.distance(src['LON'], src['LAT'], dst['LON'], dst['LAT']) < 5:
            G0_5km.add_edge(i, j)

# Export the graph as a GEXF file.
nx.write_gexf(G0_5km, "graph/G0_5km.gexf")

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G0_5km.number_of_nodes()
n_edges = G0_5km.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G0_5km.name, kind=type(G0_5km).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 7980
Average in degree:   3.8237
Average out degree:   3.8237


In [47]:
# Build the 3 km graph (no time constraint): one node per row of df, and a
# directed edge i -> j when event j has a larger 'ind' than event i and the
# haversine distance between the two events is below 3.
G0_3km = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'state': 'state',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G0_3km.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes; the cheap 'ind' test runs first so the
# distance is only computed for pairs that pass it.
for i, src in G0_3km.nodes(data=True):
    for j, dst in G0_3km.nodes(data=True):
        if dst['ind'] > src['ind'] and get_dist.distance(src['LON'], src['LAT'], dst['LON'], dst['LAT']) < 3:
            G0_3km.add_edge(i, j)

# Export the graph as a GEXF file.
nx.write_gexf(G0_3km, "graph/G0_3km.gexf")

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G0_3km.number_of_nodes()
n_edges = G0_3km.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G0_3km.name, kind=type(G0_3km).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 4109
Average in degree:   1.9689
Average out degree:   1.9689


In [48]:
# Build the 1 km graph (no time constraint): one node per row of df, and a
# directed edge i -> j when event j has a larger 'ind' than event i and the
# haversine distance between the two events is below 1.
G0_1km = nx.DiGraph()

# Node attribute name -> df column. The casualty attributes store the
# log-transformed columns.
node_columns = {
    'ind': 'ind',
    'ind_t': 'ind_t',
    'p': 'p',
    'p_class': 'p_class',
    'n_killed': 'log_n_killed',
    'n_injured': 'log_n_injured',
    'state': 'state',
    'LON': 'LON',
    'LAT': 'LAT',
}
for n in range(len(df)):
    # .iloc is positional, matching the 0..len(df)-1 node ids.
    G0_1km.add_node(n, **{attr: df[col].iloc[n] for attr, col in node_columns.items()})

# Compare every ordered pair of nodes; the cheap 'ind' test runs first so the
# distance is only computed for pairs that pass it.
for i, src in G0_1km.nodes(data=True):
    for j, dst in G0_1km.nodes(data=True):
        if dst['ind'] > src['ind'] and get_dist.distance(src['LON'], src['LAT'], dst['LON'], dst['LAT']) < 1:
            G0_1km.add_edge(i, j)

# Export the graph as a GEXF file.
nx.write_gexf(G0_1km, "graph/G0_1km.gexf")

# nx.info() was removed in NetworkX 3.0, so print the same summary directly.
n_nodes = G0_1km.number_of_nodes()
n_edges = G0_1km.number_of_edges()
# In a directed graph the mean in-degree and mean out-degree both equal
# edges / nodes.
mean_degree = n_edges / n_nodes if n_nodes else 0.0
summary = (
    "Name: {name}\n"
    "Type: {kind}\n"
    "Number of nodes: {nodes}\n"
    "Number of edges: {edges}\n"
    "Average in degree: {deg:8.4f}\n"
    "Average out degree: {deg:8.4f}"
)
print(summary.format(name=G0_1km.name, kind=type(G0_1km).__name__,
                     nodes=n_nodes, edges=n_edges, deg=mean_degree))

Name: 
Type: DiGraph
Number of nodes: 2087
Number of edges: 736
Average in degree:   0.3527
Average out degree:   0.3527


In [74]:
# Write df back over the file it was loaded from, without the row index.
# NOTE(review): no visible cell modifies df after the reload, and the execution
# count jumps here; confirm which intermediate cells this save depends on.
transformed_path = "7logarithm_transformation.csv"
df.to_csv(transformed_path, index=False)