# Packages

In [129]:
import itertools
from pathlib import Path

import networkx as nx
import numpy as np
import pandas as pd
import pyvis

# Individuals

## Network preparation

In [130]:
# Load the list of individuals whose co-mentions will form the network.
df_individuals = pd.read_csv(filepath_or_buffer='_aux/Individuals.csv')

In [131]:
# Load the article corpus that is searched for mentions of each individual.
df_articles = pd.read_csv(filepath_or_buffer='_raw/Articles.csv')

In [132]:
# Preview the articles table (article text and its language code).
df_articles

Unnamed: 0,text,lang
0,EPL Muscles in on El Naya Drug Corridor in Wes...,en
1,Campaign Group Warns of Dangers for Colombia E...,en
2,InSight: Report Tracks How Intra-Cartel Wars E...,en
3,Colombia Cocaine Production Breaks New Record ...,en
4,US Marijuana Vote Unlikely to Impact Mexico in...,en
...,...,...
19935,Presencia de pandillas en Tegucigalpa. La exto...,es
19936,Pandillas callejeras. Las conocidas pandillas ...,es
19937,"Imitadores. Mediante la violencia, la Mara Sal...",es
19938,ABC de la extorsión 3: El modus operandi. Un t...,es


In [133]:
# Preview the individuals table (full name, aliases, first name, family name).
df_individuals

Unnamed: 0,name,alias_1,alias_2,first_name,family_name
0,Héctor Rusthenford Guerrero Flores,Niño Guerrero,,Héctor Rusthenford,Guerrero Flores
1,Hermágoras González Polanco,Gordito González,,Hermágoras,González Polanco
2,Yeico Masacre,,,Yeico,Masacre
3,Genaro García Luna,,,Genaro,García Luna
4,Gilberto Rodríguez Orejuela,The Chess Player,,Gilberto,Rodríguez Orejuela
...,...,...,...,...,...
86,Miguel Angel Treviño Morales,Z40,,Miguel Angel,Treviño Morales
87,Diego Perez Henao,Diego Rastrojo,,Diego,Perez Henao
88,Luis E. Calle Serna,,,Luis Enrique,Calle Serna
89,Javier Antonio Calle Serna,Comba,,Javier Antonio,Calle Serna


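"Úsuga" and "Calle Serna" are the only duplicated family names: each appears twice, because in both cases the two individuals are brothers. Searching by family name alone could not tell the brothers apart, so for them the search term is the first name followed by the family name (e.g. "Luis Enrique Calle Serna"), while for every other individual it is just the family name.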

In [134]:
# Build the string that is searched for in the article text.
# Individuals whose family name is shared with a sibling are searched by
# "<first_name> <family_name>"; everyone else by family name only.
df_individuals['search_term'] = np.where(
    df_individuals['family_name'].isin(["Úsuga", "Calle Serna"]),
    df_individuals['first_name'] + ' ' + df_individuals['family_name'],
    df_individuals['family_name'],
)

In [135]:
# Check the newly added search_term column.
df_individuals

Unnamed: 0,name,alias_1,alias_2,first_name,family_name,search_term
0,Héctor Rusthenford Guerrero Flores,Niño Guerrero,,Héctor Rusthenford,Guerrero Flores,Guerrero Flores
1,Hermágoras González Polanco,Gordito González,,Hermágoras,González Polanco,González Polanco
2,Yeico Masacre,,,Yeico,Masacre,Masacre
3,Genaro García Luna,,,Genaro,García Luna,García Luna
4,Gilberto Rodríguez Orejuela,The Chess Player,,Gilberto,Rodríguez Orejuela,Rodríguez Orejuela
...,...,...,...,...,...,...
86,Miguel Angel Treviño Morales,Z40,,Miguel Angel,Treviño Morales,Treviño Morales
87,Diego Perez Henao,Diego Rastrojo,,Diego,Perez Henao,Perez Henao
88,Luis E. Calle Serna,,,Luis Enrique,Calle Serna,Luis Enrique Calle Serna
89,Javier Antonio Calle Serna,Comba,,Javier Antonio,Calle Serna,Javier Antonio Calle Serna


## Network creation

In [136]:
# Lookup table: full name of an individual -> term searched for in the articles.
dict_individuals = {
    full_name: term
    for full_name, term in zip(df_individuals['name'], df_individuals['search_term'])
}

In [137]:
# Enumerate every unordered pair of individuals; each pair is a candidate edge
# whose weight starts at zero and is filled in by the co-mention count later.
value_combinations = [*itertools.combinations(df_individuals['name'].values, 2)]
df_connections = pd.DataFrame(data=value_combinations, columns=['ind_1', 'ind_2'])
df_connections['weight'] = 0
df_connections = df_connections.reset_index(drop=True)

In [138]:
# Preview the candidate pairs; every weight is still 0 at this point.
df_connections

Unnamed: 0,ind_1,ind_2,weight
0,Héctor Rusthenford Guerrero Flores,Hermágoras González Polanco,0
1,Héctor Rusthenford Guerrero Flores,Yeico Masacre,0
2,Héctor Rusthenford Guerrero Flores,Genaro García Luna,0
3,Héctor Rusthenford Guerrero Flores,Gilberto Rodríguez Orejuela,0
4,Héctor Rusthenford Guerrero Flores,Jobanis de Jesús Ávila Villadiego,0
...,...,...,...
4090,Diego Perez Henao,Javier Antonio Calle Serna,0
4091,Diego Perez Henao,Guillermo Leon Saenz Vargas,0
4092,Luis E. Calle Serna,Javier Antonio Calle Serna,0
4093,Luis E. Calle Serna,Guillermo Leon Saenz Vargas,0


In [139]:
# Count, for every pair of individuals, how many articles mention both of them.
#
# Each individual's search term is matched against each article exactly once,
# recording the positions of the articles that contain it. The weight of a pair
# is then the size of the intersection of the two individuals' article sets.
# This yields the same counts as testing every (article, pair) combination, but
# avoids one `.loc` read per combination, which dominates the run time.
#
# The weights are assigned rather than incremented, so re-running this cell
# does not double-count.
article_texts = df_articles['text'].values
articles_by_individual = {
    full_name: {
        position
        for position, text in enumerate(article_texts)
        if term in text
    }
    for full_name, term in dict_individuals.items()
}
df_connections['weight'] = pd.Series(
    [
        len(articles_by_individual[ind_1] & articles_by_individual[ind_2])
        for ind_1, ind_2 in zip(df_connections['ind_1'], df_connections['ind_2'])
    ],
    index=df_connections.index,
    dtype='int64',  # keep an integer column even when there are no pairs
)

In [140]:
# Node size for each individual: the number of articles whose text contains
# that individual's search term.
dict_sizes = {
    individual: sum(
        dict_individuals[individual] in text
        for text in df_articles['text'].values
    )
    for individual in df_individuals['name'].values
}

## Network visualisation

In [141]:
# Inspect the pair weights that are used as edge weights when building the graph.
df_connections

Unnamed: 0,ind_1,ind_2,weight
0,Héctor Rusthenford Guerrero Flores,Hermágoras González Polanco,0
1,Héctor Rusthenford Guerrero Flores,Yeico Masacre,0
2,Héctor Rusthenford Guerrero Flores,Genaro García Luna,0
3,Héctor Rusthenford Guerrero Flores,Gilberto Rodríguez Orejuela,0
4,Héctor Rusthenford Guerrero Flores,Jobanis de Jesús Ávila Villadiego,0
...,...,...,...
4090,Diego Perez Henao,Javier Antonio Calle Serna,14
4091,Diego Perez Henao,Guillermo Leon Saenz Vargas,0
4092,Luis E. Calle Serna,Javier Antonio Calle Serna,16
4093,Luis E. Calle Serna,Guillermo Leon Saenz Vargas,0


In [142]:
# Turn the pair table into an undirected weighted graph. Pairs that were never
# mentioned together (weight 0) are left out, so they produce no edge.
elist = list(
    zip(
        df_connections['ind_1'],
        df_connections['ind_2'],
        df_connections['weight'],
    )
)
g = nx.Graph()
g.add_weighted_edges_from(
    (ind_1, ind_2, weight)
    for ind_1, ind_2, weight in elist
    if weight != 0
)

In [145]:
# Render the co-mention graph as an interactive HTML page with pyvis.

# Resolve the picture folder from the working directory instead of hard-coding
# one user's home directory, so the notebook runs from any checkout. The other
# cells already assume the working directory is the repository root
# ('_aux/...', '_raw/...', '_out/...').
PICTURES_DIR = Path('_aux/pictures').resolve()

# Make sure the output folder exists before pyvis writes into it.
Path('_out/networks').mkdir(parents=True, exist_ok=True)

# NOTE: select_menu=True / filter_menu=True can be passed to Network(...) to add
# the interactive selection and filter menus.
nt = pyvis.network.Network('1500px','1500px', bgcolor="#222222", font_color="white")
nt.barnes_hut()
nt.from_nx(g)
nt.toggle_physics(True)

for node in nt.nodes:
    # Every node is drawn as a round portrait labelled with the person's name.
    node['font']['size'] = 100
    node['shape'] = 'circularImage'
    # One picture per individual, named after the node id (the full name).
    node['image'] = (PICTURES_DIR / f"{node['id']}.png").as_posix()
    # Node size is the number of articles mentioning the individual.
    node['size'] = dict_sizes[node['id']]

nt.show('_out/networks/individuals.html',notebook=False)

_out/networks/individuals.html


In [146]:
# Inspect the node attribute dictionaries (size, label, shape, font, image) held by the pyvis network.
nt.nodes

[{'color': '#97c2fc',
  'size': 5,
  'id': 'Héctor Rusthenford Guerrero Flores',
  'label': 'Héctor Rusthenford Guerrero Flores',
  'shape': 'circularImage',
  'font': {'color': 'white', 'size': 100},
  'image': '_aux/pictures/Héctor Rusthenford Guerrero Flores.png'},
 {'color': '#97c2fc',
  'size': 1131,
  'id': 'Pedro Oliverio Guerrero',
  'label': 'Pedro Oliverio Guerrero',
  'shape': 'circularImage',
  'font': {'color': 'white', 'size': 100},
  'image': '_aux/pictures/Pedro Oliverio Guerrero.png'},
 {'color': '#97c2fc',
  'size': 37,
  'id': 'Yeico Masacre',
  'label': 'Yeico Masacre',
  'shape': 'circularImage',
  'font': {'color': 'white', 'size': 100},
  'image': '_aux/pictures/Yeico Masacre.png'},
 {'color': '#97c2fc',
  'size': 14,
  'id': 'Genaro García Luna',
  'label': 'Genaro García Luna',
  'shape': 'circularImage',
  'font': {'color': 'white', 'size': 100},
  'image': '_aux/pictures/Genaro García Luna.png'},
 {'color': '#97c2fc',
  'size': 703,
  'id': 'Pablo Escobar',
 

# Groups

## Network preparation

## Network visualisation