In [1]:
import scipy.io
import numpy as np
from scipy.sparse import issparse
from graph_tool.all import Graph, sfdp_layout, graph_draw, fruchterman_reingold_layout, arf_layout, radial_tree_layout
import matplotlib.cm as cm
import matplotlib.colors as mcolors

def sanitize_matrix(mat):
    """Return a dense float numpy array from any sparse/object matrix."""
    if issparse(mat):
        return mat.toarray().astype(float)
    elif isinstance(mat, np.ndarray) and mat.dtype == object:
        try:
            return np.array([[float(cell) for cell in row] for row in mat], dtype=float)
        except Exception:
            return np.array(mat.tolist(), dtype=float)
    else:
        return np.array(mat, dtype=float)

# === Load food web ===
mat_path = "../../src/matlab/data/foodwebs_mat/Brook trout lake_tax_mass.mat"
m = scipy.io.loadmat(mat_path)

# === Extract adjacency and metadata ===
adj = sanitize_matrix(m.get("net"))
taxonomy = m.get("taxonomy", None)
mass = m.get("mass", None)

n = adj.shape[0]

# === Build graph-tool Graph ===
g = Graph(directed=True)
g.add_vertex(n)

srcs, tgts = np.where(adj > 0)
g.add_edge_list(zip(srcs, tgts))

# === Labels ===
v_label = g.new_vertex_property("string")
if taxonomy is not None:
    try:
        labels = [str(x[0]) if isinstance(x, np.ndarray) else str(x) for x in taxonomy.squeeze()]
    except Exception:
        labels = [str(i+1) for i in range(n)]
else:
    labels = [str(i+1) for i in range(n)]
for i, v in enumerate(g.vertices()):
    v_label[v] = labels[int(i)]

# === Node size ===
v_size = g.new_vertex_property("double")
if mass is not None and len(np.ravel(mass)) == n:
    mvals = np.asarray(np.ravel(mass), dtype=float)
    mmin, mmax = float(np.min(mvals)), float(np.max(mvals))
    # scaled = 20 + 30 * (mvals - mmin) / (mmax - mmin) if mmax > mmin else np.full(n, 25.0)
    scaled = 10 + 15 * (mvals - mmin) / (mmax - mmin) if mmax > mmin else np.full(n, 12.0)
    for i, v in enumerate(g.vertices()):
        v_size[v] = float(scaled[i])
else:
    for v in g.vertices():
        v_size[v] = 15 + 2.5 * g.get_out_degree(v)

# === Text colors (all red) ===
v_text_color = g.new_vertex_property("vector<double>")
for v in g.vertices():
    v_text_color[v] = (1.0, 0.0, 0.0, 1.0)  # pure red RGBA

# === Transparent fill ===
transparent_fill = g.new_vertex_property("vector<double>")
for v in g.vertices():
    transparent_fill[v] = (1, 1, 1, 0)  # RGBA white, fully transparent

# === Layout & draw ===
# pos = sfdp_layout(g)
pos = fruchterman_reingold_layout(g, n_iter=5000)
# pos = arf_layout(g)
# pos = radial_tree_layout(g, g.vertex(0))
graph_draw(
    g, pos=pos,
    vertex_text=v_label,
    vertex_size=v_size,
    vertex_fill_color=transparent_fill,   # transparent fill
    vertex_color=(0.5, 0.5, 0.5, 1.0),    # thin gray outline
    vertex_text_color=v_text_color,       # taxonomy-based colors
    edge_color=(0.5, 0.5, 0.5, 1.0),
    edge_pen_width=3.0,
    output_size=(1000, 200),
    output="Brook trout lake_graph.png"  # uncomment to save
)

# print(f"Plotted food web graph for: {mat_path.split('/')[-1].replace('.mat', '')}")

<VertexPropertyMap object with value type 'vector<double>', for Graph 0x137efb7d0, at 0x177bee840>

In [2]:
import pandas as pd

dataset_path = pd.read_csv("../../data/raw/283_3_Dataset/283_2_FoodWebDataBase_2018_12_10.csv")
dataset_path.info()

  dataset_path = pd.read_csv("../../data/raw/283_3_Dataset/283_2_FoodWebDataBase_2018_12_10.csv")


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 222151 entries, 0 to 222150
Data columns (total 46 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   autoID                      222151 non-null  int64  
 1   link.citation               222151 non-null  object 
 2   link.methodology            65980 non-null   object 
 3   interaction.type            220177 non-null  object 
 4   interaction.dimensionality  203777 non-null  object 
 5   interaction.classification  221249 non-null  object 
 6   con.taxonomy                222151 non-null  object 
 7   con.taxonomy.level          214126 non-null  object 
 8   con.common                  57159 non-null   object 
 9   con.lifestage               138535 non-null  object 
 10  con.metabolic.type          222151 non-null  object 
 11  con.movement.type           222120 non-null  object 
 12  con.size.citation           47387 non-null   object 
 13  con.size.metho

In [5]:
dataset_path.columns

Index(['autoID', 'link.citation', 'link.methodology', 'interaction.type',
       'interaction.dimensionality', 'interaction.classification',
       'con.taxonomy', 'con.taxonomy.level', 'con.common', 'con.lifestage',
       'con.metabolic.type', 'con.movement.type', 'con.size.citation',
       'con.size.method', 'con.length.min.cm.', 'con.length.mean.cm.',
       'con.length.max.cm.', 'con.mass.min.g.', 'con.mass.mean.g.',
       'con.mass.max.g.', 'res.taxonomy', 'res.taxonomy.level', 'res.common',
       'res.lifestage', 'res.metabolic.type', 'res.movement.type',
       'res.size.citation', 'res.size.method', 'res.length.min.cm.',
       'res.length.mean.cm.', 'res.length.max.cm.', 'res.mass.min.g.',
       'res.mass.mean.g.', 'res.mass.max.g.', 'geographic.location',
       'longitude', 'latitude', 'ecosystem.type', 'study.site', 'altitude',
       'depth', 'sampling.time', 'sampling.start.year', 'sampling.end.year',
       'notes', 'foodweb.name'],
      dtype='object')

In [6]:
dataset_path.value_counts("latitude")

latitude
-73.000000    16041
 37.492083    15821
 48.400000     8006
 48.380000     5650
 48.370000     5406
              ...  
-23.828842       24
 78.794285       21
 43.600000       19
-23.812962       17
 48.612209       16
Name: count, Length: 242, dtype: int64

In [8]:
dataset_path["latitude"].info()

<class 'pandas.core.series.Series'>
RangeIndex: 222151 entries, 0 to 222150
Series name: latitude
Non-Null Count   Dtype  
--------------   -----  
222151 non-null  float64
dtypes: float64(1)
memory usage: 1.7 MB


In [1]:
# pip install keplergl
import os
import pandas as pd
from keplergl import KeplerGl

# === Paths ===
csv_path = "../../data/raw/283_3_Dataset/283_2_FoodWebDataBase_2018_12_10.csv"
out_html = "foodwebs_global_kepler.html"

# === Parámetro: estilo del mapa ===
#   Opciones típicas: "light", "dark", "muted", "satellite", "terrain"
BASE_STYLE = "light"       # <- cámbialo a "satellite" si lo prefieres

# === Carga ===
df = pd.read_csv(csv_path, low_memory=False)

def pick_col(df, options):
    for c in options:
        if c in df.columns: return c
    return None

lat_col = pick_col(df, ["latitude"])
lon_col = pick_col(df, ["longitude"])
eco_col = pick_col(df, ["ecosystem.type"])
name_col = pick_col(df, ["foodweb.name"])
nodes_col = pick_col(df, ["Nodes", "n_nodes", "N", "n"])

required = [lat_col, lon_col, name_col]
if any(x is None for x in required):
    raise ValueError(f"Faltan columnas clave. lat={lat_col}, lon={lon_col}, name={name_col}")

# === Limpieza ===
df = df[pd.to_numeric(df[lat_col], errors="coerce").notna() &
        pd.to_numeric(df[lon_col], errors="coerce").notna()].copy()
df[lat_col] = df[lat_col].astype(float).clip(-90, 90)
df[lon_col] = df[lon_col].astype(float).clip(-180, 180)

def strip_suffix(s): return str(s).replace("_tax_mass", "")

df["Foodweb_clean"] = df[name_col].apply(strip_suffix)
df["N_nodes"] = pd.to_numeric(df[nodes_col], errors="coerce").fillna(1).astype(int) if nodes_col else 1
df["Ecosystem"] = df[eco_col].astype(str) if eco_col else "unknown"

# === Ajustes de legibilidad según estilo ===
if BASE_STYLE == "light":
    text_color = [0, 0, 0]        # texto oscuro sobre base clara
    stroke_color = [0, 0, 0]
elif BASE_STYLE == "satellite":
    text_color = [255, 255, 255]     # texto claro sobre imagen satelital
    stroke_color = [255, 255, 255]
else:
    text_color = [255, 255, 255]
    stroke_color = [255, 255, 255]

# === Config KeplerGL ===
kepler_config = {
  "version": "v1",
  "config": {
    "visState": {
      "filters": [],
      "layers": [
        {
          "id": "foodwebs-point-layer",
          "type": "point",
          "config": {
            "dataId": "foodwebs",
            "label": "Food Webs",
            "color": [18, 147, 154],
            "columns": {"lat": lat_col, "lng": lon_col, "altitude": None},
            "isVisible": True,
            "visConfig": {
              "radius": 7,
              "fixedRadius": False,
              "opacity": 1,
              "outline": True,
              "thickness": 1,
              "strokeColor": stroke_color,
              "colorRange": {
                "name": "ColorBrewer Set3-12",   # paleta cualitativa amplia y legible
                "type": "qualitative",
                "category": "Categorical",
                "colors": ["#8dd3c7","#ffffb3","#bebada","#fb8072","#80b1d3",
                           "#fdb462","#b3de69","#fccde5","#d9d9d9","#bc80bd",
                           "#ccebc5","#ffed6f"]
              },
              "radiusRange": [2, 35]
            },
            # "textLabel": [
            #   {
            #     "field": {"name": "Foodweb_clean", "type": "string"},
            #     "color": text_color,
            #     "size": 14,                 # ↑ tamaño para mejor lectura
            #     "offset": [8, 0],           # separa el texto del punto
            #     "anchor": "start",
            #     "alignment": "center",
            #     "background": True          # fondo para contraste (si tu versión lo soporta)
            #   }
            # ]
          },
          "visualChannels": {
            "colorField": {"name": "Ecosystem", "type": "string"},
            "colorScale": "ordinal",
            "sizeField": {"name": "N_nodes", "type": "integer"},
            "sizeScale": "sqrt"
          }
        }
      ],
      "interactionConfig": {
        "tooltip": {
          "fieldsToShow": {
            "foodwebs": [
              {"name": "Foodweb_clean", "format": None},
              {"name": "Ecosystem", "format": None},
              {"name": "N_nodes", "format": None},
              {"name": lat_col, "format": ".4f"},
              {"name": lon_col, "format": ".4f"}
            ]
          },
          "enabled": True
        }
      }
    },
    "mapState": {"bearing": 0, "pitch": 0, "latitude": 20, "longitude": 0, "zoom": 1.6},
    "mapStyle": {
      "styleType": BASE_STYLE,
      # Opcional: desactiva labels/roads si quieres más limpieza visual
      "visibleLayerGroups": {
        "label": True,
        "road": True,
        "border": True,
        "building": True,
        "water": True,
        "land": True
      }
    }
  }
}

# === Render ===
m = KeplerGl(height=640, config=kepler_config)
m.add_data(data=df, name="foodwebs")
m.save_to_html(file_name=out_html, read_only=True)

User Guide: https://docs.kepler.gl/docs/keplergl-jupyter


Out of range float values are not JSON compliant: nan
Supporting this message is deprecated in jupyter-client 7, please make sure your message is JSON-compliant
  content = self.pack(content)


Map saved to foodwebs_global_kepler.html!


In [2]:
import os
import pandas as pd
import numpy as np
import folium
from folium.plugins import MarkerCluster

# === Paths ===
csv_path = "../../data/raw/283_3_Dataset/283_2_FoodWebDataBase_2018_12_10.csv"

# === Carga ===
df = pd.read_csv(csv_path, low_memory=False).copy()

# === Columnas esperadas en este dataset ===
lat_col  = "latitude"
lon_col  = "longitude"
eco_col  = "ecosystem.type"
name_col = "foodweb.name"

# N opcional: intenta detectar alguna columna razonable
candidate_nodes = ["Nodes", "n_nodes", "N", "n", "node_count", "num_nodes"]
nodes_col = next((c for c in candidate_nodes if c in df.columns), None)

# === Limpieza coordenadas ===
df = df[pd.to_numeric(df[lat_col], errors="coerce").notna() &
        pd.to_numeric(df[lon_col], errors="coerce").notna()].copy()

df[lat_col] = pd.to_numeric(df[lat_col], errors="coerce").clip(-90, 90)
df[lon_col] = pd.to_numeric(df[lon_col], errors="coerce").clip(-180, 180)

# === Campos auxiliares ===
def strip_suffix(s):
    # Por compatibilidad si hay nombres con _tax_mass (en este dataset quizá no)
    return str(s).replace("_tax_mass", "")

df["Foodweb_clean"] = df[name_col].astype(str).apply(strip_suffix)
df["Ecosystem"] = df[eco_col].astype(str).fillna("unknown")

if nodes_col is not None:
    df["N_nodes"] = pd.to_numeric(df[nodes_col], errors="coerce").fillna(1).astype(int)
else:
    df["N_nodes"] = 1

# === Mapa base ===
m = folium.Map(location=[20, 0], zoom_start=2, tiles="CartoDB Positron")

# Paleta cualitativa por ecosistema (extendible)
ecosystems = sorted(df["Ecosystem"].unique())
palette = [
    "#66c2a5","#fc8d62","#8da0cb","#e78ac3","#a6d854",
    "#ffd92f","#e5c494","#b3b3b3","#a1d99b","#9ecae1",
    "#fdae6b","#c994c7"
]
color_map = {eco: palette[i % len(palette)] for i, eco in enumerate(ecosystems)}

# === Capa por ecosistema con MarkerCluster por capa (mejor control visual) ===
layer_groups = {}

for eco in ecosystems:
    layer = folium.FeatureGroup(name=f"{eco}", show=True)
    cluster = MarkerCluster(name=f"Cluster {eco}", disableClusteringAtZoom=7)
    cluster.add_to(layer)
    layer.add_to(m)
    layer_groups[eco] = (layer, cluster)

# === Escalado de radio (suave) por N_nodes (si hay variación) ===
# Usamos raíz cuadrada y acotamos por cuantiles para evitar outliers grandes
if df["N_nodes"].nunique() > 1:
    qlow, qhigh = df["N_nodes"].quantile([0.05, 0.95])
    def scaled_radius(n):
        n_clip = np.clip(n, qlow, qhigh)
        # base ~ sqrt, ajusta 3..10 px
        return float(np.interp(np.sqrt(n_clip), 
                               [np.sqrt(qlow), np.sqrt(qhigh)], 
                               [4, 10]))
else:
    def scaled_radius(n):
        return 6.0

# === Dibujar puntos ===
for _, r in df.iterrows():
    eco = r["Ecosystem"]
    color = color_map.get(eco, "#b3b3b3")
    radius = scaled_radius(r["N_nodes"])
    popup = folium.Popup(
        f"<b>{r['Foodweb_clean']}</b><br>"
        f"Ecosystem: {eco}<br>"
        f"N: {int(r['N_nodes'])}<br>"
        f"({r[lat_col]:.4f}, {r[lon_col]:.4f})",
        max_width=260
    )
    marker = folium.CircleMarker(
        location=[float(r[lat_col]), float(r[lon_col])],
        radius=radius,
        color=color,
        fill=True,
        fill_opacity=0.85,
        weight=1
    )
    marker.add_child(popup)
    # Agregar al cluster del ecosistema correspondiente
    layer_groups[eco][1].add_child(marker)

# === Control de capas ===
folium.LayerControl(collapsed=False).add_to(m)
m.save("foodwebs_global_folium.html")