# Master Dataset separation using 'ecosystem.type' column

In [3]:
import pandas as pd
import os

# --- Configuration ---
# Source table (one row per recorded interaction) and the folder that will
# receive one CSV per ecosystem type.
input_csv = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/283_2_FoodWebDataBase_2018_12_10 (Original).csv"
output_dir = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/CSV"

# The destination folder must exist before the first file is written
os.makedirs(output_dir, exist_ok=True)

# --- Load Data ---
df = pd.read_csv(input_csv, low_memory=False)

# Fail early if the columns used for grouping / counting are absent
required_columns = {'foodweb.name', 'ecosystem.type'}
missing = required_columns - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# --- Group by Ecosystem and Save ---
# Characters that are awkward in file names and what they are replaced with
filename_table = str.maketrans({" ": "_", "/": "-"})

summary = []

# groupby skips rows whose 'ecosystem.type' is missing, so those rows end up in no file
for ecosystem_type, group in df.groupby('ecosystem.type'):
    # File-system friendly version of the ecosystem label
    safe_name = ecosystem_type.lower().translate(filename_table)
    output_path = os.path.join(output_dir, safe_name + "_foodweb.csv")

    group.to_csv(output_path, index=False)

    # Per-ecosystem record: (label, distinct food webs, interaction rows)
    unique_foodwebs = group['foodweb.name'].nunique()
    summary.append((ecosystem_type, unique_foodwebs, len(group)))

# --- Summary Report ---
row_template = "{:<30} {:<15} {}"
rule = "-" * 70

print("\nSummary Report:")
print(row_template.format('Ecosystem Type', '# Food Webs', '# Total Interactions'))
print(rule)
for eco, fw_count, row_count in summary:
    print(row_template.format(eco, fw_count, row_count))
print(rule)

# Grand totals are plain sums of the per-ecosystem figures
total_foodwebs = sum(n_webs for _, n_webs, _ in summary)
total_rows = sum(n_rows for _, _, n_rows in summary)
print(row_template.format('# Total:', total_foodwebs, total_rows))

# Convert CSV to a `net` variable holding a binary adjacency matrix, then save it as a MAT file

In [1]:
import pandas as pd
import os
from scipy.io import savemat
from scipy.sparse import csr_matrix
import numpy as np

# --- Configuration ---
# Source table plus the two output folders. Only the MAT folder is written to
# by this cell; the CSV folder is used by the optional export inside the loop.
input_csv = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/283_2_FoodWebDataBase_2018_12_10 (Original).csv"
csv_output_dir = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/CSV"
mat_output_dir = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/MAT"
os.makedirs(csv_output_dir, exist_ok=True)
os.makedirs(mat_output_dir, exist_ok=True)

# --- Load full data ---
df = pd.read_csv(input_csv, low_memory=False)

# Validate required columns
required_columns = {'foodweb.name', 'ecosystem.type', 'con.taxonomy', 'res.taxonomy'}
missing = required_columns - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# --- Group by Ecosystem and Convert ---
# One tuple per ecosystem: (label, distinct food webs, unique links, species)
summary = []

# groupby skips rows whose 'ecosystem.type' is missing
for ecosystem_type, group in df.groupby('ecosystem.type'):
    # str() keeps the file-name clean-up working even if a label was parsed as a number
    safe_name = str(ecosystem_type).lower().replace(" ", "_").replace("/", "-")

    # Optional: Save raw CSV
    # group.to_csv(os.path.join(csv_output_dir, f"{safe_name}_foodweb.csv"), index=False)

    # Extract unique interactions: a link needs both endpoints, and repeated
    # observations of the same pair collapse to one binary entry
    con_res_df = group[['con.taxonomy', 'res.taxonomy']].dropna().drop_duplicates()

    # Get all unique species (in either role) and build a name -> position index
    species = sorted(set(con_res_df['con.taxonomy']) | set(con_res_df['res.taxonomy']))
    species_index = {name: idx for idx, name in enumerate(species)}
    N = len(species)

    # Prepare sparse matrix data by walking the two columns directly instead of
    # DataFrame.iterrows(), which builds a Series per row and is far slower.
    # Orientation: row = consumer, column = resource.
    row_idx = [species_index[name] for name in con_res_df['con.taxonomy']]
    col_idx = [species_index[name] for name in con_res_df['res.taxonomy']]
    data = [1] * len(row_idx)

    # Create sparse binary adjacency matrix (pairs are unique, so no entry exceeds 1)
    net_sparse = csr_matrix((data, (row_idx, col_idx)), shape=(N, N), dtype=np.int8)

    # Save as sparse to .mat under the variable name 'net'
    mat_path = os.path.join(mat_output_dir, f"{safe_name}_foodweb.mat")
    savemat(mat_path, {'net': net_sparse})

    # Summary
    unique_foodwebs = group['foodweb.name'].nunique()
    summary.append((ecosystem_type, unique_foodwebs, len(con_res_df), len(species)))

# --- Summary Report ---
print("\nSummary Report:")
print(f"{'Ecosystem Type':<30} {'# Food Webs':<15} {'# Unique Links':<20} {'# Species'}")
print("-" * 90)
for eco, fw_count, link_count, sp_count in summary:
    print(f"{eco:<30} {fw_count:<15} {link_count:<20} {sp_count}")
print("-" * 90)


Summary Report:
Ecosystem Type                 # Food Webs     # Unique Links       # Species
------------------------------------------------------------------------------------------
lakes                          60              3305                 388
marine                         135             50446                2536
streams                        28              13681                622
terrestrial aboveground        21              13407                775
terrestrial belowground        52              35925                1108
------------------------------------------------------------------------------------------


# Group by consumer-resource pairs, count how often each pair occurs, and use that count as the link weight

In [21]:
import pandas as pd

# --- Load the CSV ---
lakes_csv = "/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/CSV/lakes_foodweb.csv"
df = pd.read_csv(lakes_csv, low_memory=False)

# --- Keep only the two taxonomy columns, and only rows where both are present ---
pair_columns = ['con.taxonomy', 'res.taxonomy']
df = df.dropna(subset=pair_columns)[pair_columns]

# --- Count interactions (edge weights) ---
# The weight of a link is the number of rows recording that consumer-resource pair
interaction_counts = df.groupby(pair_columns).size().rename('weight').reset_index()

# --- Create weighted directed adjacency matrix ---
# Rows are consumers, columns are resources; pairs never observed get 0
adj_matrix = pd.pivot_table(
    interaction_counts,
    index='con.taxonomy',
    columns='res.taxonomy',
    values='weight',
    fill_value=0,
)

# --- Optional: Export the adjacency matrix to CSV ---
# adj_matrix.to_csv("weighted_adjacency_matrix.csv")

# --- Display summary ---
n_consumers, n_resources = adj_matrix.shape
print(f"Number of unique consumers: {n_consumers}")
print(f"Number of unique resources: {n_resources}")
print(f"Total number of unique interactions: {len(interaction_counts)}")

Number of unique consumers: 203
Number of unique resources: 368
Total number of unique interactions: 3305


In [17]:
# Display the consumer/resource/weight table. `interaction_counts` is not
# created here; it must already exist in the kernel from an earlier cell.
interaction_counts

Unnamed: 0,con.taxonomy,res.taxonomy,weight
0,Acanthocyclops vernalis,Chironomidae indet.,1
1,Acanthocyclops vernalis,Chydorus latus,1
2,Acanthocyclops vernalis,Corynoneura scutellata,1
3,Acanthocyclops vernalis,Enchytraidae sp.,1
4,Acanthocyclops vernalis,Tanytarsus bruchonidae,1
...,...,...,...
3300,fish fry,Xanthidium armatum,1
3301,fish fry,Zygnema sp.,3
3302,fish fry,benthic detritus,32
3303,fish fry,copepod nauplii,36


# Group by interaction type and build one adjacency matrix per interaction type

In [4]:
import pandas as pd

# --- Load the CSV ---
df = pd.read_csv("/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/CSV/lakes_foodweb.csv", low_memory=False)

# Make sure every column used below exists. An explicit exception is used
# (rather than `assert`, which is skipped under `python -O`) and it mirrors the
# check performed by the ecosystem-splitting cells of this notebook.
required_columns = {'con.taxonomy', 'res.taxonomy', 'interaction.type'}
missing = required_columns - set(df.columns)
if missing:
    raise ValueError(f"Missing required columns: {missing}")

# Drop missing values in key columns
df_interactions = df[['con.taxonomy', 'res.taxonomy', 'interaction.type']].dropna()

# Group by interaction type: one row per (type, consumer, resource) with the
# number of times that combination was recorded as its weight
grouped_by_type = df_interactions.groupby(['interaction.type', 'con.taxonomy', 'res.taxonomy']).size().reset_index(name='weight')

# Dictionary to hold one matrix per interaction type
interaction_type_matrices = {}

# Create one adjacency matrix per interaction type. Iterating the groupby
# yields each type's rows in a single pass instead of re-filtering the whole
# table once per type; sort=False keeps the order in which the types appear.
for interaction_type, filtered in grouped_by_type.groupby('interaction.type', sort=False):
    # Rows = consumers, columns = resources. Each pair occurs once per type,
    # so pivot_table's aggregation leaves the weights unchanged.
    matrix = filtered.pivot_table(
        index='con.taxonomy',
        columns='res.taxonomy',
        values='weight',
        fill_value=0
    )
    interaction_type_matrices[interaction_type] = matrix
    # Optional: Save to file
    # matrix.to_csv(f"adj_matrix_{interaction_type.lower()}.csv")

# Example: print one matrix
# print("\nSample: Predation Matrix")
# print(interaction_type_matrices.get('predacious', 'No predation matrix found.'))


In [5]:
# Display the matrix stored under the 'predacious' key; if that key is absent
# the fallback string is shown instead of raising a KeyError.
interaction_type_matrices.get('predacious', 'No predation matrix found.')

res.taxonomy,Acanthocyclops vernalis,Acantholeberis curvirostris,Acroperus harpae,Aeshna juncea,Agabus bipustulatus,Agabus sturmii,Alona affinis,Alona costata,Alona quadrangularis,Alona rectangula,...,Trichotria tetractis,Tropocyclops prasinus,Umbra limi,Usipa,Usipa larvae,Utaka,Zooplankton,copepod nauplii,fish eggs,fish fry
con.taxonomy,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Acanthocyclops vernalis,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Aeshna juncea,1.0,0.0,0.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Agabus bipustulatus,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Agabus sturmii,1.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Ambloplites rupestris,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,1.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Umbra limi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Usipa,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Usipa larvae,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0
Utaka,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0


In [12]:
# Display the weighted links of the 'predacious' interaction type. The rows are
# selected explicitly from `grouped_by_type` so the result does not depend on
# whatever value a loop variable in another cell happened to hold last.
grouped_by_type[grouped_by_type['interaction.type'] == 'predacious']

Unnamed: 0,interaction.type,con.taxonomy,res.taxonomy,weight
1478,predacious,Acanthocyclops vernalis,Chironomidae indet.,1
1479,predacious,Acanthocyclops vernalis,Chydorus latus,1
1480,predacious,Acanthocyclops vernalis,Corynoneura scutellata,1
1481,predacious,Acanthocyclops vernalis,Enchytraidae sp.,1
1482,predacious,Acanthocyclops vernalis,Tanytarsus bruchonidae,1
...,...,...,...,...
3300,predacious,fish fry,Trichocerca similis,3
3301,predacious,fish fry,Trichotria tetractis,1
3302,predacious,fish fry,Tropocyclops prasinus,28
3303,predacious,fish fry,copepod nauplii,36


# Group by consumer-resource pairs, calculate log bodymass ratio and aggregate (mean or median) by interaction pair

In [16]:
import pandas as pd
import numpy as np

# Load the lakes food web table
df = pd.read_csv("/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/CSV/lakes_foodweb.csv", low_memory=False)

# Shorter names for the two mean-mass columns
df = df.rename(columns={
    'con.mass.mean.g.': 'con_mass',
    'res.mass.mean.g.': 'res_mass'
})

pair_columns = ['con.taxonomy', 'res.taxonomy']

# Steps 1-4 as one chain:
#   1. keep the pair + mass columns, turn the -999 placeholder into NaN and
#      drop every row that is incomplete
#   2. keep only strictly positive masses (needed for the ratio and its log)
#   3. consumer / resource mass ratio
#   4. log10 of that ratio to stabilize the scale
df_mass = (
    df[pair_columns + ['con_mass', 'res_mass']]
    .replace(-999, np.nan)
    .dropna()
    .loc[lambda d: (d['res_mass'] > 0) & (d['con_mass'] > 0)]
    .assign(
        bodymass_ratio=lambda d: d['con_mass'] / d['res_mass'],
        log_bodymass_ratio=lambda d: np.log10(d['bodymass_ratio']),
    )
)

# Step 5: average the log ratio over all records of the same pair
bodymass_stats = (
    df_mass.groupby(pair_columns)['log_bodymass_ratio']
    .mean()
    .rename('avg_log_mass_ratio')
    .reset_index()
)

# Link weights: how many rows record each consumer-resource pair
interaction_counts = df.groupby(pair_columns).size().rename('weight').reset_index()

# Left merge keeps every link; pairs without usable masses get NaN for the ratio
enriched_edges = interaction_counts.merge(bodymass_stats, on=pair_columns, how='left')

# Save enriched data
# enriched_edges.to_csv("interaction_edges_enriched.csv", index=False)

# Preview
enriched_edges


Unnamed: 0,con.taxonomy,res.taxonomy,weight,avg_log_mass_ratio
0,Acanthocyclops vernalis,Chironomidae indet.,1,-1.239641
1,Acanthocyclops vernalis,Chydorus latus,1,1.342423
2,Acanthocyclops vernalis,Corynoneura scutellata,1,-0.592076
3,Acanthocyclops vernalis,Enchytraidae sp.,1,-0.586996
4,Acanthocyclops vernalis,Tanytarsus bruchonidae,1,-0.770852
...,...,...,...,...
3300,fish fry,Xanthidium armatum,1,5.509515
3301,fish fry,Zygnema sp.,3,5.794187
3302,fish fry,benthic detritus,32,
3303,fish fry,copepod nauplii,36,0.945924


# Convert from CSV to MAT with taxonomy and mass

In [1]:
import pandas as pd
import numpy as np
from scipy.sparse import csc_matrix
from scipy.io import savemat

# Placeholder the mass columns use for "not recorded"; the body-mass-ratio cell
# of this notebook treats the same value as missing.
MISSING_MASS = -999

# Load the single food web to export
df = pd.read_csv("/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_csv/SF1M2.csv")

# Build species list from complete links only. Dropping missing names *before*
# the string conversion matters: astype(str) would otherwise turn a missing
# value into the literal text "nan" and add it to the network as a species.
links = df[['res.taxonomy', 'con.taxonomy']].dropna().astype(str)
prey = links['res.taxonomy']
predator = links['con.taxonomy']
species = sorted(set(prey).union(set(predator)))

# Create species → index mapping
species_index = {name: i for i, name in enumerate(species)}
N = len(species)

# Build adjacency matrix (prey → predator): row = resource, column = consumer.
# NOTE(review): this is the transpose of the per-ecosystem matrices written by
# the earlier MAT-export cell (row = consumer) — confirm which one is intended.
adj_matrix = np.zeros((N, N), dtype=int)
for res, con in zip(prey, predator):
    i = species_index[res]
    j = species_index[con]
    adj_matrix[i, j] = 1

# Convert to sparse matrix
net_sparse = csc_matrix(adj_matrix)

# Collect mass records per species from both prey and predator roles
res_masses = df[['res.taxonomy', 'res.mass.mean.g.']].rename(
    columns={'res.taxonomy': 'species', 'res.mass.mean.g.': 'mass'})
con_masses = df[['con.taxonomy', 'con.mass.mean.g.']].rename(
    columns={'con.taxonomy': 'species', 'con.mass.mean.g.': 'mass'})
all_masses = pd.concat([res_masses, con_masses], ignore_index=True)

# Turn the placeholder into NaN so it is dropped instead of being averaged in,
# then discard records lacking either a name or a mass
all_masses = all_masses.assign(
    mass=all_masses['mass'].replace(MISSING_MASS, np.nan)
).dropna()

# Use the same string form of the names as the species list above
all_masses = all_masses.assign(species=all_masses['species'].astype(str))
species_mass = all_masses.groupby('species')['mass'].mean()

# Create mass vector aligned with `species`; NaN where no mass was recorded
taxonomy_names = np.array(species, dtype=object)
mean_masses = np.array([species_mass.get(name, np.nan) for name in species])

# Save everything to .mat
savemat("/Users/jorge/Desktop/PhD/Code/ExtractFoodWebs/foodwebs_mat_by_ecosystem/MAT_mass/SF1M2_tax_mass.mat", {
    "net": net_sparse,
    "taxonomy": taxonomy_names,
    "mass": mean_masses
})