<a href="https://colab.research.google.com/github/AdityaR4702/Networks-Oil-Shocks/blob/main/code_centrality.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import networkx as nx
import pandas as pd
import random
import matplotlib.pyplot as plt

In [3]:
def load_and_clean_data(file_path):
    """Read a CSV file into a DataFrame and discard its final three rows.

    The trailing three rows are dropped because they are not
    country-specific entries.
    """
    trailing_rows = 3
    full_table = pd.read_csv(file_path)
    return full_table.iloc[:-trailing_rows]

def extract_country_industry(label):
    """Split a ``COUNTRY_INDUSTRY`` label into its country and industry parts.

    Only the first underscore acts as the separator, so an industry code that
    itself contains underscores (e.g. ``"USA_B05_06"``) stays intact.

    Args:
        label: The label to parse. Anything that is not a string (NaN, None,
            numbers, ...) is treated as "no label".

    Returns:
        A ``(country, industry)`` tuple, or ``(None, None)`` when ``label`` is
        not a string or contains no underscore.
    """
    # Checking the type first avoids the TypeError that `"_" in <number>`
    # would raise, and covers NaN/None without needing pandas.
    if not isinstance(label, str):
        return None, None

    country, separator, industry = label.partition("_")
    if not separator:
        return None, None
    return country, industry

def create_multilayer_network(df, target_col_stop=3646):
    """Build a weighted directed graph of country-industry flows from ``df``.

    Every row whose ``'V1'`` label parses as ``COUNTRY_INDUSTRY`` becomes a
    node carrying ``country`` and ``industry`` attributes. For each such row,
    every target column in ``df.columns[1:target_col_stop]`` that has a
    parseable label and a value greater than zero yields an edge
    ``row label -> column label`` whose ``weight`` is that value.

    Args:
        df: Table with the row labels in column ``'V1'`` and one column per
            target node.
        target_col_stop: Exclusive positional end of the target-column
            window. The default (3646) is the bound that used to be
            hard-coded here. NOTE(review): presumably the columns past this
            bound are not industry columns - confirm against the data files.

    Returns:
        A tuple ``(G, self_loops, intra_country_edges, inter_country_edges)``:
        the graph, followed by the number of edges added whose endpoints are
        the same node, different nodes of one country, and nodes of different
        countries.
    """
    G = nx.DiGraph()

    # Add one node per valid row label.
    for label in df['V1']:
        country, industry = extract_country_industry(label)
        if industry:
            G.add_node(f"{country}_{industry}", country=country, industry=industry)

    # Column labels do not depend on the row, so parse them once up front.
    # Each entry lines up positionally with df.columns[1:target_col_stop];
    # None marks a column whose label is not a COUNTRY_INDUSTRY pair.
    targets = []
    for target_col in df.columns[1:target_col_stop]:
        target_country, target_industry = extract_country_industry(target_col)
        if target_industry:
            targets.append((target_country, f"{target_country}_{target_industry}"))
        else:
            targets.append(None)

    # Edge counters
    self_loops = 0
    intra_country_edges = 0
    inter_country_edges = 0

    # Add edges
    for _, row in df.iterrows():
        source_country, source_industry = extract_country_industry(row['V1'])
        if not source_industry:
            continue  # Skip rows without a valid label
        source_node = f"{source_country}_{source_industry}"

        # Positional slice of the row matches the column window parsed above.
        for target, value in zip(targets, row.iloc[1:target_col_stop]):
            # `not value > 0` also skips NaN cells.
            if target is None or not value > 0:
                continue
            target_country, target_node = target

            # NOTE: add_edge creates the target node if no row declared it;
            # such a node has no country/industry attributes.
            G.add_edge(source_node, target_node, weight=value)

            if source_node == target_node:
                self_loops += 1           # intra-industry flow
            elif source_country == target_country:
                intra_country_edges += 1  # same country, different industry
            else:
                inter_country_edges += 1  # cross-country (cross-layer) flow

    return G, self_loops, intra_country_edges, inter_country_edges

def analyze_network(G, self_loops, intra_country_edges, inter_country_edges):
    """Print the graph's node/edge totals and the three edge counters.

    Two summary lines are written to stdout. The degree-distribution
    histogram is currently disabled and kept below as commented-out code.
    """
    node_total = G.number_of_nodes()
    edge_total = G.number_of_edges()
    print(f"Nodes: {node_total}, Edges: {edge_total}")

    counter_summary = (
        f"Self-loops: {self_loops}, "
        f"Intra-country Edges: {intra_country_edges}, "
        f"Inter-country Edges: {inter_country_edges}"
    )
    print(counter_summary)

    # Disabled degree-distribution plot:
    #degree_distribution = [G.degree(n) for n in G.nodes()]
    #plt.hist(degree_distribution, bins=50)
    #plt.xlabel("Degree")
    #plt.ylabel("Frequency")
    #plt.title("Degree Distribution of the Network")
    #plt.show()

def visualize_subgraph(G, fixed_sample_nodes):
    """Draw the subgraph induced by ``fixed_sample_nodes``, colored by country.

    Colors are assigned from the countries of the whole graph ``G`` (sorted),
    not just the sampled nodes, so a country maps to the same color whenever
    the full graph has the same set of countries.

    Args:
        G: Graph whose nodes may carry a ``'country'`` attribute.
        fixed_sample_nodes: Nodes to include in the drawing; passing the same
            collection for every dataset keeps the plots comparable.

    Returns:
        None. The figure is shown with ``plt.show()``.
    """
    subG = G.subgraph(fixed_sample_nodes)

    # Country attribute of every node in the full network.
    node_countries = nx.get_node_attributes(G, 'country')
    unique_countries = sorted(set(node_countries.values()))  # Sorted for consistency

    # plt.cm.get_cmap was removed in Matplotlib 3.9; pyplot.get_cmap accepts
    # the same (name, lut) arguments. max(..., 1) avoids asking for a
    # zero-entry colormap when no node has a country.
    # NOTE(review): "tab10" defines ten base colors, so with more countries
    # several of them will likely share a color - confirm this is acceptable.
    cmap = plt.get_cmap("tab10", max(len(unique_countries), 1))
    country_colors = {country: cmap(i) for i, country in enumerate(unique_countries)}

    # Nodes without a country attribute fall back to gray.
    node_colors = [
        country_colors.get(node_countries.get(node), "gray")
        for node in subG.nodes()
    ]

    plt.figure(figsize=(8, 6))
    nx.draw(subG, with_labels=False, node_size=50, node_color=node_colors, edge_color='gray', alpha=0.7)
    plt.title("Sample Subgraph Visualization with Country Colors")
    plt.show()


In [None]:
# Build one network per yearly dataset (1995 through 2020 inclusive)
years = list(range(1995, 2021))
file_paths = {year: f"/content/{year}.csv" for year in years}

graphs = {}
sample_nodes = None  # Reserved for a shared visualization sample (plotting below is disabled)

for year in years:
    path = file_paths[year]
    print(f"Processing {year} data...")

    # Load the table, build the graph, keep it for later cells, report totals
    df = load_and_clean_data(path)
    G, self_loops, intra_country_edges, inter_country_edges = create_multilayer_network(df)
    graphs[year] = G
    analyze_network(G, self_loops, intra_country_edges, inter_country_edges)

    #if sample_nodes is None:
        #sample_nodes = random.sample(list(G.nodes()), min(50, len(G.nodes())))  # Store the first sample nodes
    #visualize_subgraph(G, sample_nodes)





Processing 1995 data...
Nodes: 3584, Edges: 5546107
Self-loops: 3157, Intra-country Edges: 126275, Inter-country Edges: 5416675
Processing 1996 data...
Nodes: 3616, Edges: 6402324
Self-loops: 3324, Intra-country Edges: 136629, Inter-country Edges: 6262371
Processing 1997 data...
Nodes: 3645, Edges: 6647314
Self-loops: 3346, Intra-country Edges: 137431, Inter-country Edges: 6506537
Processing 1998 data...
Nodes: 3560, Edges: 4392612
Self-loops: 2266, Intra-country Edges: 92434, Inter-country Edges: 4297912
Processing 1999 data...
Nodes: 3561, Edges: 4533371
Self-loops: 2304, Intra-country Edges: 94384, Inter-country Edges: 4436683
Processing 2000 data...


In [1]:
def compute_eigenvector_centrality(G):
    """Return the weighted eigenvector centrality of ``G`` as a dict.

    Edge weights are read from the ``'weight'`` attribute. If networkx raises
    a ``NetworkXException``, the error is printed and an empty dict is
    returned instead.
    """
    try:
        scores = nx.eigenvector_centrality(G, weight='weight')
    except nx.NetworkXException as err:
        print("Error computing eigenvector centrality:", err)
        scores = {}
    return scores


# Oil-related subset: every combination of the countries and industries below
oil_countries = ['SAU','RUS','USA','CN1','CAN','BRA','MEX','NOR','NGA','KAZ','IDN','IND','GBR']
oil_industries = ['B05_06','C19','H49','C20']
target_nodes = [
    f"{country}_{industry}"
    for country in oil_countries
    for industry in oil_industries
]

# Result containers, keyed by node and then by year
eigenvector_data, degree_data, betweenness_data, pagerank_data = {}, {}, {}, {}

for year in graphs:
    G = graphs[year]
    print(f"Computing centralities for {year}...")

    # Centralities are computed on the full network
    eigenvector = compute_eigenvector_centrality(G)
    pagerank = nx.pagerank(G, alpha=0.85)
    #degree = nx.degree_centrality(G)
    #betweenness = nx.betweenness_centrality(G, weight='weight')

    # Keep only the oil-related nodes that exist in this year's graph
    for node in target_nodes:
        if node not in G:
            continue
        eigenvector_data.setdefault(node, {})[year] = eigenvector.get(node, 0)
        pagerank_data.setdefault(node, {})[year] = pagerank.get(node, 0)
        #degree_data.setdefault(node, {})[year] = degree.get(node, 0)
        #betweenness_data.setdefault(node, {})[year] = betweenness.get(node, 0)

# One row per node, one column per year
eigenvector_df = pd.DataFrame.from_dict(eigenvector_data, orient='index')
eigenvector_df = eigenvector_df.sort_index()
pagerank_df = pd.DataFrame.from_dict(pagerank_data, orient='index')
pagerank_df = pagerank_df.sort_index()
#degree_df = pd.DataFrame.from_dict(degree_data, orient='index').sort_index()
#betweenness_df = pd.DataFrame.from_dict(betweenness_data, orient='index').sort_index()

# Write the tables to CSV
eigenvector_df.to_csv("eigenvector_centrality.csv")
pagerank_df.to_csv("pagerank_centrality.csv")
#degree_df.to_csv("degree_centrality.csv")
#betweenness_df.to_csv("betweenness_centrality.csv")

print("Centrality files created successfully!")

NameError: name 'graphs' is not defined