In [7]:
from pathlib import Path

import pandas as pd
from ecoILP import load_model, handleEdgeList, extractFeatures, predictLinks, plotMetrics, plotProbsMatrix

# Enable IPython's autoreload extension in mode 2 so that edits to imported
# modules are picked up on each execution without restarting the kernel.
# NOTE(review): re-running this cell prints "The autoreload extension is already
# loaded" (see the recorded output); `%reload_ext autoreload` would avoid that notice.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Load the model through ecoILP's load_model() using its default arguments;
# the same object is passed to predictLinks() for every network processed below.
model = load_model()

In [4]:
# Case-study interaction matrices, one CSV file per year
networks_list = ['1982.csv', '1983.csv', '1984.csv', '1985.csv', '1986.csv', '1987.csv']

# Read every matrix: the first column becomes the row index, the first row the column labels
networks = []
for file_name in networks_list:
    networks.append(
        pd.read_csv(f'data/raw/networks/case_study/{file_name}', index_col=0, header=0)
    )

# Reshape each matrix to long format (one row per row-label/column-label pair)
# and tag it with its year, i.e. the part of the file name before the first '.'
edgeLists = []
for file_name, matrix in zip(networks_list, networks):
    stacked = matrix.stack()
    # Row labels become 'lower_level', column labels 'higher_level', cell values 'weight'
    stacked = stacked.rename_axis(['lower_level', 'higher_level']).rename('weight')
    edge_frame = stacked.reset_index()
    edge_frame['year'] = file_name.split('.')[0]
    edgeLists.append(edge_frame)

In [5]:
# Options passed unchanged to handleEdgeList for every yearly network
edge_list_options = dict(
    linkID_col=None,  # no link-ID column in the edge list, so one will be created
    topNodes_col='higher_level',
    bottomNodes_col='lower_level',
    networkID_col='year',  # if the edge list had no network-ID column, a dummy value would be created
    groupID_col=None,
    weight_col='weight',  # only binary values are currently supported
    community='Host-Parasite',  # community is known but not stored in a groupID column
    sample_fraction=0.2,  # fraction of missing links to create
    # Alternatives when the missing links are predefined:
    # missing_links=sample_network[sample_network['class'] == -1]['link_ID'],
    # groundTruth_col='class',
)

# Evaluation plots drawn for each network
metric_plots = ['confusion_matrix', 'single_evaluation', 'roc_curve', 'pr_curve', 'probs_distribution']

# One prediction table per yearly network, in the order of edgeLists
output = []

for edgeList in edgeLists:

    # Prepare the edge list, derive its features, and score every link with the model
    links_df = handleEdgeList(edgeList, **edge_list_options)
    features_df = extractFeatures(links_df)
    probabilities, classifications = predictLinks(features_df, model)

    plotMetrics(features_df, probabilities, plots=metric_plots)

    # Optional probability heat map:
    # plotProbsMatrix(links_df, probabilities, figsize=(14,8))

    # Put the link columns side by side with the predictions. The pieces are
    # joined on their index labels (concat along axis=1).
    # NOTE(review): presumably handleEdgeList stores the network ID under
    # 'name' and returns a default integer index — confirm.
    link_columns = links_df.rename(columns={'name': 'year'}).drop(columns=['weight'])
    proba_column = pd.DataFrame(probabilities, columns=['y_proba'])
    pred_column = pd.DataFrame(classifications, columns=['y_pred'])

    output.append(pd.concat([link_columns, proba_column, pred_column], axis=1))

In [6]:
# Combine the per-year prediction tables into one table with a fresh 0..n-1 index.
# The guard keeps this cell re-runnable: once `output` already holds the combined
# DataFrame, calling pd.concat on it again would raise a TypeError.
if not isinstance(output, pd.DataFrame):
    output = pd.concat(output, ignore_index=True)

# to_csv does not create missing folders, so make sure the target directory exists
output_path = Path('results/intermediate/case_study.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)

# Write the combined predictions without the index column
output.to_csv(output_path, index=False)