# Data structure

Load one of the datasets:

In [1]:
import glob
import json
import os

import dallinger
import matplotlib.pyplot as plt
import numpy as np

import preprocess

# Load the dataset with this ID through Dallinger's data loader. The result
# is bound to `data`, which the analysis cells below pass to the
# `preprocess` helpers.
data = dallinger.data.load("0346ecab-57e4-d084-10c9-2a767363807c")

Available tables include:

```
data.infos            # Information generated by nodes
data.networks         # Nodes and vectors between them
data.nodes            # Nodes in the network, one per agent
data.notifications    # Notifications from recruiters about participants
data.participants     # Participants recruited for the experiment
data.questions        # Questionnaire items asked of participants
data.transformations  # Transformations of one info into another
data.transmissions    # Transmissions of infos from one node to another
data.vectors          # Links between nodes
```

And each table can be accessed, e.g., in these formats:

```
data.networks.csv    # Comma-separated values
data.networks.dict   # Python dictionary
data.networks.df     # pandas DataFrame
data.networks.html   # HTML table
data.networks.latex  # LaTeX table
data.networks.list   # Python list
data.networks.ods    # OpenDocument Spreadsheet
data.networks.tsv    # Tab-separated values
data.networks.xls    # Legacy Excel spreadsheet
data.networks.xlsx   # Modern Excel spreadsheet
data.networks.yaml   # YAML
```

# Analyses
Below are some analyses of the above data.

In [None]:
# Overlay every player's (x, y) trajectory on a single set of axes.
(times, positions_x, positions_y) = preprocess.timecourse_player_positions(data)
plt.xlim(0, 48)
plt.ylim(0, 48)
num_players = positions_x.shape[1]
for player in range(num_players):
    # Column `player` of each array holds that player's coordinates.
    xs = positions_x[:, player]
    ys = positions_y[:, player]
    plt.plot(xs, ys, alpha=0.5)
plt.show()

In [None]:
# Plot each player's trajectory in its own panel of a subplot grid.
(times, positions_x, positions_y) = preprocess.timecourse_player_positions(data)

num_players = positions_x.shape[1]
# Keep the 3x3 layout for up to nine players, but add rows when there are
# more so that plt.subplot() never receives an out-of-range panel index.
num_cols = 3
num_rows = max(3, -(-num_players // num_cols))  # ceiling division

for i in range(num_players):
    plt.subplot(num_rows, num_cols, i + 1)
    plt.plot(positions_x[:, i], positions_y[:, i], alpha=0.50)
    # pyplot limit/axis calls act on the *current* axes only, so they have
    # to be issued after each subplot is created to affect that panel.
    plt.xlim([0, 48])
    plt.ylim([0, 48])
    plt.axis("off")
plt.show()

In [None]:
# Score over time, drawn as one line per column of the score array
# (one column per player, going by the helper's name).
(t, s) = preprocess.timecourse_player_scores(data)
plt.xlabel("Time")
plt.ylabel("Score")
highest_score = np.max(s)
plt.ylim(0, highest_score)
for player in range(s.shape[1]):
    plt.plot(t, s[:, player])
plt.show()

In [None]:
# Total score over time: the score array summed across its columns at each
# time step.
(t, s) = preprocess.timecourse_player_scores(data)
total_score = np.sum(s, axis=1)
plt.plot(t, total_score)
plt.xlabel("Time")
plt.ylabel("Total score")
plt.ylim(0, 1000)
plt.show()

In [None]:
# Number of players in the game as a function of time.
(t, n) = preprocess.timecourse_num_players(data)
plt.plot(t, n)
plt.xlabel("Time")
plt.ylabel("Number of players in game")
plt.ylim(0, 20)
plt.show()

# Run on all datasets.

In [None]:
# Load in the data sets and repeat every analysis above for each of them.

# Collect dataset IDs from the zip archives in ./data. Globbing with an
# explicit directory prefix avoids os.chdir(), so the working directory can
# never be left pointing at ./data if something fails part-way through.
data_ids = [
    # Drop the trailing 9 characters of the file name to recover the ID
    # (presumably the "-data.zip" suffix of Dallinger exports -- TODO confirm).
    os.path.basename(path)[:-9]
    for path in glob.glob(os.path.join("data", "*.zip"))
]

for data_id in data_ids:
    # Load in dataset.
    data = dallinger.data.load(data_id)
    print(data)

    # --- Player positions: all players overlaid on one set of axes. ---
    (times, positions_x, positions_y) = preprocess.timecourse_player_positions(data)
    num_players = positions_x.shape[1]
    plt.xlim([0, 48])
    plt.ylim([0, 48])
    # A distinct index name keeps these loops from shadowing the dataset
    # loop variable.
    for player in range(num_players):
        plt.plot(positions_x[:, player], positions_y[:, player], alpha=0.50)
    plt.show()

    # --- Player positions: one panel per player. ---
    # Reuses the position arrays computed above (assumes the preprocess
    # helper is deterministic for a given dataset).
    # Keep the 3x3 layout for up to nine players, but add rows when there
    # are more so that plt.subplot() never gets an out-of-range index.
    num_cols = 3
    num_rows = max(3, -(-num_players // num_cols))  # ceiling division
    for player in range(num_players):
        plt.subplot(num_rows, num_cols, player + 1)
        plt.plot(positions_x[:, player], positions_y[:, player], alpha=0.50)
        # pyplot limit/axis calls act on the current axes only, so they are
        # issued after each subplot is created to affect that panel.
        plt.xlim([0, 48])
        plt.ylim([0, 48])
        plt.axis("off")
    plt.show()

    # --- Score per player over time. ---
    (t, s) = preprocess.timecourse_player_scores(data)
    plt.ylim([0, np.max(s)])
    plt.xlabel("Time")
    plt.ylabel("Score")
    for column in s.T:
        plt.plot(t, column)
    plt.show()

    # --- Total score over time (same score array as above). ---
    plt.ylim([0, 1000])
    plt.xlabel("Time")
    plt.ylabel("Total score")
    plt.plot(t, np.sum(s, axis=1))
    plt.show()

    # --- Number of players in the game over time. ---
    (t, n) = preprocess.timecourse_num_players(data)
    plt.ylim([0, 20])
    plt.xlabel("Time")
    plt.ylabel("Number of players in game")
    plt.plot(t, n)
    plt.show()