**Plotting Multiplicity Transit-Probability Expectation Values Over Time**

This code produces one figure per selected data file, each plotting the expectation value of the "multiplicity transit-probabilities" over time. The data used was generated by sysSim and REBOUND. To run, call the "run" function at the end of the file and pass the range of files for which you wish to generate plots. This range is a range of indices into the list of data files (see the data_files list).

In [35]:
import csv
import os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.ticker as tck

In [36]:
# Debugging switches; every one is off by default.
DEBUG_SPIKES = False       # report expectation values greater than 1
DEBUG_MAX_PLANETS = False  # report how the planet columns were detected
SHOW_PLOTS = False         # display each figure in addition to saving it

# Scratch list, also for debugging.
spikes = []

In [37]:
# Path from the current folder ('src') to the data directory ('projectOutput').
path = os.path.join(os.pardir, 'projectOutput')


def _data_file_sort_key(filename):
    """Return the number a data file name sorts by.

    The 'sysSim_' prefix and the '.csv' (or '_randomO.csv') suffix are
    stripped and the remainder is parsed as a float. Names containing
    '_randomO' get 0.5 added, so such a file sorts after the plain file
    carrying the same number.

    Raises:
        ValueError: if the remainder of the name is not a number.
    """
    if '_randomO' in filename:
        return float(filename.replace('sysSim_', '').replace('_randomO.csv', '')) + 0.5
    return float(filename.replace('sysSim_', '').replace('.csv', ''))


def _is_data_file(filename):
    """Tell whether a directory entry has a name the sort key can parse."""
    try:
        _data_file_sort_key(filename)
    except ValueError:
        return False
    return True


# Sorted list of the data file names in the projectOutput folder. Entries whose
# names do not parse as data files are skipped rather than aborting the sort
# with a ValueError.
data_files = sorted(
    [name for name in os.listdir(path) if _is_data_file(name)],
    key=_data_file_sort_key,
)

def csv2data(filepath):
    """Read a .csv file and return its header and its data arranged by column.

    Args:
        filepath: Path of the .csv file to read.

    Returns:
        A (header, data) tuple.
        header is the list of header names in file order,
        e.g. ['Time', 'Stellar mass', 'Stellar radius', ...].
        data is a numpy array built from the remaining rows and transposed,
        so each entry is one column:
        data = [[column 1 data...], [column 2 data...], [column 3 data...], ...].
        Values are whatever csv.reader produced for each field (strings).

    Raises:
        IndexError: if the file contains no rows at all.
    """
    # The with-block closes the file even if reading fails. newline='' is the
    # mode the csv module documents for files handed to csv.reader; it keeps
    # line breaks embedded in quoted fields intact.
    with open(filepath, newline='') as csv_file:
        rows = list(csv.reader(csv_file))

    header = rows[0]
    data = np.array(rows[1:]).T

    return (header, data)

def n_planet_prob(n):
    """Build the header name of the n-planet multiplicity probability column.

    The .csv data files label these columns '1 Planets', '2 Planets', ...
    '9 Planets', so n_planet_prob(1) gives '1 Planets', n_planet_prob(2)
    gives '2 Planets', and so on.
    """
    return '{count} Planets'.format(count=n)

def get_data(filepath):
    """Compute the multiplicity expectation value for every row of a data file.

    For each data row the expectation value is the sum of
    n * (value in the 'n Planets' column) over the 'n Planets' columns
    (n = 1..9) that the file's header contains.

    Args:
        filepath: Path of the .csv data file to read.

    Returns:
        A dict {'axes': {'x': time values, 'y': expectation values},
        'max_planets': largest n whose 'n Planets' column is in the header},
        so the planet count can be shown on the plot.

    Raises:
        ValueError: if the header has no 'Time' column or no 'n Planets'
            column.
    """
    # Extracts the header and data from the selected .csv file
    (header, data) = csv2data(filepath)

    # Finds the column index for the 'Time' column
    time_column = header.index('Time')

    # Looks every multiplicity column up once, instead of searching the header
    # again for each planet number at each timestep.
    planet_columns = []
    max_planets = None

    for planet_num in range(1, 10):
        column_name = n_planet_prob(planet_num)

        if column_name in header:
            planet_columns.append((planet_num, header.index(column_name)))
            max_planets = planet_num
        elif DEBUG_MAX_PLANETS:
            print('"' + column_name + '" in dataset? No.' + " (Planet num, Max planets): " + str((planet_num, max_planets)))

    # For debugging
    if DEBUG_MAX_PLANETS:
        print('Max planets: ' + str(max_planets))

    if max_planets is None:
        raise ValueError('No "n Planets" columns found in ' + str(filepath))

    # NOTE(review): the time values are passed through exactly as csv2data
    # returned them (no float conversion) -- confirm the plotting code
    # expects that.
    time_axis = list(data[time_column])
    exp_val_axis = []

    # Local list; intentionally not the module-level one.
    found_spikes = []

    for timestep, time_value in enumerate(time_axis):
        exp_value = 0

        for planet_num, column in planet_columns:
            exp_value += float(planet_num) * float(data[column][timestep])

        # For debugging spikes in the graph
        if DEBUG_SPIKES and exp_value > 1:
            found_spikes.append((time_value, exp_value))

        exp_val_axis.append(exp_value)

    if DEBUG_SPIKES:
        if found_spikes:
            print('Spikes (timestep, expectation value):')

            for spike in found_spikes:
                print(spike)
        else:
            print('No spikes.')

    return {'axes': {'x': time_axis, 'y': exp_val_axis}, 'max_planets': max_planets}

def formatter(x, pos):
    """Format an x-axis tick label: the tick value divided by 10000, as text.

    pos is accepted so the function can be handed to a tick FuncFormatter,
    but it is not used.
    """
    scaled = x / 10000.0
    return '{}'.format(scaled)

def plot_data(data, savename):
    """Plot expectation value against time and save the figure.

    Args:
        data: Dict with the time values under data['axes']['x'], the
            expectation values under data['axes']['y'] and the planet count
            (shown in the title) under data['max_planets'].
        savename: Used as the legend label and, prefixed with 'ev_', as the
            file name of the figure inside '../expectation_value_plots'.
    """
    # Initializes the plot
    (fig, ax) = plt.subplots()

    (x, y) = (data['axes']['x'], data['axes']['y'])
    max_planets = data['max_planets']

    # Upper y-limit: one unit of headroom above the largest value
    max_y = np.max(y) + 1

    # Plots the data on this figure's axes explicitly
    ax.plot(x, y, label=savename)

    # Labels the plot
    ax.set_title("Multiplicity Expectation Values vs. Time " + "({planets}-Planet System)".format(planets=max_planets), fontsize=15)
    ax.set_xlabel('Time (Myr)', fontsize=12)
    ax.set_ylabel('Expectation Value (Dimensionless)', fontsize=12)

    # Sets the tick spacing and label format for the x-axis
    ax.xaxis.set_major_locator(tck.MaxNLocator(10))
    ax.xaxis.set_major_formatter(tck.FuncFormatter(formatter))

    ax.legend()

    # Sets the axis limits
    ax.set_xlim(left=0)
    ax.set_ylim(0, max_y)

    # Saves the figure, creating the output folder first if it is missing so
    # that savefig does not fail on a fresh checkout.
    save_dir = os.path.join(os.pardir, 'expectation_value_plots')
    os.makedirs(save_dir, exist_ok=True)
    fig.savefig(os.path.join(save_dir, 'ev_' + savename), bbox_inches="tight", dpi=300)

    # Shown only after saving, so displaying cannot affect the saved file
    if SHOW_PLOTS:
        plt.show()

    # Closes this specific figure rather than whichever one is current
    plt.close(fig)

In [38]:
def run(idx_range):
    """Generate and save one plot per selected data file.

    idx_range is an iterable of indices into the data_files list, which
    holds the data file names. Each selected file is read and plotted, and
    the plot is named after the file with '.csv' removed.
    """
    for file_index in idx_range:
        filename = data_files[file_index]
        plot_name = filename.replace('.csv', '')
        plot_data(get_data(os.path.join(path, filename)), plot_name)

In [41]:
# Generates plots for the data files at indices 0 through 29 of data_files.
# Indexing raises IndexError if data_files holds fewer than 30 entries.
run(range(30))