In [None]:
# Load IPython's autoreload extension and select mode 2, so imported modules
# (e.g. the local plotting helpers) are re-imported before each cell runs.
%load_ext autoreload
%autoreload 2

In [None]:
import pandas as pd
import numpy as np
from scipy.signal import find_peaks

import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import seaborn as sns

# set text size for plots to be larger
plt.rcParams.update({"font.size": 16})

import glob
import os
import tqdm
import plot_fuctions as pf
from plot_fuctions import load_and_process_file, get_revo_count, get_combined_count, set_paths


In [None]:
# Change the working directory to the project root; every location below is
# derived from it.
# NOTE(review): this absolute path is machine-specific — consider moving it to a
# single configurable constant.
os.chdir("/home/joe/ttop/cascade/")
# os.chdir() returns None, so read the directory back to get a usable value.
cwd = os.getcwd()

data_path = os.path.join(cwd, "data", "midway_294444")
# The trailing "" keeps the trailing separator, because later cells append a
# file name to these strings with "+".
model_path = os.path.join(cwd, "data", "midway_full_run", "model", "")
image_path = os.path.join(cwd, "model_analysis", "analysis_images", "midway_294444", "")
data_directory = pf.set_paths(data_path, image_path)

# os.listdir() returns entries in arbitrary order; sorting makes the row order of
# df (and anything numbered from it later) reproducible across runs and machines.
all_files = sorted(
    os.path.join(data_directory, file_name)
    for file_name in os.listdir(data_directory)
    if file_name.endswith('.parquet')
)
if not all_files:
    # Fail with a message that names the directory instead of pd.concat's
    # generic "No objects to concatenate".
    raise FileNotFoundError(f"No .parquet files found in {data_directory!r}")

# Load every file through the project helper and stack the results into one frame.
all_dataframes = [load_and_process_file(file_path) for file_path in all_files]
df = pd.concat(all_dataframes, ignore_index=True)
print(df.head())

In [None]:
# Report how many distinct values the 'Model' column of the combined frame holds.
unique_model_count = df["Model"].nunique()
print(f"There are {unique_model_count} unique values in the 'model' column.")
# get_revo_count and get_combined_count are imported from plot_fuctions; their
# behaviour is not visible in this notebook.
# NOTE(review): the return value of get_revo_count is discarded — presumably it
# prints a summary or annotates df in place; confirm against the helper.
get_revo_count(df)
# NOTE(review): df is rebound to the helper's result, so re-running this cell
# feeds the already-processed frame back in — confirm that is safe.
df = get_combined_count(df)

In [None]:
# Give every distinct model identifier a small integer, numbered in the order the
# identifiers first appear in df.
model_number_mapping = dict(
    (name, number) for number, name in enumerate(df['Model'].unique())
)

# Attach that integer to each row as the 'Model Number' column.
df['Model Number'] = df['Model'].map(model_number_mapping)

In [None]:
# Filter the DataFrame to only include rows with the selected model pairs
selected_models = [model for pair in model_pairs for model in pair]
filtered_df = df[df['Model'].isin(selected_models)]

# Melt the DataFrame to have separate columns for Active and Jail Count
melted_df = pd.melt(
    filtered_df,
    id_vars=['Model', 'Model Number', 'Epsilon', 'Step'],
    value_vars=['Active Count', 'Jail Count'],
    var_name='Count_Type',
    value_name='Count'
)

# Combine Model and Count_Type columns into a new column
melted_df['Model_Count_Type'] = melted_df['Model'] + ' - ' + melted_df['Count_Type']

# Create a new column for pair number
model_to_pair = {model: i // 2 + 1 for i, model in enumerate(selected_models)}
melted_df['Pair'] = melted_df['Model'].map(model_to_pair)

# Create a color map based on the Epsilon values
color_map = {0.1: 'red', 0.2: 'red', 1.0: 'blue'}

# Create a bar plot using Seaborn's barplot
sns.set(style="whitegrid")
sns.set_context("notebook", font_scale=1.5)

plt.figure(figsize=(8.5, 11))

# Create the FacetGrid with the 'Pair' column
g = sns.FacetGrid(
    data=melted_df,
    col='Pair',
    col_wrap=2,
    height=4,
    aspect=1.5,
    sharex=True,
    sharey=True,
    margin_titles=True
)

# Plot the Active and Jail Count
g.map_dataframe(sns.lineplot, x='Step', y='Count', hue='Epsilon', style='Count_Type', palette=color_map, legend=False)

# Customize the titles for each graph in the facet
g.set_titles(template="{col_name}")

# Change the title to display the model number pair
for ax, pair in zip(g.axes.flat, model_pairs):
    ax.set_title(f"Model Pair: {df[df['Model'] == pair[0]]['Model Number'].unique()}, {df[df['Model'] == pair[1]]['Model Number'].unique()}")

# Save the figure
plt.savefig(image_path + "facet_graph.png", dpi=300, bbox_inches='tight')

# Show the plot
plt.show()


In [None]:
# Filter the data based on the given conditions
filtered_df = df.loc[
    (df['Seed'] == df['Seed'].shift()) &
    (df['Security Density'] == df['Security Density'].shift()) &
    (df['Private Preference'] == df['Private Preference'].shift()) &
    (
        (
            (df['Epsilon'] == 1.0) &
            (df['Revolution'] == False)
        ) |
        (
            (df['Epsilon'].isin([0.1, 0.2])) &
            (df['Revolution'] == True)
        )
    )
]

# Group the filtered DataFrame by Seed, Security Density, and Private Preference
grouped_df = filtered_df.groupby(['Seed', 'Security Density', 'Private Preference'])

# Initialize an empty list to store model pairs
model_pairs = []

# Iterate through the grouped DataFrame and store the Model pairs
for _, group in grouped_df:
    models = group['Model'].unique()
    if len(models) == 2:
        model_pairs.append(tuple(models))

# Print the list of model pairs
print("Model Pairs:")
for pair in model_pairs:
    print(pair)

In [None]:
def check_models(df, model1, model2, xlim=(0, 500), ylim=(0, 1120)):
    """Print a short summary of two models and plot their count curves together.

    For each model the function prints the distinct 'Model' identifiers and the
    distinct 'Revolution' values found in its rows, then draws 'Active Count'
    (solid) and 'Jail Count' (dashed) against 'Step' on one shared axes: the
    first model in red, the second in blue.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with the columns 'Model', 'Model Number', 'Revolution', 'Step',
        'Active Count' and 'Jail Count'.
    model1, model2 : value comparable with the 'Model Number' column
        The 'Model Number' values of the two models to compare.
    xlim, ylim : tuple of (low, high), optional
        Axis limits; the defaults reproduce the previously hard-coded window.

    Returns
    -------
    None
        The figure is displayed with ``plt.show()``.
    """
    # Select each model's rows once instead of re-filtering df for every use.
    runs = [
        ("Model 1", model1, 'red', df[df['Model Number'] == model1]),
        ("Model 2", model2, 'blue', df[df['Model Number'] == model2]),
    ]

    # The second model's identifier line was previously mislabelled "Model 1".
    for tag, number, _, rows in runs:
        print(f"{tag}: {rows['Model'].unique()}, {number}")
        print(f"{tag}: Revolutions: {rows['Revolution'].unique()}")

    _, ax = plt.subplots()
    for _, number, colour, rows in runs:
        # Include the model number in each label so the four legend entries are
        # distinguishable.
        sns.lineplot(x='Step', y='Active Count', data=rows,
                     label=f'Model {number} Active Count', color=colour, ax=ax)
        sns.lineplot(x='Step', y='Jail Count', data=rows,
                     label=f'Model {number} Jail Count', linestyle='--', color=colour, ax=ax)
    ax.set_ylim(*ylim)
    ax.set_xlim(*xlim)
    ax.legend()
    plt.show()
