In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
import math
from scipy.stats import f_oneway

def readfile(filename):
    # Get the data from the file
    file = os.path.join(os.getcwd(), 'output', 'files', str(filename))
    df = pd.read_csv(file, index_col=False)

    # Drop unnecessary columns, get exit and entry frame.
    df = df.drop(['Cell kind', 'Coordinates'], axis=1)
    df['exit'] = df.loc[df['Event'] == 'exit', 'Frame'].astype(float)
    df['entry'] = df.loc[df['Event'] == 'entry', 'Frame'].astype(float)
    df = df.drop(['Event', 'Frame'], axis=1)

    # Get run number
    df['Run'] = np.nan
    run = 1
    for i, row in df.iterrows():
        if row[0] == 'ID':
            run += 1
        else:
            df.at[i, 'Run'] = run

    # Group runs and remove info of cells which haven't finished (last 20)
    df = df[df['ID'] != 'ID']
    dfs = [x for _, x in df.groupby('Run')]
    for i in range(len(dfs)):
        dfs[i] = dfs[i].groupby(['ID'], as_index=False).first()
    df = pd.concat(dfs)
    df = df.dropna(subset=['exit'])

    # Remove first 20 cells due to uninhibited movement
    for run in df['Run'].unique():
        ids_to_remove = df[df['Run'] == run]['ID'].unique()[:20]
        df = df[~((df['Run'] == run) & (df['ID'].isin(ids_to_remove)))]

    # Casting dtypes
    df['Frames'] = df['exit'].astype(float) - df['entry'].astype(float)
    df['Run'] = df['Run'].astype(int)
    runs = df['Run'].unique()

    # Creating final dataframe
    summary_df = pd.DataFrame(columns=['Run', 'Mean Frames', 'Std Frames', 'Min Frames', 'Max Frames',
                                       'Median Frames', 'Q1 Frames', 'Q3 Frames'])

    for run in df['Run'].unique():
        df_run = df[df['Run'] == run]
        mean_frames = df_run['Frames'].mean()
        std_frames = df_run['Frames'].std()
        min_frames = df_run['Frames'].min()
        max_frames = df_run['Frames'].max()
        median_frames = df_run['Frames'].median()
        q1_frames = df_run['Frames'].quantile(0.25)
        q3_frames = df_run['Frames'].quantile(0.75)

        row = pd.DataFrame({'Run': [run], 'Mean Frames': [mean_frames], 'Std Frames': [std_frames],
                            'Min Frames': [min_frames], 'Max Frames': [max_frames],
                            'Median Frames': [median_frames], 'Q1 Frames': [q1_frames], 'Q3 Frames': [q3_frames]})

        summary_df = pd.concat([summary_df, row], ignore_index=True)

    # Creating final averaging row
    average_all_runs = summary_df.mean().to_frame().T
    average_all_runs['Run'] = 'Average'
    summary_df = pd.concat([summary_df, average_all_runs], ignore_index=True)

    return summary_df, df


In [2]:
filenames = ['lambda_act_100_setting_1.csv',
            'lambda_act_100_setting_2.csv',
            'lambda_act_100_setting_3.csv',
            'lambda_act_100_setting_4.csv',
            'lambda_act_200_setting_1.csv',
            'lambda_act_200_setting_2.csv',
            'lambda_act_200_setting_3.csv',
            'lambda_act_200_setting_4.csv',
            'lambda_act_300_setting_1.csv',
            'lambda_act_300_setting_2.csv',
            'lambda_act_300_setting_3.csv',
            'lambda_act_300_setting_4.csv']

all_data = {}
for file in filenames:
    all_data[file] = readfile(file)

  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)


In [3]:
import matplotlib.pyplot as plt

# Maak een plot voor het gemiddeld aantal frames per run
plt.figure(figsize=(10,6))
plt.plot(summary_df['Run'][:-1], summary_df['Mean Frames'][:-1])  # we sluiten het laatste gemiddelde uit
plt.xlabel('Run')
plt.ylabel('Gemiddeld aantal Frames')
plt.title('Gemiddeld aantal frames per run')
plt.grid(True)
plt.show()

# Maak een histogram van de gemiddelde frames
plt.figure(figsize=(10,6))
plt.hist(full_df['Frames'], bins=20, edgecolor='black')
plt.xlabel('Aantal Frames')
plt.ylabel('Frequentie')
plt.title('Histogram van het aantal Frames')
plt.grid(True)
plt.show()

# Boxplot van Frames per Run
full_df.boxplot(column='Frames', by='Run', figsize=(10,6))
plt.title('Boxplot van Frames per Run')
plt.suptitle('')  # Verwijder de automatische 'Boxplot grouped by Run' titel
plt.show()

# Scatterplot van 'exit' en 'entry'
plt.figure(figsize=(10,6))
plt.scatter(full_df['exit'], full_df['entry'])
plt.xlabel('Exit Frame')
plt.ylabel('Entry Frame')
plt.title('Scatterplot van Exit vs Entry Frames')
plt.grid(True)
plt.show()


NameError: name 'summary_df' is not defined

<Figure size 1000x600 with 0 Axes>

In [4]:
def z_test(run1, run2):
    data1 = all_data[run1][0]
    data2 = all_data[run2][0]
    avg1 = data1.loc[100]['Mean Frames']
    avg2 = data2.loc[100]['Mean Frames']
    std1 = data1.loc[100]['Std Frames']
    std2 = data2.loc[100]['Std Frames']
    return (avg1 - avg2) / (math.sqrt(std1 ** 2 + std2 ** 2))

keys = list(all_data.keys())
z_test(keys[0], keys[1])

-0.2577266237933793

# $\lambda$Act Analysis


In [5]:
for x in keys:
    print(f'Name is {x} with mean frames of: {all_data[x][0].loc[100]["Mean Frames"]}')

Name is lambda_act_100_setting_1.csv with mean frames of: 1569.9441982310943
Name is lambda_act_100_setting_2.csv with mean frames of: 1635.4987571476438
Name is lambda_act_100_setting_3.csv with mean frames of: 1635.2814830444029
Name is lambda_act_100_setting_4.csv with mean frames of: 1583.9637180551165
Name is lambda_act_200_setting_1.csv with mean frames of: 1241.0281967213116
Name is lambda_act_200_setting_2.csv with mean frames of: 1278.2236557377046
Name is lambda_act_200_setting_3.csv with mean frames of: 1285.154721311475
Name is lambda_act_200_setting_4.csv with mean frames of: 1254.4688524590165
Name is lambda_act_300_setting_1.csv with mean frames of: 1046.1611237440507
Name is lambda_act_300_setting_2.csv with mean frames of: 1054.4572131147543
Name is lambda_act_300_setting_3.csv with mean frames of: 1051.1996721311475
Name is lambda_act_300_setting_4.csv with mean frames of: 1055.4608196721313


In [6]:
from scipy.stats import f_oneway
averages = [all_data[x][0].loc[100]['Mean Frames'] for x in keys]
setting_1 = [averages[x] for x in range(0, len(averages), 4)]
setting_2 = [averages[x] for x in range(1, len(averages)+1, 4)]
setting_3 = [averages[x] for x in range(2, len(averages)+2, 4)]
setting_4 = [averages[x] for x in range(3, len(averages)+3, 4)]
f_score, p = f_oneway(setting_1, setting_2, setting_3, setting_4)
print(f"ANOVA F-score between settings: \nf_score = {f_score}, p={p}")

ANOVA F-score between settings: 
f_score = 0.013592550779932601, p=0.9976515171042021


In [7]:
value_1 = averages[0:4]
value_2 = averages[4:8]
value_3 = averages[8:12]
anova_score, p = f_oneway(value_1, value_2, value_3)
print(anova_score, '{0:.10f}'.format(p))

582.6425130305911 0.0000000003


# Cell_to_cell adhesion Analysis

In [8]:
c2c_names = ['c2c_adh_0_setting_1.csv',
            'c2c_adh_0_setting_2.csv',
            'c2c_adh_0_setting_3.csv',
            'c2c_adh_0_setting_4.csv',
            'c2c_adh_5_setting_1.csv',
            'c2c_adh_5_setting_2.csv',
            'c2c_adh_5_setting_3.csv',
            'c2c_adh_5_setting_4.csv',
            'c2c_adh_10_setting_1.csv',
            'c2c_adh_10_setting_2.csv',
            'c2c_adh_10_setting_3.csv',
            'c2c_adh_10_setting_4.csv',]

c2c_data = {}
for filename in c2c_names:
    c2c_data[filename] = readfile(filename)

  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)


In [9]:
c2c_keys = list(c2c_data.keys())
for x in c2c_keys:
    print(f'Name is {x} with mean frames of: {c2c_data[x][0].loc[100]["Mean Frames"]}')

Name is c2c_adh_0_setting_1.csv with mean frames of: 1239.5965573770495
Name is c2c_adh_0_setting_2.csv with mean frames of: 1280.8542622950824
Name is c2c_adh_0_setting_3.csv with mean frames of: 1284.601803278688
Name is c2c_adh_0_setting_4.csv with mean frames of: 1249.9285245901638
Name is c2c_adh_5_setting_1.csv with mean frames of: 1256.882131147541
Name is c2c_adh_5_setting_2.csv with mean frames of: 1294.8214754098356
Name is c2c_adh_5_setting_3.csv with mean frames of: 1305.215016393443
Name is c2c_adh_5_setting_4.csv with mean frames of: 1269.2911475409826
Name is c2c_adh_10_setting_1.csv with mean frames of: 1272.6839344262291
Name is c2c_adh_10_setting_2.csv with mean frames of: 1316.6831147540984
Name is c2c_adh_10_setting_3.csv with mean frames of: 1320.296496210118
Name is c2c_adh_10_setting_4.csv with mean frames of: 1286.5237377930546


In [10]:
c2c_avgs = [c2c_data[x][0].loc[100]['Mean Frames'] for x in c2c_keys]
setting_1 = [c2c_avgs[x] for x in range(0, len(c2c_avgs), 4)]
setting_2 = [c2c_avgs[x] for x in range(1, len(c2c_avgs)+1, 4)]
setting_3 = [c2c_avgs[x] for x in range(2, len(c2c_avgs)+2, 4)]
setting_4 = [c2c_avgs[x] for x in range(3, len(c2c_avgs)+3, 4)]
f_score, p = f_oneway(setting_1, setting_2, setting_3, setting_4)
print(f"ANOVA F-score between settings: \nf_score = {f_score}, p={p}")

ANOVA F-score between settings: 
f_score = 4.87277861170081, p=0.03258749265358982


In [11]:
value_1 = c2c_avgs[0:4]
value_2 = c2c_avgs[4:8]
value_3 = c2c_avgs[8:12]
f_score, p = f_oneway(value_1, value_2, value_3)
print(f"ANOVA F-score between act-values: \nf_score = {f_score}, p={p}")

ANOVA F-score between act-values: 
f_score = 2.432386945816856, p=0.14304863926137187


# Pushing Power analysis

In [12]:
power_files = [
    'scenario_1-dir_50.csv',
    'scenario_2-dir_50.csv',
    'scenario_3-dir_50.csv',
    'scenario_4-dir_50.csv',
    'scenario_1-dir_100.csv',
    'scenario_2-dir_100.csv',
    'scenario_3-dir_100.csv',
    'scenario_4-dir_100.csv',
    'scenario_1-dir_200.csv',
    'scenario_2-dir_200.csv',
    'scenario_3-dir_200.csv',
    'scenario_4-dir_200.csv'
]
power_data = {}
for x in power_files:
    power_data[x] = readfile(x)

  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)


In [13]:
power_keys = list(power_data.keys())
for x in power_keys:
    print(f'Name is {x} with mean frames of: {power_data[x][0].loc[100]["Mean Frames"]}')

Name is scenario_1-dir_50.csv with mean frames of: 2323.5615835880335
Name is scenario_2-dir_50.csv with mean frames of: 2415.3287906442715
Name is scenario_3-dir_50.csv with mean frames of: 2403.2929405661016
Name is scenario_4-dir_50.csv with mean frames of: 2345.9999843938135
Name is scenario_1-dir_100.csv with mean frames of: 1239.5965573770495
Name is scenario_2-dir_100.csv with mean frames of: 1278.2236557377046
Name is scenario_3-dir_100.csv with mean frames of: 1286.9199999999992
Name is scenario_4-dir_100.csv with mean frames of: 1255.4244262295083
Name is scenario_1-dir_200.csv with mean frames of: 975.7345901639345
Name is scenario_2-dir_200.csv with mean frames of: 1006.6977049180325
Name is scenario_3-dir_200.csv with mean frames of: 1002.7018032786883
Name is scenario_4-dir_200.csv with mean frames of: 978.0706557377047


In [14]:
power_avgs = [power_data[x][0].loc[100]['Mean Frames'] for x in power_keys]
setting_1 = [power_avgs[x] for x in range(0, len(power_avgs), 4)]
setting_2 = [power_avgs[x] for x in range(1, len(power_avgs)+1, 4)]
setting_3 = [power_avgs[x] for x in range(2, len(power_avgs)+2, 4)]
setting_4 = [power_avgs[x] for x in range(3, len(power_avgs)+3, 4)]
f_score, p = f_oneway(setting_1, setting_2, setting_3, setting_4)
print(f"ANOVA F-score between settings: \nf_score = {f_score}, p={p}")

ANOVA F-score between settings: 
f_score = 0.004097439502146847, p=0.9996067350993111


In [15]:
value_1 = power_avgs[0:4]
value_2 = power_avgs[4:8]
value_3 = power_avgs[8:12]
f_score, p = f_oneway(value_1, value_2, value_3)
print(f"ANOVA F-score between act-values: \nf_score = {f_score}, p={p}")

ANOVA F-score between act-values: 
f_score = 2390.0451304826074, p=5.406954212489819e-13


# Obstacle size Analysis

In [22]:
names = [
    'base_1-obst_60.csv',
    'base_1-obst_55.csv',
    'base_1-obst_50.csv',
    'base_1-obst_45.csv',
    'base_1-obst_40.csv',
    'base_1-obst_35.csv',
    'base_1-obst_30.csv',
    'base_1-obst_25.csv',
    'base_1-obst_20.csv',
    'base_1-obst_0.csv',
]

standard_data = {}

for x in names:
    standard_data[x] = readfile(x)

  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)
  df = pd.read_csv(file, index_col=False)


In [25]:
standard_keys = list(standard_data.keys())
mean = []
std = []
min_ = []
max_ = []
for x in standard_keys:
    mean.append(standard_data[x][0].loc[100]["Mean Frames"])
    std.append(standard_data[x][0].loc[100]["Mean Frames"])
    min_.append(standard_data[x][0].loc[100]["Min Frames"])
    max_.append(standard_data[x][0].loc[100]["Max Frames"])

    print(f'Name is {x} with mean frames of: {standard_data[x][0].loc[100]["Mean Frames"]}')
    print(f'Name is {x} with std frames of: {standard_data[x][0].loc[100]["Std Frames"]}')
    print(f'Name is {x} with min frames of: {standard_data[x][0].loc[100]["Min Frames"]}')
    print(f'Name is {x} with Q1 frames of: {standard_data[x][0].loc[100]["Q1 Frames"]}')
    print(f'Name is {x} with median frames of: {standard_data[x][0].loc[100]["Median Frames"]}')
    print(f'Name is {x} with Q2 frames of: {standard_data[x][0].loc[100]["Q3 Frames"]}')
    print(f'Name is {x} with max frames of: {standard_data[x][0].loc[100]["Max Frames"]}')

Name is base_1-obst_60.csv with mean frames of: 926.9412427287149
Name is base_1-obst_60.csv with std frames of: 91.22239994362423
Name is base_1-obst_60.csv with min frames of: 748.69
Name is base_1-obst_60.csv with Q1 frames of: 868.1825
Name is base_1-obst_60.csv with median frames of: 926.81
Name is base_1-obst_60.csv with Q2 frames of: 982.7
Name is base_1-obst_60.csv with max frames of: 1162.04
Name is base_1-obst_55.csv with mean frames of: 924.4044473823371
Name is base_1-obst_55.csv with std frames of: 101.94056458818002
Name is base_1-obst_55.csv with min frames of: 736.21
Name is base_1-obst_55.csv with Q1 frames of: 859.1925
Name is base_1-obst_55.csv with median frames of: 921.065
Name is base_1-obst_55.csv with Q2 frames of: 986.46
Name is base_1-obst_55.csv with max frames of: 1213.05
Name is base_1-obst_50.csv with mean frames of: 916.1214754098353
Name is base_1-obst_50.csv with std frames of: 104.47734100192098
Name is base_1-obst_50.csv with min frames of: 726.88
Nam