In [None]:
import copy
import glob

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.stats
import seaborn as sns
import statsmodels.formula.api as sm
import tqdm
import tqdm.notebook
from IPython.display import Video, display, HTML

# Apply seaborn's default theme to all subsequent matplotlib figures.
sns.set()

def read_csvx(path, *args, **kwargs):
    """Read a CSV file, falling back to its gzip-compressed sibling.

    Parameters
    ----------
    path : str or path-like
        Location of the uncompressed CSV file. It is converted with ``str``.
    *args, **kwargs
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        Content of ``path`` if it exists, otherwise content of ``path + '.gz'``.

    Raises
    ------
    FileNotFoundError
        If neither ``path`` nor ``path + '.gz'`` exists.
    """
    try:
        return pd.read_csv(str(path), *args, **kwargs)
    except FileNotFoundError:
        # Python has no `FileNotFoundException`; the built-in raised for a
        # missing file is FileNotFoundError.
        return pd.read_csv(str(path) + '.gz', *args, **kwargs)


def followorbit(vals, path):
    """Look up the ``rank`` in ``vals`` for each of the first 100 steps of ``path``.

    For step ``n`` (0 to 99), ``path[n]`` is rounded to the nearest half and
    clipped to the range [0, 10]; the result is matched against the
    ``othercoop`` column of ``vals`` on the row whose ``nbopp`` equals ``n``.

    Returns a numpy array holding the 100 ``rank`` values, in step order.
    """
    def _rank_at(n):
        # Multiply by two, round, divide by two: snaps to a multiple of 0.5.
        closest05 = np.clip(round(path[n] * 2) / 2, 0, 10)
        matches = vals.query(f'nbopp == {n} and othercoop == {closest05}')
        return matches['rank'].values[0]

    return np.array([_rank_at(n) for n in range(100)])

import re
def gethighest(vals, prefix="logall_", which=1):
    """Return the entry of ``vals`` with the ``which``-th highest embedded number.

    The number of an entry is the run of digits that directly follows
    ``prefix`` (``prefix`` is inserted into the regular expression as-is).

    Parameters
    ----------
    vals : sequence of str
        Candidate names; must support integer indexing.
    prefix : str
        Text that precedes the number in each entry.
    which : int
        1 selects the highest number, 2 the second highest, and so on.

    Returns
    -------
    str
        The selected element of ``vals``.

    Raises
    ------
    AttributeError
        If an entry does not contain ``prefix`` followed by digits.
    """
    # Raw f-string: '\d' in a plain string literal is an invalid escape
    # sequence and triggers a SyntaxWarning on recent Python versions.
    pattern = re.compile(rf'{prefix}(\d+)')
    numbers = [int(pattern.search(val).group(1)) for val in vals]
    if which == 1:
        m = np.argmax(numbers)
    else:
        ranked = sorted(enumerate(numbers), key=lambda pair: pair[1])
        m = ranked[-which][0]
    return vals[m]

    
def readparams(file):
    """Parse a ``key=value`` parameter file into a dict.

    Blank lines and lines starting with ``#`` or ``import`` are skipped.
    Values equal to ``true``/``false`` (any case) become booleans, values
    accepted by ``float`` become floats, and everything else stays a string.

    NOTE(review): readparams is defined a second time further down in this
    notebook; that later definition is the one bound at run time.

    Parameters
    ----------
    file : str or path-like
        Path of the parameter file to read.

    Returns
    -------
    dict
        Mapping from key (text before the first ``=``) to the parsed value.

    Raises
    ------
    ValueError
        If a non-skipped line contains no ``=``.
    """
    params = {}
    with open(file) as f:
        for line in f:
            # Strip only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = line.rstrip('\n')
            if not line.strip() or line.startswith('#') or line.startswith('import'):
                continue
            # Split on the first '=' only so a value may itself contain '='.
            key, val = line.split('=', 1)
            lowered = val.lower()
            if lowered in ('true', 'false'):
                params[key] = lowered == 'true'
            else:
                try:
                    params[key] = float(val)
                except ValueError:
                    params[key] = val
    return params

def sorted_nicely(l):
    """Return the items of ``l`` as a list in natural ("human") order.

    Each item is cut into alternating non-digit and digit chunks; digit
    chunks are compared as integers, so ``'a2'`` sorts before ``'a10'``.
    """
    def _natural_key(key):
        chunks = re.split('([0-9]+)', key)
        return [int(chunk) if chunk.isdigit() else chunk for chunk in chunks]

    return sorted(l, key=_natural_key)

def readparams(file):
    """Parse a ``key=value`` parameter file into a dict.

    Blank lines and lines starting with ``#`` or ``import`` are skipped.
    Values equal to ``true``/``false`` (any case) become booleans, values
    accepted by ``float`` become floats, and everything else stays a string.

    NOTE(review): readparams is also defined earlier in this notebook; this
    later definition is the one bound at run time, so one of the two should
    be removed.

    Parameters
    ----------
    file : str or path-like
        Path of the parameter file to read.

    Returns
    -------
    dict
        Mapping from key (text before the first ``=``) to the parsed value.

    Raises
    ------
    ValueError
        If a non-skipped line contains no ``=``.
    """
    params = {}
    with open(file) as f:
        for line in f:
            # Strip only the line terminator: slicing off the last character
            # would truncate a final line that has no trailing newline.
            line = line.rstrip('\n')
            if not line.strip() or line.startswith('#') or line.startswith('import'):
                continue
            # Split on the first '=' only so a value may itself contain '='.
            key, val = line.split('=', 1)
            lowered = val.lower()
            if lowered in ('true', 'false'):
                params[key] = lowered == 'true'
            else:
                try:
                    params[key] = float(val)
                except ValueError:
                    params[key] = val
    return params

import platform
# Identify the current machine so the project root can be chosen per host.
hostname = platform.node()
print(hostname)

# The host named 'cluster' uses the robocoop directory; every other host
# uses the remoterobo directory.
prefix = '/home/ecoffet/robocoop/' if hostname == 'cluster' else '/home/pecoffet/remoterobo/'

In [None]:
# Collect every replay directory of the lionscross10-procbtest runs. The
# search is rooted at the host-specific `prefix` chosen above rather than at
# a path hard-coded for one machine; on the 'cluster' host the resulting
# pattern is identical to the previous literal.
allpaths = glob.glob(prefix + "logs/lionscross10-procbtest/**/*opti2/**/a100/**/replay/", recursive=True)

In [None]:
# For every replay directory, read the run parameters and the score log, and
# record the fraction of (eval, iter, id) groups whose highest-scoring row
# has an 'othercoop' value above that group's mean 'othercoop'.
datas = []
for path in tqdm.notebook.tqdm(allpaths):
    properties_files = glob.glob(path + '/properties*')
    if not properties_files:
        # No parameter file: skip this run instead of failing on [0].
        print('error for', path, '(no properties file)')
        continue
    param = readparams(properties_files[0])
    try:
        # Only the first million rows of the log are read.
        # NOTE: later cells of this notebook reference `df`, i.e. the log of
        # the last run that loaded successfully here.
        df = pd.read_csv(path + "/logscore_0.txt.gz", nrows=1_000_000)
    except Exception as err:
        # Still best-effort, but no longer swallows KeyboardInterrupt and
        # now reports why the file could not be read.
        print('error for', path, err)
        continue
    gp = df.groupby(['eval', 'iter', 'id'])
    # Average only the column that is needed rather than every column.
    meancoop = gp['othercoop'].mean()
    # 'othercoop' of the row with the highest score in each group.
    chosen = gp[['score', 'othercoop']].apply(
        lambda g: g.loc[g['score'].idxmax(), 'othercoop']
    )
    info = copy.deepcopy(param)
    info['percentmaxcoop'] = np.sum(chosen > meancoop) / len(chosen)
    datas.append(info)
fulldf = pd.DataFrame(datas)

In [None]:
# Cache the aggregated per-run table so the slow loading loop can be skipped.
score_cache_path = 'score_loaded.pkl'
fulldf.to_pickle(score_cache_path)

In [None]:
# Reload the cached per-run table.
# NOTE(review): unpickling executes arbitrary code; only load a cache file
# that this notebook wrote itself.
score_cache_path = 'score_loaded.pkl'
fulldf = pd.read_pickle(score_cache_path)

In [None]:
# One point per run: percentmaxcoop against gNbOfPhysicalObjects.
sns.scatterplot(
    data=fulldf,
    x='gNbOfPhysicalObjects',
    y='percentmaxcoop',
)

In [None]:
# Same two variables, with seaborn's fitted regression line on top.
sns.regplot(
    data=fulldf,
    x='gNbOfPhysicalObjects',
    y='percentmaxcoop',
)

In [None]:
# Same two variables, drawn as a line plot.
sns.lineplot(
    data=fulldf,
    x='gNbOfPhysicalObjects',
    y='percentmaxcoop',
)

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
plt.figure(figsize=(10, 3))
sns.scatterplot(data=df.query('eval == 0 and id == 11'), x='othercoop', y='score', hue="nbpart")

In [None]:
from scipy import stats

In [None]:
# Standardise every column of `df`: subtract the column mean, then divide by
# the column standard deviation.
centered = df - df.mean()
normdf = centered / df.std()

# Ordinary least squares of score on othercoop and nbpart, fitted on the
# standardised columns; the summary table is the cell's output.
ols_model = sm.ols('score ~ othercoop + nbpart', data=normdf)
ols_model.fit().summary()