In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import glob
import scipy.stats
from IPython.display import Video, display, HTML
import re

# Switch matplotlib to seaborn's default theme for every figure drawn below.
sns.set()


def gethighest(vals, prefix="logall_"):
    """Return the entry of ``vals`` with the largest number after ``prefix``.

    Every entry is searched for ``prefix`` immediately followed by a run of
    digits (for instance ``logall_499``). The entry whose number is the
    largest is returned; on ties the first such entry wins.

    Parameters
    ----------
    vals : sequence of str
        Candidate strings to compare.
    prefix : str
        Text expected right before the number. It is inserted into the
        regular expression unchanged, so regex metacharacters keep their
        special meaning.

    Returns
    -------
    str
        The winning element of ``vals``.

    Raises
    ------
    ValueError
        If ``vals`` is empty.
    AttributeError
        If an entry does not contain ``prefix`` followed by digits.
    """
    # Raw f-string: a bare '\d' in a normal string literal is an invalid
    # escape sequence and produces a warning on recent Python versions.
    pattern = re.compile(rf'{prefix}(\d+)')
    return max(vals, key=lambda val: int(pattern.search(val).group(1)))

def read_csvx(path, *args, **kwargs):
    """Read a CSV file, preferring a gzip-compressed sibling ``<path>.gz``.

    Parameters
    ----------
    path : str or path-like
        Location of the uncompressed file; ``'.gz'`` is appended to build the
        name of the compressed variant that is tried first.
    *args, **kwargs
        Forwarded unchanged to ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame

    Raises
    ------
    FileNotFoundError
        If neither ``<path>.gz`` nor ``<path>`` exists.
    Exception
        Any other error raised by ``pandas.read_csv`` is propagated.
    """
    try:
        return pd.read_csv(str(path) + '.gz', *args, **kwargs)
    except FileNotFoundError:
        # Fall back only when the archive is absent. Other failures (corrupt
        # archive, bad arguments) are real errors and must not be hidden by
        # silently reading a different file.
        return pd.read_csv(str(path), *args, **kwargs)

        
def readparams(file):
    """Parse a ``key=value`` properties file into a dictionary.

    Blank lines and lines starting with ``#`` or ``import`` are ignored.
    Each remaining line is split on its first ``=``; surrounding whitespace
    is removed from both the key and the value. Values are converted as
    follows:

    * ``true`` / ``false`` (any letter case) become ``bool``;
    * anything ``float()`` accepts becomes ``float``;
    * everything else is kept as ``str``.

    Parameters
    ----------
    file : str or path-like
        Path of the properties file.

    Returns
    -------
    dict
        Mapping from parameter name to its converted value. When a key
        appears several times, the last occurrence wins.

    Raises
    ------
    ValueError
        If a non-ignored line contains no ``=``.
    """
    params = {}
    with open(file) as f:
        for lineno, raw in enumerate(f, start=1):
            # strip() rather than raw[:-1]: the last line of a file may have
            # no trailing newline, and slicing would eat a real character.
            line = raw.strip()
            if not line or line.startswith('#') or line.startswith('import'):
                continue
            # Split on the first '=' only so values may themselves contain '='.
            key, sep, val = line.partition('=')
            if not sep:
                raise ValueError(f"{file}:{lineno}: expected 'key=value', got {line!r}")
            key, val = key.strip(), val.strip()
            lowered = val.lower()
            if lowered in ('true', 'false'):
                params[key] = lowered == 'true'
            else:
                try:
                    params[key] = float(val)
                except ValueError:
                    params[key] = val
    return params

In [None]:
import tqdm
import tqdm.auto  # tqdm.tqdm_notebook is deprecated; tqdm.auto selects the notebook or console bar

# One summary record per run directory; consumed by the cells below.
params = []
# Root of the experiment tree. The original cell first assigned
# '/home/pecoffet/remoterobo' and immediately overwrote it with this value.
prefix = '/home/ecoffet/robocoop'
# Generation whose log file and fitness values are analysed.
gen = 499

# Run directories named rep00, five levels below logs/lionscross2.
# (`recursive=True` was dropped: it only matters for patterns containing '**'.)
paths = sorted(glob.glob(f"{prefix}/logs/lionscross2/*/*/*/*/*/rep00/"))
# Alternative data set used previously:
# paths = sorted(glob.glob(f"{prefix}/logs/lions-nvar-*-2019-06-*/**/rep00/", recursive=True))

for path in tqdm.auto.tqdm(paths):
    # Runs whose path contains one of these markers are left out
    # (presumably the 10000/50000-iteration variants -- TODO confirm).
    if 'i50000' in path or 'i10000' in path:
        continue
    try:
        logall = read_csvx(path + f"/logall_{gen}.txt", delimiter="\t")
    except Exception as e:
        # Best effort: a run without a readable log for `gen` is reported and skipped.
        print(e)
        continue

    # Fitness values recorded for generation `gen`.
    fitness = read_csvx(path + '/fitnesslog.txt', delimiter="\t")
    genfit = fitness.query(f"gen == {gen}")['fitness']
    medfit = genfit.median()
    meanfit = genfit.mean()
    # The first file matching 'properties*' in the run directory holds the parameters.
    curparam = readparams(glob.glob(path + 'properties*')[0])

    # TODO: average per agent rather than per game. A starting point, which the
    # original cell evaluated and then discarded, is:
    #   logall.query(f'nbOnOpp == {curparam["nOpti"]}').groupby('ind')["curCoopNoCoef"].mean()
    nmod = logall['nbOnOpp'].mode()[0]

    # Mean of curCoopNoCoef over the rows where nbOnOpp equals nOpti.
    coopopti = np.nanmean(logall.query(f"nbOnOpp == {curparam['nOpti']}")["curCoopNoCoef"])
    if np.isnan(coopopti):  # WARNING, bold choice: no usable row counts as zero cooperation
        coopopti = 0
    # Same statistic over the rows where nbOnOpp equals its most frequent value.
    # Series.mean() already skips NaN, so no extra nanmean wrapper is needed.
    coopmod = logall.query(f'nbOnOpp == {nmod}')["curCoopNoCoef"].mean()

    params.append({'path': path, 'params': curparam, 'coopopti': coopopti, 'coopmod': coopmod,
                   'nmod': nmod, 'medfit': medfit, 'meanfit': meanfit})



In [None]:
# diffkeys:    parameters present in every run whose value is not the same everywhere.
# missingkeys: parameters absent from at least one run.
# The two sets are disjoint, so code that indexes a run with a key from
# `diffkeys` can never hit a KeyError. The previous pairwise scan skipped the
# last run (range(0, len(params) - 1)) and could put a partly-missing key into
# `diffkeys`.
diffkeys = set()
missingkeys = set()
allparams = [run['params'] for run in params]
for key in set().union(*allparams):
    holders = [p for p in allparams if key in p]
    if len(holders) < len(allparams):
        missingkeys.add(key)
        continue
    # "Some pair differs" is equivalent to "some value differs from the first one".
    first = holders[0][key]
    if any(p[key] != first for p in holders[1:]):
        diffkeys.add(key)

In [None]:
# Parameters that vary between runs but carry no experimental meaning as columns.
excluded = ['gSeed', 'gLogFilename', 'path']
processed = []
for elem in params:
    param = elem['params']
    # ess and so are the two reference cooperation levels used to rescale the
    # measured values: ess maps to 0 and so maps to 1.
    ess = param['meanA'] / param['nOpti']
    so = param['meanA'] + param['b'] * (param['nOpti'] - 1) / param['nOpti']
    span = so - ess
    # span is 0 when nOpti == 1; report NaN instead of dividing by zero.
    coopopti_norm = (elem['coopopti'] - ess) / span if span else np.nan
    coopmod_norm = (elem['coopmod'] - ess) / span if span else np.nan
    outdict = {'coopopti': coopopti_norm, 'truecoopopti': elem['coopopti'],
               'coopmod': coopmod_norm, 'truecoopmod': elem['coopmod'],
               'nmod': elem['nmod'], 'medfit': elem['medfit'],
               'meanfit': elem['meanfit']}
    # One column per varying or partly-missing parameter. `.get` is used for
    # both sets so a run lacking a key yields False instead of a KeyError.
    for key in diffkeys | missingkeys:
        if key in excluded:
            continue
        outdict[key] = param.get(key, False)
    processed.append(outdict)
df = pd.DataFrame(processed)
# Standardise (zero mean, unit variance) the numeric and boolean columns only.
# Recent pandas versions raise when mean()/std() meet non-numeric columns, so
# those columns are left out of normalized_df.
numeric = df.select_dtypes(include=['number', 'bool']).astype(float)
normalized_df = (numeric - numeric.mean()) / numeric.std()
print(missingkeys)

In [None]:
# Show the per-run summary table (one row per entry of `processed`).
df

In [None]:
# Save the summary table as a gzip-compressed pickle in the working directory
# (pandas infers the compression from the '.gz' suffix).
# NOTE(review): pickles should only ever be loaded back from a trusted source.
df.to_pickle('all_file_loaded.pkl.gz')

In [None]:
nopti = 20
ntol = 1
# Constants behind the two reference lines of the first panel. They have the
# same shape as the ess / so formulas (a / n and a + b * (n - 1) / n);
# presumably a is `meanA` and b is `b` from the run parameters -- TODO confirm.
payoff_a = 5
payoff_b = 10

# One condition string drives both the row selection and the figure title.
condition = f'nOpti == {nopti} and nTolerance == {ntol} and evaluationTime == 1000'
curcond = df.query(condition)

fig, axs = plt.subplots(3, 1, figsize=(6, 9))
st = fig.suptitle(condition)

# Panel 1: raw cooperation at nOpti against the number of objects.
sns.regplot(x='gNbOfPhysicalObjects', y='truecoopopti', data=curcond, ax=axs[0])
axs[0].set_ylabel('Coop moyenne des agents')
axs[0].set_xlabel('Nombre d\'objet')
axs[0].axhline(payoff_a / nopti, label='defect', c='r')
axs[0].axhline(payoff_a + payoff_b * (nopti - 1) / nopti, label='SO', c='b')
axs[0].legend()

# Panel 2: most frequent value of nbOnOpp, with nopti as the reference line.
sns.regplot(x='gNbOfPhysicalObjects', y='nmod', data=curcond, ax=axs[1])
axs[1].set_ylabel('Mode du nombre d\'agent par opp')
axs[1].set_xlabel('Nombre d\'objet')
axs[1].axhline(nopti, label='nopti', c='b')
axs[1].legend()

# Panel 3: `medfit` is the median fitness, so the label says median
# (it used to read "moyenne", i.e. mean).
sns.regplot(x='gNbOfPhysicalObjects', y='medfit', data=curcond, ax=axs[2])
axs[2].set_ylabel('Fitness médiane des agents')
axs[2].set_xlabel('Nombre d\'objet')

fig.tight_layout()
# Move the suptitle and leave room for it above the first panel.
st.set_y(0.95)
fig.subplots_adjust(top=0.90)
# plt.show() accepts no figure argument; it displays all open figures.
plt.show()


In [None]:
import statsmodels.formula.api as sm

# Rows with tolerance 0.2 and more than 60 objects, selected once and shared by
# the scatter/regression plot and the OLS fit.
subset = df.query('nTolerance == 0.2 and gNbOfPhysicalObjects > 60')
sns.regplot(x='nOpti', y='truecoopopti', data=subset)
# The summary is the last expression, so the notebook displays it.
sm.ols(formula='truecoopopti ~ nOpti', data=subset).fit().summary()

In [None]:
# Rows with tolerance 0.2 and more than 60 objects, selected once and shared by
# the scatter/regression plot and the OLS fit.
subset = df.query('nTolerance == 0.2 and gNbOfPhysicalObjects > 60')
sns.regplot(x='nOpti', y='medfit', data=subset)
# The summary is the last expression, so the notebook displays it.
sm.ols(formula='medfit ~ nOpti', data=subset).fit().summary()

In [None]:
# Disabled: would add a short alias column for 'gInitialNumberOfRobots'.
#normalized_df['nbRobots'] = normalized_df['gInitialNumberOfRobots']
# Add 'nbObj' as a copy of the 'gNbOfPhysicalObjects' column (modifies normalized_df in place).
normalized_df['nbObj'] = normalized_df['gNbOfPhysicalObjects']

# Display the resulting table.
normalized_df

In [None]:
# Ordinary least squares fit of truecoopopti on four run parameters, using the
# columns of normalized_df.
coop_formula = "truecoopopti ~ gNbOfPhysicalObjects + nTolerance + nOpti + evaluationTime"
coop_model = sm.ols(formula=coop_formula, data=normalized_df)
result = coop_model.fit()
# Last expression of the cell: the notebook renders the regression table.
result.summary()

In [None]:
# Ordinary least squares fit of medfit on three run parameters, using the
# columns of normalized_df.
fitness_formula = "medfit ~ gNbOfPhysicalObjects + nTolerance + nOpti"
fitness_model = sm.ols(formula=fitness_formula, data=normalized_df)
result = fitness_model.fit()
# Last expression of the cell: the notebook renders the regression table.
result.summary()

In [None]:
def bellcurve(x, mu, sigma):
    """Gaussian probability density with mean ``mu`` and standard deviation ``sigma``, evaluated at ``x``."""
    return 1.0 / np.sqrt(2 * np.pi) * 1.0 / sigma * np.exp(- ((x - mu) * (x - mu)) / (2 * sigma * sigma))


def alonepayoff(x):
    """Return ``a * x - 0.5 * x**2`` with the constant ``a = 5``."""
    a = 5
    return a * x - 0.5 * x**2


df['ratio'] = (df['gNbOfPhysicalObjects'] / 100)

# The derived columns must exist on df BEFORE the subset is taken: df.query()
# returns a new frame, so columns added to df afterwards never reach `curcond`.
# With the old ordering the third heatmap raised KeyError('difffit') on a fresh
# kernel and only worked once the cell had been run twice.
# NOTE(review): the factor 1000 presumably stands for evaluationTime -- confirm.
df['betteralone'] = bellcurve(1, df['nOpti'], df['nTolerance']) * alonepayoff(5) * 1000
df['difffit'] = (df['meanfit'] - df['betteralone'])

ntol = 1
curcond = df.query(f'nTolerance == {ntol} and evaluationTime == 1000')

fig, axs = plt.subplots(3, 1, figsize=(5, 12))
st = fig.suptitle(f"Tolerance={ntol}", fontsize=14)

# Panel 1: mean fitness for every (nOpti, number of objects) pair.
sns.heatmap(curcond.pivot_table('meanfit', 'nOpti', 'gNbOfPhysicalObjects'), cmap=sns.cm.rocket_r, ax=axs[0])
axs[0].set_ylabel('Nombre optimal par opp')
axs[0].set_xlabel('Nombre d\'opportunités')
axs[0].set_title('Fitness Moyenne')

# Panel 2: raw cooperation at nOpti, colour scale fixed to [0, 10].
sns.heatmap(curcond.pivot_table('truecoopopti', 'nOpti', 'gNbOfPhysicalObjects'), cmap=sns.cm.rocket_r, vmin=0, vmax=10, ax=axs[1])
axs[1].set_ylabel('Nombre optimal par opp')
axs[1].set_xlabel('Nombre d\'opportunités')
axs[1].set_title('Coopération Moyenne')

# Panel 3: mean fitness minus the 'betteralone' reference value.
sns.heatmap(curcond.pivot_table('difffit', 'nOpti', 'gNbOfPhysicalObjects'), linewidth=0.01, cmap=sns.cm.rocket_r, ax=axs[2])
axs[2].set_ylabel('Nombre optimal par opp')
axs[2].set_xlabel('Nombre d\'opportunités')
axs[2].set_title('Différence entre fitness seule espérée et fitness moyenne')
fig.tight_layout()

# Shift the subplots down to leave room for the suptitle.
st.set_y(0.95)
fig.subplots_adjust(top=0.90)
# plt.show() accepts no figure argument; it displays all open figures.
plt.show()

In [None]:
# Look at a single configuration and keep only the columns relevant to the
# fitness comparison.
shown_columns = ['nOpti', 'gNbOfPhysicalObjects', 'medfit', 'betteralone', 'difffit']
single_config = 'nOpti == 2 and evaluationTime == 1000 and nTolerance == 0.2 and gNbOfPhysicalObjects == 10'
df.query(single_config)[shown_columns]