# Imports etc.

In [31]:
import ipdb
import numpy as np
import os
import pandas as pd
import plotnine as gg
import scipy
# from scipy.optimize import curve_fit
gg.theme_set(gg.theme_classic)

from Functions import get_info_from_fullID, add_meta_column, sigmoid, get_session #, load_mouse_data, load_one_measure, remove_na_trials, 

In [2]:
# fitted_param_dir = 'C:/Users/maria/MEGAsync/SLCN/PShumanData/fitting/mice/'
mouse_data_dir = 'C:/Users/maria/MEGAsync/SLCN/PSMouseData'
# plot_dir = 'C:/Users/maria/MEGAsync/SLCN/models/plots'
# if not os.path.exists(plot_dir):
#     os.mkdir(plot_dir)

In [3]:
n_trial_cutoff = 500  # 500 as of 2020/07/21
n_session_cutoff = 10  # 10 as of 2020/07/21
rt_cut_off = 4  # 4 seconds as of 2020/08/04; 10 seconds as of 2020/07/29

# Functions to load and format mouse data

In [4]:
def load_one_measure(name, data_dir):
    
    measure_j = pd.read_csv(os.path.join(data_dir, 'Juvi_{}.csv'.format(name))).T.values  # after reading in: [trials x animals]
    measure_a = pd.read_csv(os.path.join(data_dir, 'Adult_{}.csv'.format(name))).T.values
    measure_dat = np.hstack([measure_j, measure_a])
    
    return pd.DataFrame(measure_dat)

# # Example use
# load_one_measure('Reward', mouse_data_dir)

In [5]:
def remove_na_trials(measure_dat, missed_trials):
    
    measure_dat[missed_trials] = np.nan
    measure_dat = measure_dat.apply(lambda x: pd.Series(x.dropna().values))
    
    return measure_dat

# # Example use
# remove_na_trials(load_one_measure('Reward', mouse_data_dir))

In [6]:
# # rts = load_one_measure('ITI', mouse_data_dir)  # Lung-Hao: ITI is the time of last nose poke event (in or out) of previous trial to center poke of current trial. So it's the ITI proceeding the current trial. The first trial has ITI because we removed the trials before first switch.
# # actions = load_one_measure('Choice', mouse_data_dir)
# # rts[rts > rt_cut_off] = np.nan
# print(np.mean((rts > rt_cut_off).stack()))
# print(np.mean((np.isnan(actions).stack())))
# print(np.mean((rts > rt_cut_off).stack()) + np.mean((np.isnan(actions).stack())))
# np.mean(((rts > rt_cut_off) | (np.isnan(actions))).stack())

In [7]:
def load_mouse_data(data_dir):

    # Load mouse data
    rewards = load_one_measure('Reward', mouse_data_dir)
    rts = load_one_measure('ITI', mouse_data_dir)  # Lung-Hao: ITI is the time of last nose poke event (in or out) of previous trial to center poke of current trial. So it's the ITI proceeding the current trial. The first trial has ITI because we removed the trials before first switch.
    actions = load_one_measure('Choice', mouse_data_dir)
    correct_actions = load_one_measure('TaskData', mouse_data_dir)
    corrects = (actions == correct_actions).astype('int')  # When did mice choose the right action?

    fullID_j = pd.read_csv(os.path.join(data_dir, 'Juvi_AnimalID.csv')).T.values.flatten()
    fullID_a = pd.read_csv(os.path.join(data_dir, 'Adult_AnimalID.csv')).T.values.flatten()
    fullIDs = np.concatenate([fullID_j, fullID_a])
    
    # Remove na trials and too-fast trials by shifting up later trials
    missed_trials = np.isnan(actions) | (rts > rt_cut_off)
    actions = remove_na_trials(actions, missed_trials)
    rewards = remove_na_trials(rewards, missed_trials)
    rts = remove_na_trials(rts, missed_trials)
    corrects = remove_na_trials(corrects, missed_trials)
    correct_actions = remove_na_trials(correct_actions, missed_trials)

    # Make sure all dataframes have the same shape
    assert np.shape(rewards) == np.shape(actions)
    assert np.shape(corrects) == np.shape(correct_actions)
    assert np.shape(rewards) == np.shape(correct_actions)

    return {
        'actions': actions,
        'rewards': rewards,
        'corrects': corrects,
        'rts': rts,
        'correct_actions': correct_actions,
        'fullIDs': fullIDs,
    }

# # Example use
# raw_dat = load_mouse_data(mouse_data_dir)
# raw_dat

In [8]:
def get_subj_dat(raw_dat, fullID, i):
    
    subj_dat = {
        'action': raw_dat['actions'].loc[:, i],
        'reward': raw_dat['rewards'].loc[:, i],
        'rt': raw_dat['rts'].loc[:, i],
        'correct': raw_dat['corrects'].loc[:, i],
        'trial': range(len(raw_dat['actions'].loc[:, i])),
        'block': np.append([0], np.cumsum(np.abs(np.diff(raw_dat['correct_actions'].loc[:, i])))),
        'fullID': [fullID] * len(raw_dat['actions'].loc[:, i]),
        }

    # Make sure we're not introducing nans
    assert np.mean(np.isnan(raw_dat['rts'].loc[:, i])) == np.mean(np.isnan(subj_dat['rt']))

    return pd.DataFrame(subj_dat)

# # Example use
# get_subj_dat(raw_dat, 0, 0)

In [9]:
def get_ago_col(subj_dat, colname, n):
    
    return np.append(n * [np.nan], subj_dat[colname][:-n])

# # Example use
# subj_dat = get_subj_dat(raw_dat, raw_dat['fullIDs'][0], 0)
# colname = 'action'
# n = 2

# subj_dat['{}_{}ago'.format(colname[:3], n)] = get_ago_col(subj_dat, colname, n)
# subj_dat

In [10]:
def format_mouse_data(raw_dat, trials2switch_lower=3):

    true_dat = pd.DataFrame()
    for i, fullID in enumerate(raw_dat['fullIDs']):

        print("Formatting mouse-session {} of {}.".format(i, len(raw_dat['fullIDs'])), end='\r')

        subj_dat = get_subj_dat(raw_dat, fullID, i)
        
        # Add fullID info
        for col in ['agegroup', 'sex', 'age', 'animal']:
            subj_dat[col] = get_info_from_fullID(fullID, col)

        # Add session info (within animal)
        for age in np.unique(subj_dat.age):
            
            # Get ago columns (rew_1ago, act_1ago, rew_2ago, ...)
            for colname in ['reward', 'correct', 'action']:
                for n in range(1, 20):
                    subj_dat['{}_{}ago'.format(colname[:3], n)] = get_ago_col(subj_dat, colname, n)

            # Add block info (within session, within animal)
            for block in np.unique(subj_dat.block):
                sub_idx = (subj_dat.block == block) & (subj_dat.age == age)
                block_length = sum(sub_idx)

                if block_length > trials2switch_lower + 2:
                    block_trial = range(block_length)
                    trials2switch = np.append(np.arange(block_length-trials2switch_lower),
                                              np.arange(-trials2switch_lower, 0))
                else:
                    block_trial = [np.nan] * block_length
                    trials2switch = [np.nan] * block_length

                subj_dat.loc[sub_idx, 'block_length'] = block_length
                subj_dat.loc[sub_idx, 'block_trial'] = block_trial
                subj_dat.loc[sub_idx, 'trials2switch'] = trials2switch
        
        true_dat = true_dat.append(subj_dat)
        
    # Add columns
    for animal in np.unique(true_dat.animal):
        true_dat.loc[true_dat.animal == animal, 'session'] = get_session(true_dat.loc[true_dat.animal == animal])
    true_dat['rew_1&2ago'] = [str(r1)[0] + str(r2)[0] for r1, r2 in zip(true_dat['rew_1ago'], true_dat['rew_2ago'])]  ###
    true_dat['cor_1&2ago'] = [str(r1)[0] + str(r2)[0] for r1, r2 in zip(true_dat['cor_1ago'], true_dat['cor_2ago'])]  ###
    true_dat['act_1&2ago'] = [str(r1)[0] + str(r2)[0] for r1, r2 in zip(true_dat['act_1ago'], true_dat['act_2ago'])]  ###
    true_dat['meta'] = add_meta_column(true_dat)
    
    # Add 'switch' and 'stay' columns
    true_dat['switch'] = true_dat['action'] != true_dat['act_1ago']
    true_dat['stay'] = 1 - true_dat['switch']

#     assert len(raw_dat['fullIDs']) * 782 == true_dat.shape[0]
    
    return true_dat

# # Example use
# true_dat_all = format_mouse_data(raw_dat)
# true_dat_all

In [11]:
# # Summarize over session to count number of trials
# def get_n_trial_dat(true_dat):
    
#     n_trial_dat = true_dat[['animal', 'session', 'action', 'sex', 'agegroup']
#                           ].groupby(['animal', 'session', 'sex', 'agegroup']
#                           ).agg(['count']).reset_index().rename(columns={'action': 'n_trials'})
#     return n_trial_dat

# # # Example use
# # get_n_trial_dat(true_dat_all)

# Functions to load and format simulated data

In [12]:
# Get remaining info for this simulation
def format_sim_data(sim_dat, model_name, selected_mouse=0, save_as_csv=True):
    
    """
    Takes raw `sim_data` (e.g., read in from os.path.join(fitted_param_dir, 'simulations/simulated_mice_{}_nagents{}.csv'.format(model_name, n_agents)))
    Adds all the columns necessary for behavioral analyses.
    """
    
    # Pull out behavior of one specific simulation into separate columns
    for col in ['actionall', 'rewardall', 'correctall']:
        sim_dat[col[:-3]] = sim_dat[col].apply(lambda x: x[1 + 2 * selected_mouse]).astype('int')
        
    # Add gender and session for each simulated animal
    for animal in np.unique(sim_dat.animal):
        print('animal {} / {}'.format(animal, len(np.unique(sim_dat.animal))))
        sub_idx_a = sim_dat.animal == animal
        sim_dat.loc[sub_idx_a, 'gender'] = get_info_from_fullID(sim_dat.loc[sub_idx_a, 'fullID'].reset_index(drop=True)[0], sum(sub_idx_a))['gender']
        sim_age = sim_dat.loc[sub_idx_a, 'age']
        sim_dat.loc[sub_idx_a, 'session'] = sim_age - min(sim_age)

        # Add rew_1ago, rew_2ago, switch for each session
        for age in np.unique(sim_dat.loc[sub_idx_a, 'age']):
            sub_idx_aa = (sim_dat.animal == animal) * (sim_dat.age == age)
            sim_dat.loc[sub_idx_aa, 'rew_1ago'] = np.append(np.nan, sim_dat.loc[sub_idx_aa, 'reward'][:-1])
            sim_dat.loc[sub_idx_aa, 'rew_2ago'] = np.append(np.nan, sim_dat.loc[sub_idx_aa, 'rew_1ago'][:-1])
            sim_dat.loc[sub_idx_aa, 'switch'] = np.append(np.nan, np.abs(np.diff(sim_dat.loc[sub_idx_aa, 'action'].astype('int'))))

            # Add block_length, block_trial, trials2swtich for each block
            for block in np.unique(sim_dat.loc[sub_idx_aa, 'block']):
                sub_idx_baa = (sim_dat.block == block) * (sim_dat.animal == animal) * (sim_dat.age == age)
                block_length = sum(sub_idx_baa)

                if block_length > 5:
                    block_trial = range(block_length)
                    trials2switch = np.append(np.arange(block_length-3), np.arange(-3, 0))
                else:
                    block_trial = [np.nan] * block_length
                    trials2switch = [np.nan] * block_length

                sim_dat.loc[sub_idx_baa, 'block_length'] = block_length
                sim_dat.loc[sub_idx_baa, 'block_trial'] = block_trial
                sim_dat.loc[sub_idx_baa, 'trials2switch'] = trials2switch

    # Format gender; add rew_12ago and agegroup
#     true_dat['gender'] = true_dat['gender'].replace({1: 'Male', 2: 'Female'})
    sim_dat['rew_12ago'] = [str(r1)[0] + str(r2)[0] for r1, r2 in zip(sim_dat['rew_1ago'], sim_dat['rew_2ago'])]
    sim_dat.loc[sim_dat.age > 65, 'agegroup'] = 'Adult'  # adults: 69-90
    sim_dat.loc[sim_dat.age < 65, 'agegroup'] = 'Juvenile'  # juveniles: 38-60
    
    # Save as csv
    if save_as_csv:
        sim_dat.to_csv(os.path.join(mouse_data_dir, 'formatted_sim_mouse_data_{}.csv'.format(model_name)), index=False)
        
    return sim_dat

# # Example use
# n_agents = 10
# modelnames = [f for f in os.listdir(fitted_param_dir) if ('.csv' in f) and ('params' in f) and ('B' not in f)]
# model_names = [modelname.split('_')[1] for modelname in modelnames]
# simulation_dir = os.path.join(fitted_param_dir, 'simulations/simulated_mice_{}_nagents{}.csv'.format(model_name, n_agents))
# sim_dat = pd.read_csv(simulation_dir)
# format_sim_data(sim_dat, model_name, selected_mouse=0, save_as_csv=True)

# Read in and clean mouse data

In [13]:
# Read in and format mouse data
true_dat_all_dir = os.path.join(mouse_data_dir, 'mouse_true_dat_raw.csv')
print("Reading in raw mouse data...")
raw_dat = load_mouse_data(mouse_data_dir)
print("Cleaning mouse data...")
true_dat_all = format_mouse_data(raw_dat)
print("Save cleaned csv.                            ")
true_dat_all.to_csv(true_dat_all_dir, index=False)
true_dat_all

Reading in raw mouse data...
Cleaning mouse data...
Save cleaned csv.                            


Unnamed: 0,action,reward,rt,correct,trial,block,fullID,agegroup,sex,age,...,block_length,block_trial,trials2switch,session,rew_1&2ago,cor_1&2ago,act_1&2ago,meta,switch,stay
0,0.0,0.0,1.15620,0.0,0,0.0,1404012,Juvenile,Male,40,...,12.0,0.0,0.0,0.0,nn,nn,nn,1-3,True,0
1,0.0,0.0,0.28317,0.0,1,0.0,1404012,Juvenile,Male,40,...,12.0,1.0,1.0,0.0,0n,0n,0n,1-3,False,1
2,1.0,1.0,0.46666,1.0,2,0.0,1404012,Juvenile,Male,40,...,12.0,2.0,2.0,0.0,00,00,00,1-3,True,0
3,1.0,1.0,1.28200,1.0,3,0.0,1404012,Juvenile,Male,40,...,12.0,3.0,3.0,0.0,10,10,10,1-3,False,1
4,1.0,1.0,0.96366,1.0,4,0.0,1404012,Juvenile,Male,40,...,12.0,4.0,4.0,0.0,11,11,11,1-3,False,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
768,,,,,768,,1309021,Adult,Female,90,...,,,,15.0,nn,nn,nn,,True,0
769,,,,,769,,1309021,Adult,Female,90,...,,,,15.0,nn,nn,nn,,True,0
770,,,,,770,,1309021,Adult,Female,90,...,,,,15.0,nn,nn,nn,,True,0
771,,,,,771,,1309021,Adult,Female,90,...,,,,15.0,nn,nn,nn,,True,0


In [14]:
# Subset 500 trials and 10 sessions
true_dat = true_dat_all.loc[
    (true_dat_all.trial < n_trial_cutoff) &  # cap at 500 trials
    (true_dat_all.session <= n_session_cutoff)  # cap at 10 sessions
]
excl_dat = true_dat.loc[np.isnan(true_dat.action)].groupby(['fullID']).aggregate('mean').reset_index()[['fullID']]  # , 'age', 'animal', 'session'

true_dat['excluded'] = False
for fullID in excl_dat.fullID:
    true_dat.loc[true_dat.fullID == fullID, 'excluded'] = True
    
true_dat = true_dat.loc[np.invert(true_dat.excluded)]
assert np.mean(true_dat.excluded) == 0
true_dat

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user

Unnamed: 0,action,reward,rt,correct,trial,block,fullID,agegroup,sex,age,...,block_trial,trials2switch,session,rew_1&2ago,cor_1&2ago,act_1&2ago,meta,switch,stay,excluded
0,0.0,0.0,1.15620,0.0,0,0.0,1404012,Juvenile,Male,40,...,0.0,0.0,0.0,nn,nn,nn,1-3,True,0,False
1,0.0,0.0,0.28317,0.0,1,0.0,1404012,Juvenile,Male,40,...,1.0,1.0,0.0,0n,0n,0n,1-3,False,1,False
2,1.0,1.0,0.46666,1.0,2,0.0,1404012,Juvenile,Male,40,...,2.0,2.0,0.0,00,00,00,1-3,True,0,False
3,1.0,1.0,1.28200,1.0,3,0.0,1404012,Juvenile,Male,40,...,3.0,3.0,0.0,10,10,10,1-3,False,1,False
4,1.0,1.0,0.96366,1.0,4,0.0,1404012,Juvenile,Male,40,...,4.0,4.0,0.0,11,11,11,1-3,False,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,0.0,0.0,3.70600,0.0,495,26.0,1308321,Adult,Female,83,...,2.0,2.0,10.0,00,10,10,9-11,True,0,False
496,0.0,0.0,1.91100,0.0,496,26.0,1308321,Adult,Female,83,...,3.0,3.0,10.0,00,01,01,9-11,False,1,False
497,1.0,1.0,3.54000,1.0,497,26.0,1308321,Adult,Female,83,...,4.0,4.0,10.0,00,00,00,9-11,True,0,False
498,1.0,0.0,0.52400,1.0,498,26.0,1308321,Adult,Female,83,...,5.0,5.0,10.0,10,10,10,9-11,False,1,False


In [15]:
# true_dat.loc[true_dat.rt < 0].groupby(['fullID']).aggregate('max').reset_index()[['age', 'sex', 'animal', 'fullID', 'block', 'rt']]

In [16]:
# # Remove RTs < 0
# true_dat.loc[true_dat.rt < 0, 'rt'] = np.nan

In [17]:
# RT distribution
np.quantile(true_dat.loc[np.invert(np.isnan(true_dat.rt)), 'rt'], [0.8, 0.9, 0.95, 0.98, 0.99])

array([0.7835  , 1.17    , 1.717525, 2.583   , 3.134   ])

In [18]:
# # Remove RTs > 10 sec
# true_dat.loc[true_dat.rt > rt_cut_off, 'rt'] = np.nan
# true_dat

In [19]:
true_dat.describe()

Unnamed: 0,action,reward,rt,correct,trial,block,fullID,age,animal,rew_1ago,...,act_15ago,act_16ago,act_17ago,act_18ago,act_19ago,block_length,block_trial,trials2switch,session,stay
count,191500.0,191500.0,191500.0,191500.0,191500.0,191500.0,191500.0,191500.0,191500.0,191117.0,...,185755.0,185372.0,184989.0,184606.0,184223.0,191500.0,191500.0,191500.0,191500.0,191500.0
mean,0.512308,0.603091,0.652097,0.797869,249.5,9.850115,1934056.0,55.814621,19.284595,0.60306,...,0.512788,0.512758,0.512712,0.512621,0.51265,26.756642,12.746658,9.81848,4.804178,0.835039
std,0.49985,0.489258,0.531991,0.40159,144.337655,6.159996,1103933.0,16.969518,11.052713,0.489265,...,0.499838,0.499839,0.49984,0.499842,0.499841,8.293347,9.047259,9.028474,3.06685,0.371146
min,0.0,0.0,0.004,0.0,0.0,0.0,107511.0,38.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,7.0,0.0,-3.0,0.0,0.0
25%,0.0,0.0,0.391,1.0,124.75,4.0,1007521.0,42.0,10.0,0.0,...,0.0,0.0,0.0,0.0,0.0,21.0,5.0,2.0,2.0,1.0
50%,1.0,1.0,0.48783,1.0,249.5,10.0,1904722.0,46.0,19.0,1.0,...,1.0,1.0,1.0,1.0,1.0,27.0,11.0,8.0,5.0,1.0
75%,1.0,1.0,0.6935,1.0,374.25,15.0,2904222.0,76.0,29.0,1.0,...,1.0,1.0,1.0,1.0,1.0,32.0,19.0,16.0,7.0,1.0
max,1.0,1.0,4.0,1.0,499.0,26.0,3806012.0,86.0,38.0,1.0,...,1.0,1.0,1.0,1.0,1.0,62.0,61.0,58.0,10.0,1.0


In [20]:
assert np.sum(np.sum(np.isnan(true_dat[['action', 'reward', 'rt', 'correct']]))) == 0

# Fit sigmoid functions

In [37]:
def sigmoid(x, L, x0, beta, bias):
    return L / (1 + np.exp(-beta * (x - x0))) + bias

# Test
sigmoid(x=3, L=1, x0=0.5, beta=4, bias=0)

0.9999546021312976

In [38]:
# Summarize data for regression
# plot_dat_learn = true_dat[true_dat['trial'] < 8].groupby(['animal', 'meta', 'trial']).aggregate('mean').reset_index()

plot_dat_learn = true_dat[true_dat['trial'] < 8].groupby(['fullID', 'trial']).aggregate('mean').reset_index()
plot_dat_learn['sex'] = plot_dat_learn.fullID.apply(lambda x : get_info_from_fullID(x, 'sex'))
plot_dat_learn['agegroup'] = plot_dat_learn.fullID.apply(lambda x : get_info_from_fullID(x, 'agegroup'))
plot_dat_learn['meta'] = add_meta_column(plot_dat_learn)

plot_dat_learn

Unnamed: 0,fullID,trial,action,reward,rt,correct,block,age,animal,rew_1ago,...,block_length,block_trial,trials2switch,session,switch,stay,excluded,sex,agegroup,meta
0,107511,0,1.0,0.0,0.65200,0.0,0.0,75,1,,...,21.0,0.0,0.0,0.0,True,0,False,Male,Adult,1-3
1,107511,1,1.0,0.0,0.02050,0.0,0.0,75,1,0.0,...,21.0,1.0,1.0,0.0,False,1,False,Male,Adult,1-3
2,107511,2,0.0,1.0,0.38800,1.0,0.0,75,1,0.0,...,21.0,2.0,2.0,0.0,True,0,False,Male,Adult,1-3
3,107511,3,0.0,1.0,0.37600,1.0,0.0,75,1,1.0,...,21.0,3.0,3.0,0.0,False,1,False,Male,Adult,1-3
4,107511,4,0.0,1.0,0.42600,1.0,0.0,75,1,1.0,...,21.0,4.0,4.0,0.0,False,1,False,Male,Adult,1-3
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3059,3806012,3,1.0,0.0,3.25070,0.0,0.0,60,38,0.0,...,29.0,3.0,3.0,8.0,True,0,False,Male,Juvenile,9-11
3060,3806012,4,0.0,1.0,0.88333,1.0,0.0,60,38,0.0,...,29.0,4.0,4.0,8.0,True,0,False,Male,Juvenile,9-11
3061,3806012,5,0.0,1.0,0.66933,1.0,0.0,60,38,1.0,...,29.0,5.0,5.0,8.0,False,1,False,Male,Juvenile,9-11
3062,3806012,6,0.0,1.0,0.47533,1.0,0.0,60,38,1.0,...,29.0,6.0,6.0,8.0,False,1,False,Male,Juvenile,9-11


In [41]:
def fit_sigmoid_to_one_dataset(xs, ys, p0):

    y_pred = []
    p_opts = np.empty([0, 4])

    popt, pcov = scipy.optimize.curve_fit(sigmoid, xs, ys, p0, method='dogbox', maxfev=1e9)
    y_pred += list(sigmoid(xs, *popt))
    p_opts = np.vstack([
        p_opts,
        np.tile(popt, len(xs)).reshape([len(xs), len(popt)])
    ])
    
    result = pd.DataFrame({'y_pred': y_pred})
    for col_i, col_name in enumerate(['L', 'x0', 'beta', 'bias']):
        result[col_name] = p_opts[:, col_i]
    
    return result

In [42]:
# Test fit_sigmoid_to_one_dataset(xs, ys, p0)

p0 = [1, 5, 1, 0]  # mandatory initial guess
sub_dat = plot_dat_learn[
    (plot_dat_learn['animal'] == plot_dat_learn['animal'][0])
    & (plot_dat_learn['meta'] == '1-3')
].groupby(['meta', 'trial']).aggregate('mean').reset_index()

fit_sigmoid_to_one_dataset(sub_dat['trial'], sub_dat['correct'], p0)


# # Another test
# p0 = [1, np.median(xdata), 1, 0]  # mandatory initial guess
# results = pd.DataFrame(columns=fit_sigmoid_to_one_dataset(sub_dat['trial'], sub_dat['correct'], p0).columns)

# for animal in np.unique(plot_dat_learn['animal'])[2:4]:
#     print(animal)
#     for meta in np.unique(plot_dat_learn['meta'])[:2]:
#         print(meta)
        
#         # Get data from one animal from one "meta" conglomerate
#         sub_dat = plot_dat_learn[
#             (plot_dat_learn['animal'] == animal) &
#             (plot_dat_learn['meta'] == meta)
#         ]
            
#         # Average these data to get sigmoid learning curves
#         mean_dat = sub_dat.groupby(['trial']).aggregate('mean').reset_index()
#         trial = mean_dat['trial']
#         correct = mean_dat['correct']
        
#         # Fit sigmoid to this dataset
#         result = fit_sigmoid_to_one_dataset(
#             xs=trial,
#             ys=correct,
#             p0=p0,
#         )
#         result['animal'] = animal
#         result['meta'] = meta
#         result['trial'] = trial
#         result['correct'] = correct
        
#         # Save results to big dataframe
#         results = results.append(result)
        
# results

Unnamed: 0,y_pred,L,x0,beta,bias
0,8.30975e-07,0.800001,1.934179,24.451271,8.30975e-07
1,8.310712e-07,0.800001,1.934179,24.451271,8.30975e-07
2,0.6666638,0.800001,1.934179,24.451271,8.30975e-07
3,0.8000016,0.800001,1.934179,24.451271,8.30975e-07
4,0.8000016,0.800001,1.934179,24.451271,8.30975e-07
5,0.8000016,0.800001,1.934179,24.451271,8.30975e-07
6,0.8000016,0.800001,1.934179,24.451271,8.30975e-07
7,0.8000016,0.800001,1.934179,24.451271,8.30975e-07


In [45]:
# Fit sigmoid to all mice

p0 = [1, 5, 1, 0]  # mandatory initial guess
results = pd.DataFrame(columns=fit_sigmoid_to_one_dataset(sub_dat['trial'], sub_dat['correct'], p0).columns)

for animal in np.unique(plot_dat_learn['animal']):
    print('mouse', animal)
    for meta in np.unique(plot_dat_learn['meta']):
        print('\t meta', meta)
        
        # Get data from one animal from one "meta" conglomerate
        sub_dat = plot_dat_learn[
            (plot_dat_learn['animal'] == animal) &
            (plot_dat_learn['meta'] == meta)
        ].reset_index()
            
        # Average these data to get sigmoid learning curves
        mean_dat = sub_dat.groupby(['trial']).aggregate('mean').reset_index()
        trial = mean_dat['trial']
        correct = mean_dat['correct']
        
        # Fit sigmoid to this dataset
        if (len(trial) > 0) & (len(correct) > 0):
            result = fit_sigmoid_to_one_dataset(
                xs=trial,
                ys=correct,
                p0=p0,
            )
            result['trial'] = trial
            result['correct'] = correct
            result['animal'] = animal
            result['meta'] = meta
            result['agegroup'] = sub_dat['agegroup'][0]
            result['sex'] = sub_dat['sex'][0]
            result['age'] = sub_dat['age'][0]

            # Save results to big dataframe
            results = results.append(result)
        
results

mouse 1
	 meta 1-3
	 meta 4-8
	 meta 9-11
mouse 2
	 meta 1-3
	 meta 4-8
	 meta 9-11
mouse 3
	 meta 1-3
	 meta 4-8
	 meta 9-11
mouse 4
	 meta 1-3
	 meta 4-8
	 meta 9-11
mouse 5
	 meta 1-3
	 meta 4-8


KeyboardInterrupt: 

In [None]:
results.to_csv(os.path.join(mouse_data_dir, 'sigmoid_results.csv'), index=False)

## Save true_dat csv and use this for modeling!

In [21]:
true_dat.to_csv(os.path.join(mouse_data_dir, 'true_dat.csv'), index=False)