In [15]:
import pandas as pd
import numpy as np
from scipy.stats import ttest_ind
# import dc_stat_think as dc
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
import os

In [16]:
import sqlalchemy as sql


db_user = os.environ.get('DB_USER')
db_pass = os.environ.get('DB_PASS')
db = 'mua_data'

con_str =f"mysql+pymysql://{db_user}:{db_pass}@localhost/{db}"
eng = sql.create_engine(con_str)

In [17]:
def get_ids(eng):
    q = '''SELECT neuron_id 
            FROM neurons 
            INNER JOIN recordings
                ON neurons.recording_id=recordings.recording_id 

            WHERE recordings.group_id IN (1, 2, 3, 4) 
                AND (recordings.excluded=0 OR recordings.excluded IS NULL)'''

    df = pd.read_sql(q, eng)
    ids = df['neuron_id'].values
    ids = list(map(str, ids))
    return ids

In [18]:
def get_ifr_sub(d, ids):
    ifr_all = pd.read_csv(d)
    return ifr_all.loc[:, ifr_all.columns.isin(ids)]

In [19]:
####### TS


def inclustion_exclustion(ifr, min_fr=0., exp_start=None, 
                          time_before=None,
                         time_after=None):
    if exp_start is None:
        exp_start = pd.Timedelta(3600, unit='s')
        
    if time_before is None:
        time_before = pd.Timedelta(1800, unit='s')
    if time_after is None:
        time_after = pd.Timedelta(2400, unit='s')
    
    ifr = ifr.fillna(0)
    ifr = _get_timeperiod(ifr, exp_start, 
                          time_before, time_after)
    ifr = _fr_exlcusion(ifr, min_fr, exp_start)
    return ifr

def _get_timeperiod(ifr, exp_start=None, 
                    time_before=None, time_after=None):
    
    lower = exp_start - time_before
    upper = exp_start + time_after

    
    return ifr.loc[(ifr.index>lower) & (ifr.index<upper)]


def _fr_exlcusion(ifr, min_fr, exp_start):
    ifr = ifr.loc[:, ifr.columns[(ifr.loc[:exp_start]>min_fr).all()]]

    return ifr



def _ifr_to_ts(ifr, period):
    ifr.index = pd.timedelta_range(start=0, periods=len(ifr), freq='s')
    ifr = ifr.resample(period).mean()
    return ifr

def _time_cuttoff(ifr, exp_start, time_before, time_after):
    lower = exp_start - time_before
    upper = exp_start + time_after
    return ifr.loc[(ifr.index>lower) & (ifr.index<upper)]


def dress_up_ifr(ifr, time_before=None, time_after=None, 
                 rs_period=None, exp_start=None):
    if time_before is None:
        time_before = pd.Timedelta(2400, unit='s')
  
    if time_after is None:
        time_after = pd.Timedelta(2400, unit='s')
        
    if rs_period is None:
        rs_period = '30s'
    if exp_start is None:
        exp_start = pd.Timedelta(3600, unit='s')
    
    ifr = ifr.fillna(0)
    ifr = _ifr_to_ts(ifr, rs_period)
    ifr = _time_cuttoff(ifr, exp_start, time_before, time_after)
    return ifr

In [20]:
from scipy import stats

def extreme_value_remover(col):
    ser = pd.Series(stats.zscore(col), index=col.index)
    col[np.abs(ser) > 3] = np.median(ser)
    
    return col


In [21]:
d = '/home/ruairi/data/tmp/ifr/all.csv'
ids = get_ids(eng)
ifr_sub = get_ifr_sub(d, ids)

ifr_ts = dress_up_ifr(ifr_sub.copy())
ifr_ts.loc[:pd.Timedelta(3600, unit='s'), :] = ifr_ts.loc[:pd.Timedelta(3600, unit='s'), :].apply(extreme_value_remover)

ifr_ts = ifr_ts.rolling('130s').median()

ifr_ts = inclustion_exclustion(ifr_ts, min_fr=0.5, 
                               time_before=pd.Timedelta(600, unit='s'))

In [22]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
scaler.fit(ifr_ts.loc[:pd.Timedelta(3600, 's')].values)

StandardScaler(copy=True, with_mean=True, with_std=True)

In [23]:

dfz = pd.DataFrame(scaler.transform(ifr_ts), 
                   index=ifr_ts.index,
                  columns=ifr_ts.columns)

In [32]:
def effect_checker(col, z_cutoff, num_periods=10):
    if (col<= -z_cutoff).sum() >= num_periods:
        ret = 'decrease'
    elif (col >= z_cutoff).sum() >= num_periods:
        ret = 'increase'
    else:
        ret = 'no effect'
    return ret

In [33]:
effects = dfz[dfz.index>=pd.Timedelta(3600, 's')].apply(effect_checker, 
                                                        z_cutoff=)

In [35]:
effects.value_counts()

decrease     90
no effect    61
increase     46
dtype: int64

In [36]:
effects = effects.reset_index()
effects.columns = ['neuron_id', 'response']


In [37]:
effects.to_csv('/home/ruairi/data/tmp/zscore_de_talk2.csv', index=False)

In [38]:
len(dfz)

99