In [1]:
import os
from urllib.parse import quote_plus

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import sqlalchemy as sql

In [2]:
# Database credentials are read from the environment so they never appear in
# the notebook itself.
db_user = os.environ.get('DB_USER')
db_pass = os.environ.get('DB_PASS')
if db_user is None or db_pass is None:
    # Fail here with a clear message rather than building a URL that contains
    # the literal text "None" and failing later at first query.
    raise RuntimeError('Set the DB_USER and DB_PASS environment variables before running this notebook.')
db = 'mua_data'
# URL-escape the credentials: characters such as '@', ':' or '/' in a password
# would otherwise be parsed as part of the URL structure.
con_str = f"mysql+pymysql://{quote_plus(db_user)}:{quote_plus(db_pass)}@localhost/{db}"
eng = sql.create_engine(con_str)

# Load IFR and Labelled Neurons

In [3]:
# Wide firing-rate table read from disk; later cells treat each column as one neuron.
ifr_path = '/home/ruairi/data/tmp/ifr/all.csv'
ifr = pd.read_csv(ifr_path)

In [4]:
# Per-neuron cluster classification; only the columns used downstream are kept.
dfc_columns = ['label', 'neuron_id', 'colors']
dfc = pd.read_csv('/home/ruairi/data/tmp/classified_clustered_merge.csv')[dfc_columns]

In [5]:
import collections

    

def _resample(df, resample_period):
    n_rows = df.shape[0]
    df1 = df.copy()
    df1.index = pd.TimedeltaIndex(pd.to_timedelta(np.arange(n_rows), unit='s'))
    return df1.resample(resample_period).mean()


def _ifr_to_long(df):
    df.index.name = 'timepoint'
    df = df.reset_index()
    melt = pd.melt(df, id_vars='timepoint', var_name='neuron_id', value_name='firing_rate')
    melt['neuron_id'] = pd.to_numeric(melt['neuron_id'])
    return melt


def _merge(df1, df2):
    return pd.merge(left=df1, right=df2, on='neuron_id')


def _experimental_info_by_neuron(eng, exclude_groups):
    q = '''SELECT neurons.neuron_id, experimental_groups.cond1 AS treatment, experimental_groups.virus,
              experiment_name, experimental_groups.group_id, experimental_groups.pretreatment AS pretreatment
        FROM neurons
    INNER JOIN recordings ON
        neurons.recording_id=recordings.recording_id
    INNER JOIN experimental_groups ON 
        recordings.group_id=experimental_groups.group_id
    INNER JOIN experiments ON 
        experimental_groups.experiment_id=experiments.experiment_id
    WHERE (recordings.excluded=0 OR recordings.excluded IS NULL)'''

    df = pd.read_sql(q, eng)
    if exclude_groups is not None:
        if not isinstance(exclude_groups, collections.Iterable):
            exclude_groups = [exclude_groups]
        df = df[~df['group_id'].isin(exclude_groups)]
    #return pd.read_sql(q, eng)
    return df


    


def create_long_data(ifr, dfc, eng, resample_period='10s', 
                     min_time='20min', max_time='100min', 
                     exclude_groups=None, fill=False):
    '''
    Build a long-format firing-rate table annotated with per-neuron metadata.

    Steps:
    * Resample the wide IFR table into `resample_period` bins (rows are
      treated as consecutive 1-second samples by `_resample`)
    * Melt to one row per (timepoint, neuron_id)
    * Merge IFR with cluster labels and colors from `dfc`
    * Merge with experimental metadata queried through `eng`
    * Keep rows with min_time <= timepoint <= max_time (both inclusive)
    * Optionally replace missing firing rates with 0

    Parameters
    ----------
    ifr : wide DataFrame, one column per neuron.
    dfc : DataFrame with a 'neuron_id' column plus cluster annotations.
    eng : database connection/engine passed to `pd.read_sql`.
    resample_period : bin width passed to `DataFrame.resample`.
    min_time, max_time : bounds of the retained window; anything
        `pd.to_timedelta` accepts.
    exclude_groups : group id(s) to drop, or None to keep all groups.
    fill : when True, NaN firing rates are replaced by 0.

    Returns
    -------
    A new long-format DataFrame. Both merges are inner joins, so neurons
    missing from `dfc` or from the database query are dropped.
    '''
    # Parse the bounds up front so a malformed value fails before the DB query.
    lower = pd.to_timedelta(min_time)
    upper = pd.to_timedelta(max_time)

    df = _resample(ifr, resample_period)
    df = _ifr_to_long(df)
    df = _merge(df, dfc)
    exp = _experimental_info_by_neuron(eng, exclude_groups)
    df = _merge(df, exp)

    in_window = (df['timepoint'] >= lower) & (df['timepoint'] <= upper)
    # Copy the filtered rows so the column assignment below writes to an
    # independent frame rather than to a slice of the merged table.
    df = df[in_window].copy()
    if fill:
        df['firing_rate'] = df['firing_rate'].fillna(0)
    return df

In [6]:
# Build the long-format analysis table, dropping groups 5 and 6 and
# replacing missing firing rates with 0.
df = create_long_data(
    ifr=ifr,
    dfc=dfc,
    eng=eng,
    exclude_groups=[5, 6],
    fill=True,
)

### Add drug responses

In [7]:
# Per-neuron response labels; preview the first rows.
zscore_path = '/home/ruairi/data/tmp/zscore_de_talk2.csv'
zscore = pd.read_csv(zscore_path)
zscore.head(5)

Unnamed: 0,neuron_id,response
0,1,decrease
1,10,decrease
2,100,no effect
3,101,increase
4,102,increase


In [8]:
# Attach each neuron's response label. Only the key and the label are taken
# from `zscore`: the frame also carries an 'Unnamed: 0' column (the index saved
# with the CSV), which would otherwise be merged into `df` and written to the
# exported file.
df = pd.merge(how='left', left=df, right=zscore[['neuron_id', 'response']], on='neuron_id')
# Neurons with no entry in `zscore` get an explicit 'excluded' label instead of NaN.
df['response'] = df['response'].fillna('excluded')

In [9]:
# Elapsed time in minutes. total_seconds() does not depend on the timedelta
# column being stored at nanosecond resolution, and it avoids multiplying by
# the inexact constant 1e-9.
df['time'] = df['timepoint'].dt.total_seconds() / 60

In [10]:
# Centre the time axis on its mean so that 0 is the average timepoint.
df['time'] = df['time'].sub(df['time'].mean())

In [11]:
# Save the annotated long-format table, without the row index.
out_path = '/home/ruairi/data/tmp/perfection_talk2.csv'
df.to_csv(out_path, index=False)

In [12]:
# Number of rows (timepoint x neuron) per response label.
response_counts = df['response'].value_counts()
response_counts

no effect    71669
increase     36556
decrease     32708
Name: response, dtype: int64

In [13]:
# Row counts per response label, split by experiment.
pd.pivot_table(
    df,
    values='neuron_id',
    index='response',
    columns='experiment_name',
    aggfunc='count',
)

experiment_name,CIT_WAY
response,Unnamed: 1_level_1
decrease,32708
increase,36556
no effect,71669
