In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from pathlib import Path

%matplotlib inline

# Global matplotlib defaults: Arial at 12 pt for every figure in this notebook.
plt.rcParams.update({"font.family": "Arial", "font.size": 12})

# Seaborn theme: white grid, with axes edges and grid lines set to colour "0".
sns.set_style(
    "whitegrid",
    rc={
        "axes.edgecolor": "0",
        "grid.color": "0",
        "font.family": "Arial",
    },
)
sns.set_context("notebook", font_scale=1.5)

# Every pickled input below is read relative to this directory.
DATA_DIR = Path('../data')

# Load Siju Data

In [7]:
# Load the cleaned Siju pivot table (a stimulus label plus DAN dF/F columns,
# as shown in the preview output of this cell).
siju_pickle = DATA_DIR / 'siju-cleaned-expanded-pivot.pkl'
sdf = pd.read_pickle(siju_pickle)

# Report (rows, columns) before previewing the first rows.
s_shape = sdf.shape
print(f'Dataframe shape: {s_shape}')

sdf.head()

Dataframe shape: (90, 16)


Unnamed: 0_level_0,stimulus,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F,DAN dF/F
lobe,Unnamed: 1_level_1,alpha'1,alpha'2,alpha'3,alpha1,alpha2,alpha3,beta'1,beta'2,beta1,beta2,gamma1,gamma2,gamma3,gamma4,gamma5
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
TH-58E02_090816_Citronella,citronella,0.476658,0.412485,0.226218,0.341723,0.230774,1.023435,0.586997,0.837626,0.624777,0.990563,0.05193,0.292853,0.707377,0.790165,0.337667
TH-58E02_170816_Citronella,citronella,0.432107,0.527736,0.175783,0.034967,0.245432,0.371485,0.457447,0.520961,0.281905,0.639173,0.047926,0.671082,0.632577,0.304596,0.075477
181022_TH58E02G6f_ST00_0_Citronella,citronella,0.423704,0.083843,0.37943,-0.011456,0.09759,0.640199,0.695688,0.670628,0.269143,0.50633,0.251025,0.406841,1.010089,0.882712,0.095707
171016_Citronella 2,citronella,0.610923,0.427451,-0.009572,0.135995,0.63708,0.009996,0.079676,0.021876,-0.013304,-0.007225,-0.015125,0.128165,0.039962,0.047961,0.02038
171016_Citronella 1,citronella,0.671743,0.539444,2.572961,0.102841,0.927985,0.578673,0.682591,0.582773,0.21134,0.530082,0.377056,0.601208,0.659796,0.434064,0.096403


# Load Hije Data

In [42]:
# Load the cleaned Hije pivot table, then keep only the stimulus label and
# the MBON dF/F columns for the rest of the analysis.
hije_raw = pd.read_pickle(DATA_DIR / 'hije-cleaned-expanded-pivot.pkl')
hdf = hije_raw[['stimulus', 'MBON dF/F']]

# Report (rows, columns) before previewing the first rows.
h_shape = hdf.shape
print(f'Dataframe shape: {h_shape}')

hdf.head()

Dataframe shape: (45, 16)


Unnamed: 0_level_0,stimulus,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F,MBON dF/F
lobe,Unnamed: 1_level_1,alpha'1,alpha'2,alpha'3,alpha1,alpha2,alpha3,beta'1,beta'2,beta1,beta2,gamma1,gamma2,gamma3,gamma4,gamma5
id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
2-heptanone_fly0,heptanone,57.014634,53.449785,14.488461,31.733999,35.150185,50.098322,29.570272,25.684328,34.010916,11.951235,46.239205,66.086409,42.144978,60.102633,11.447692
2-heptanone_fly1,heptanone,44.168362,91.255851,17.765559,29.395413,28.380873,21.858113,24.830531,16.444911,43.192486,27.904354,53.975628,37.821266,47.514654,52.499951,3.998101
2-heptanone_fly2,heptanone,30.958356,67.175713,19.83457,25.548454,31.03314,79.720826,25.744823,17.759295,46.527031,22.010365,28.879761,29.721496,39.124175,45.002144,1.734179
2-heptanone_fly3,heptanone,36.248176,50.087333,18.65988,21.263302,20.912131,44.043983,38.035029,10.343638,26.904135,5.369238,41.142201,31.95423,70.776622,39.891746,0.429397
2-heptanone_fly4,heptanone,14.15034,21.817438,11.888643,18.037705,17.06354,22.347819,24.288879,5.184545,14.825427,9.185952,11.818865,12.767847,37.395544,31.264467,-2.034475


# Create 'Joining Table'

In [17]:
from itertools import product

In [44]:
# The stimulus list comes from the Hije table, so only stimuli present there get paired.
stimuli = hdf['stimulus'].unique()

In [55]:
# Joining table: a flat list of (Siju row id, Hije row id) pairs that share a stimulus.
join_tab = []

for stim in stimuli:
    # Rows recorded for this stimulus in each dataset.
    sdf_stim = sdf.loc[sdf['stimulus'] == stim]
    hdf_stim = hdf.loc[hdf['stimulus'] == stim]

    print(f'- {stim}')
    print(f'Shape of sdf: {sdf_stim.shape}')
    print(f'Shape of hdf: {hdf_stim.shape}')

    # Full cross product of the two id sets, with the Siju id varying slowest.
    combinations = [
        (siju_id, hije_id)
        for siju_id in sdf_stim.index
        for hije_id in hdf_stim.index
    ]
    print(f'Number of combinations: {len(combinations)}')

    join_tab += combinations

print(len(join_tab))

- heptanone
Shape of sdf: (9, 16)
Shape of hdf: (5, 16)
Number of combinations: 45
- citronella
Shape of sdf: (11, 16)
Shape of hdf: (5, 16)
Number of combinations: 55
- ethanol
Shape of sdf: (6, 16)
Shape of hdf: (5, 16)
Number of combinations: 30
- hexanol
Shape of sdf: (8, 16)
Shape of hdf: (5, 16)
Number of combinations: 40
- mch
Shape of sdf: (11, 16)
Shape of hdf: (5, 16)
Number of combinations: 55
- octanol
Shape of sdf: (8, 16)
Shape of hdf: (5, 16)
Number of combinations: 40
- peppermint
Shape of sdf: (12, 16)
Shape of hdf: (5, 16)
Number of combinations: 60
- vinegar
Shape of sdf: (11, 16)
Shape of hdf: (5, 16)
Number of combinations: 55
- yeast
Shape of sdf: (14, 16)
Shape of hdf: (5, 16)
Number of combinations: 70
450


In [71]:
# Spot-check a single, arbitrarily chosen pairing from the joining table.
i = 100

pair = join_tab[i]
print(pair)

# Siju row addressed by the first id of the pair.
DAN_x = sdf.loc[pair[0]]
DAN_x.values

('181214a_TH58E02G6f_ST00_1_Ethanol', 'ethanol_fly0')


array(['ethanol', 0.2195584235860149, 0.2294227266991938,
       1.8297317270905022, 0.28516870958161, 0.3021907647065587,
       0.43358365464528753, 2.0095193813803194, 0.5191058720435044,
       0.29907913832380467, 0.7371017931489893, 0.28535458760703214,
       0.5301529346261852, 0.8566485985831983, 0.6035951332534931,
       0.05251213612245484], dtype=object)

In [72]:
# Hije row addressed by the second id of pair `i`.
mbon_id = join_tab[i][1]
MBON_y = hdf.loc[mbon_id]
MBON_y.values

array(['ethanol', 40.10214899939467, 25.657453562824543, 17.4808562879505,
       20.465026081514782, 9.073939653716906, 45.5018239869886,
       22.746210967683105, 13.711930953905817, 20.375914737353458,
       20.922901146974205, 43.47091272065403, 44.156373943023326,
       20.59049078092252, 25.409393203596448, 1.5976875303586093],
      dtype=object)

In [79]:
# Build numpy array

# Element 0 of the row is the 'stimulus' label (see the printed row values),
# so slice it off and keep only the response values.
y_vals = MBON_y.values[1:]

# Only the last expression of a cell is displayed, so the former bare
# `type(y_vals)` line had no effect and has been removed.
y_vals.shape

(15,)

In [95]:
# Pre-allocate one row per joined pair and 15 columns (the length of a row's
# response values once the stimulus label is dropped).
X = np.zeros((len(join_tab), 15))
Y = np.zeros_like(X)

In [97]:
# Row i of X / Y holds the Siju / Hije values of the i-th pair; element 0 of
# each source row is skipped because it is the stimulus label.
for i, (dan_id, mbon_id) in enumerate(join_tab):
    X[i, :] = sdf.loc[dan_id].values[1:]
    Y[i, :] = hdf.loc[mbon_id].values[1:]

In [98]:
# Preview the Y matrix (filled from the Hije rows), one row per joined pair.
Y

array([[57.01463355, 53.44978546, 14.48846054, ..., 42.14497808,
        60.10263256, 11.44769224],
       [44.16836247, 91.25585057, 17.76555871, ..., 47.51465352,
        52.49995075,  3.99810093],
       [30.95835581, 67.17571251, 19.83456962, ..., 39.12417494,
        45.00214377,  1.73417912],
       ...,
       [46.97025746, 73.13066859,  9.11760464, ..., 39.30695463,
        16.59902517,  6.91397129],
       [37.29378004, 73.24954661, 21.88020087, ..., 56.36695517,
        15.14044057,  9.20381158],
       [31.01367535, 12.46603719, 11.20332533, ..., 41.9181154 ,
        21.19308433,  1.93158313]])

In [99]:
# Preview the X matrix (filled from the Siju rows); consecutive rows repeat
# because each Siju row is paired with several Hije rows.
X

array([[0.18879316, 0.08000534, 0.14528292, ..., 1.96631115, 0.22517827,
        0.15213694],
       [0.18879316, 0.08000534, 0.14528292, ..., 1.96631115, 0.22517827,
        0.15213694],
       [0.18879316, 0.08000534, 0.14528292, ..., 1.96631115, 0.22517827,
        0.15213694],
       ...,
       [0.31227529, 0.47202968, 1.22896686, ..., 0.5851319 , 0.58883865,
        0.14318391],
       [0.31227529, 0.47202968, 1.22896686, ..., 0.5851319 , 0.58883865,
        0.14318391],
       [0.31227529, 0.47202968, 1.22896686, ..., 0.5851319 , 0.58883865,
        0.14318391]])

In [100]:
# Smallest value anywhere in X (checked before normalising).
np.min(X)

-0.2292418312813611

In [101]:
# Largest value anywhere in X.
np.max(X)

4.011015766446184

In [102]:
# Smallest value anywhere in Y (checked before normalising).
np.min(Y)

-10.543254491898383

In [103]:
# Largest value anywhere in Y.
np.max(Y)

134.46735444712488

In [108]:
def normalise(A: np.ndarray)->np.ndarray:
    """Rescale ``A`` using its global (whole-array) minimum and maximum.

    * If the minimum is negative, the array is min-max scaled:
      ``(A - min) / (max - min)``, so values land in ``[0, 1]``.
    * Otherwise the array is only divided by its maximum; the smallest value
      is then ``min / max`` and is not necessarily mapped to 0.

    Degenerate inputs, which would otherwise divide by zero or raise, are
    handled explicitly:

    * an empty array is returned as an empty float array;
    * a zero denominator (all zeros, or a constant negative array) yields an
      array of zeros instead of NaN/inf.

    Parameters
    ----------
    A : np.ndarray
        Numeric array of any shape. It is not modified.

    Returns
    -------
    np.ndarray
        New array with the same shape as ``A``.
    """
    A = np.asarray(A)

    # min()/max() raise on empty input, so short-circuit here.
    if A.size == 0:
        return A.astype(float)

    mini = A.min()
    maxi = A.max()

    if mini < 0:
        span = maxi - mini
        # Constant array: every element equals the minimum.
        if span == 0:
            return np.zeros(A.shape, dtype=float)
        return (A - mini) / span

    # Non-negative data whose maximum is 0 means the array is all zeros.
    if maxi == 0:
        return np.zeros(A.shape, dtype=float)

    return A / maxi

In [112]:
# Each matrix is rescaled with its own whole-array minimum/maximum.
X_n, Y_n = normalise(X), normalise(Y)

In [115]:
# Check on the normalised Y: the smallest value should now be 0.
np.min(Y_n)

0.0

In [126]:
from sklearn.linear_model import LinearRegression 

In [130]:
# NOTE: this rebinds X to the normalised matrix; the raw values are no longer
# reachable under the name X after this cell has run.
X = X_n

# Fit one linear model per column of Y_n, using all columns of X as predictors.
# The column count is read from Y_n rather than hard-coded.
for i in range(Y_n.shape[1]):
    y = Y_n[:, i]
    model = LinearRegression(fit_intercept=True).fit(X, y)
    # score() is evaluated on the same data the model was fitted to, so these
    # are in-sample R^2 values.
    print(model.score(X, y))


0.18913599162296668
0.08440870097362196
0.11618833154139163
0.1030478064335032
0.11311468453125162
0.10308380199855471
0.2545386058497583
0.05683757503677822
0.10179567581907267
0.07110302239757804
0.1731306313124622
0.177073644732755
0.1365399803477796
0.18921651582916044
0.11274735267578362
