In [24]:
import pandas as pd
import numpy as np

df = pd.DataFrame(
    {'trial_num': [1, 2, 3],
     'subject': [1, 1, 1],
     'samples': [['a', 'b'], ['c', 'd'], ['e', 'f']]
    }
)
df

Unnamed: 0,trial_num,subject,samples
0,1,1,"[a, b]"
1,2,1,"[c, d]"
2,3,1,"[e, f]"


# Vectorized: explode() (pandas >= 0.25)

In [49]:
df.explode('samples')

Unnamed: 0,trial_num,subject,samples
0,1,1,a
0,1,1,b
1,2,1,c
1,2,1,d
2,3,1,e
2,3,1,f


moving index to column

In [55]:
df.rename_axis('sample_num').explode('samples').reset_index()

Unnamed: 0,sample_num,trial_num,subject,samples
0,0,1,1,a
1,0,1,1,b
2,1,2,1,c
3,1,2,1,d
4,2,3,1,e
5,2,3,1,f


# Apply

duplicated index

In [54]:
s = (df.apply(lambda x: pd.Series(x['samples']),axis=1)
     .stack()
     .reset_index(level=1, drop=True))
s.name = 'sample'
df.drop('samples', axis=1).join(s)

Unnamed: 0,trial_num,subject,sample
0,1,1,a
0,1,1,b
1,2,1,c
1,2,1,d
2,3,1,e
2,3,1,f


moving index to column

In [59]:
(df.set_index(['subject', 'trial_num'])['samples']
 .apply(pd.Series)
 .stack()
 .reset_index()
 .rename(columns={'level_2':'sample_num', 0:'sample'}))

Unnamed: 0,subject,trial_num,sample_num,sample
0,1,1,0,a
1,1,1,1,b
2,1,2,0,c
3,1,2,1,d
4,1,3,0,e
5,1,3,1,f


# Specialisations

## Strings

single column

In [95]:
dfs = pd.DataFrame(
    {'trial_num': [1, 2, 3],
     'subject': [1, 1, 1],
     'samples': ['a,b', 'c,d', 'e'],
    }
)
dfs

Unnamed: 0,trial_num,subject,samples
0,1,1,"a,b"
1,2,1,"c,d"
2,3,1,e


In [96]:
dfs.drop('samples', axis=1).join(dfs['samples'].str.split(',').explode())

Unnamed: 0,trial_num,subject,samples
0,1,1,a
0,1,1,b
1,2,1,c
1,2,1,d
2,3,1,e


also works on multiple columns

In [97]:
dfs2 = pd.DataFrame(
    {'trial_num': [1, 2, 3],
     'subject': [1, 1, 1],
     'samples': ['a,b', 'c,d', 'e'],
     'samples2': ['a,b', 'c,d', 'e']
    }
)
dfs2

Unnamed: 0,trial_num,subject,samples,samples2
0,1,1,"a,b","a,b"
1,2,1,"c,d","c,d"
2,3,1,e,e


In [99]:
out = dfs2.copy()
for col in ['samples', 'samples2']:
    out = out.drop(col, axis=1).join(out[col].str.split(',').explode())
out

Unnamed: 0,trial_num,subject,samples,samples2
0,1,1,a,a
0,1,1,a,b
0,1,1,a,a
0,1,1,a,b
0,1,1,b,a
0,1,1,b,b
0,1,1,b,a
0,1,1,b,b
1,2,1,c,c
1,2,1,c,d
