In [1]:
import pandas as pd
import numpy as np

In [None]:
pip install pandas --upgrade

In [2]:
# Show which pandas version the kernel has loaded; the cells below rely on
# DataFrame.explode, which was introduced in pandas 0.25.
pd.__version__

'0.25.3'

In [3]:
# Toy menu: one row per sandwich, with its ingredients packed into a single
# comma-separated string.
pans = pd.DataFrame(
    [
        ("breton", "pork loin, bacon, cheese"),
        ("british bacon", "bacon, cheese"),
        ("noruego", "salmon, raisins, cream cheese, lettuce"),
    ],
    columns=["sandwich", "ingredients"],
)

In [4]:
# Display the raw frame: one row per sandwich, ingredients still packed into
# one comma-separated string.
pans

Unnamed: 0,sandwich,ingredients
0,breton,"pork loin, bacon, cheese"
1,british bacon,"bacon, cheese"
2,noruego,"salmon, raisins, cream cheese, lettuce"


#### By using the `assign` method together with `str.split` and the new `explode` method, we can disaggregate the elements of a string into individual rows, each of which can still be traced back to its original index label

In [5]:
# Turn each comma-separated ingredients string into a list, then give every
# list element its own row. Exploded rows keep the index label of the row they
# came from, so each ingredient stays linked to its sandwich.
pans = pans.assign(
    ingredients=lambda frame: frame["ingredients"].str.split(",")
).explode("ingredients")

In [6]:
# Splitting on "," leaves a leading space on every ingredient after the first.
# str.strip() trims both ends in a single call, and the column is written with
# bracket syntax, the unambiguous way to assign to a DataFrame column.
pans["ingredients"] = pans["ingredients"].str.strip()

In [8]:
# Display the exploded frame: one row per ingredient, each carrying the index
# label of the sandwich it came from.
pans

Unnamed: 0,sandwich,ingredients
0,breton,pork loin
0,breton,bacon
0,breton,cheese
1,british bacon,bacon
1,british bacon,cheese
2,noruego,salmon
2,noruego,raisins
2,noruego,cream cheese
2,noruego,lettuce


How many times is a given ingredient used across the sandwiches? With this new structure, that is easy to answer.

In [7]:
# Count how many rows (one per sandwich/ingredient pair) mention each ingredient.
pans["ingredients"].value_counts()

cheese          2
bacon           2
lettuce         1
salmon          1
pork loin       1
cream cheese    1
raisins         1
Name: ingredients, dtype: int64

#### An alternative would have been to use the `melt` method after splitting with `expand=True`

In [23]:
# Fresh copy of the toy menu for the melt-based alternative; `pans` has already
# been reshaped above, so the data is rebuilt from scratch.
pans_2 = pd.DataFrame(
    {
        "sandwich": ["breton", "british bacon", "noruego"],
        "ingredients": [
            "pork loin, bacon, cheese",
            "bacon, cheese",
            "salmon, raisins, cream cheese, lettuce",
        ],
    }
)

In [24]:
# Spread the comma-separated ingredients over columns a-d, one per position.
# Four columns are enough because the longest list in this toy data has four
# items; shorter lists leave the trailing columns empty.
# Splitting on "," alone keeps the space that follows each comma (" bacon"),
# so every resulting column is stripped. Without that, the same ingredient
# would appear under two different spellings depending on its position.
pans_2[["a", "b", "c", "d"]] = (
    pans_2["ingredients"]
    .str.split(",", expand=True)
    .apply(lambda col: col.str.strip())
)

In [25]:
# Display the wide frame: the split ingredients sit in columns a-d, with empty
# slots for sandwiches that have fewer than four ingredients.
pans_2

Unnamed: 0,sandwich,ingredients,a,b,c,d
0,breton,"pork loin, bacon, cheese",pork loin,bacon,cheese,
1,british bacon,"bacon, cheese",bacon,cheese,,
2,noruego,"salmon, raisins, cream cheese, lettuce",salmon,raisins,cream cheese,lettuce


In [26]:
# Reshape the wide a-d columns into long form: one row per (sandwich, slot)
# pair, with the slot's content stored in an "ingredient" column. The
# "variable" column only records which slot (a-d) a value came from, so it is
# dropped.
pans_2b = (
    pans_2
    .melt(
        id_vars="sandwich",
        value_vars=["a", "b", "c", "d"],
        value_name="ingredient",
    )
    .drop(columns="variable")
)

In [27]:
# Sandwiches with fewer than four ingredients left empty slots behind;
# discard the rows whose ingredient is missing.
pans_2b = pans_2b.dropna(subset=["ingredient"])

#### However, this route is longer and still does not reach quite the same result: `melt` builds a new index, so the rows can no longer be traced back to the original index label of each sandwich (only the `sandwich` column still identifies it). With the `explode` method this was done automatically.

In [28]:
# Display the long-form result; its index is the row numbering produced by
# melt (with gaps where empty slots were removed), not the original sandwich index.
pans_2b

Unnamed: 0,sandwich,ingredient
0,breton,pork loin
1,british bacon,bacon
2,noruego,salmon
3,breton,bacon
4,british bacon,cheese
5,noruego,raisins
6,breton,cheese
8,noruego,cream cheese
11,noruego,lettuce
