### Split each record into date, store name, and price

In [1]:
import pandas as pd
# Sample input: each record is one string of the form "<date> <store> <amount>".
raw_records = [
    '2019-09-10 COSTCO 78.20',
    '2019-09-30 KFC 8.21',
    '2019-10-01 SHELL 23.25',
]
df = pd.DataFrame({'record': raw_records})
df

Unnamed: 0,record
0,2019-09-10 COSTCO 78.20
1,2019-09-30 KFC 8.21
2,2019-10-01 SHELL 23.25


In [2]:
# The date text is the first 10 characters (YYYY-MM-DD). Slicing up to 11
# also kept the separator space that follows the date.
df['date'] = df['record'].str.slice(start=0, stop=10)
df

Unnamed: 0,record,date
0,2019-09-10 COSTCO 78.20,2019-09-10
1,2019-09-30 KFC 8.21,2019-09-30
2,2019-10-01 SHELL 23.25,2019-10-01


In [3]:
# Check the column dtypes: the sliced 'date' column is still a plain object
# (string) column at this point, not a datetime.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   record  3 non-null      object
 1   date    3 non-null      object
dtypes: object(2)
memory usage: 176.0+ bytes


In [4]:
# Parse the date text into real timestamps, then confirm the dtype changed.
parsed_dates = pd.to_datetime(df['date'])
df['date'] = parsed_dates
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   record  3 non-null      object        
 1   date    3 non-null      datetime64[ns]
dtypes: datetime64[ns](1), object(1)
memory usage: 176.0+ bytes


In [5]:
# The store name is the text between the first and the last whitespace.
# The whitespace is kept outside the capture group so the extracted name has
# no leading/trailing space; expand=False returns a Series instead of a
# one-column DataFrame.
df['store_name'] = df['record'].str.extract(r'\s(.+)\s', expand=False)
df

Unnamed: 0,record,date,store_name
0,2019-09-10 COSTCO 78.20,2019-09-10,COSTCO
1,2019-09-30 KFC 8.21,2019-09-30,KFC
2,2019-10-01 SHELL 23.25,2019-10-01,SHELL


In [6]:
# Pull out the first "<digits>.<two digits>" match from each record.
# The result is still text here; it is converted to a number later.
price_text = df['record'].str.extract(r'(\d*\.\d{2})', expand=False)
df['price'] = price_text
df

Unnamed: 0,record,date,store_name,price
0,2019-09-10 COSTCO 78.20,2019-09-10,COSTCO,78.2
1,2019-09-30 KFC 8.21,2019-09-30,KFC,8.21
2,2019-10-01 SHELL 23.25,2019-10-01,SHELL,23.25


In [7]:
# Multiplier applied to 'price' to obtain 'euro_price'.
# NOTE(review): presumably a currency conversion rate -- confirm the value and direction.
EURO_RATE = 1.08

# 'price' is still text at this point, so cast to float before multiplying,
# then round the result to 2 decimals.
df['euro_price'] = (df['price'].astype(float) * EURO_RATE).round(2)
df

Unnamed: 0,record,date,store_name,price,euro_price
0,2019-09-10 COSTCO 78.20,2019-09-10,COSTCO,78.2,84.46
1,2019-09-30 KFC 8.21,2019-09-30,KFC,8.21,8.87
2,2019-10-01 SHELL 23.25,2019-10-01,SHELL,23.25,25.11


In [8]:
# The raw text column is no longer needed once its parts have been extracted.
# Rebind instead of using inplace=True so the frame shown by earlier cells is
# not mutated behind the reader's back.
df = df.drop(columns=['record'])
df

Unnamed: 0,date,store_name,price,euro_price
0,2019-09-10,COSTCO,78.2,84.46
1,2019-09-30,KFC,8.21,8.87
2,2019-10-01,SHELL,23.25,25.11


## Use the <span style="color:red">assign</span> method, which can reduce the number of lines of code

In [9]:
# Start again from the raw records so the assign-based version begins from
# the same input as the step-by-step version.
record_series = pd.Series(
    ['2019-09-10 COSTCO 78.20', '2019-09-30 KFC 8.21', '2019-10-01 SHELL 23.25'],
    name='record',
)
df = record_series.to_frame()
df

Unnamed: 0,record
0,2019-09-10 COSTCO 78.20
1,2019-09-30 KFC 8.21
2,2019-10-01 SHELL 23.25


In [10]:
# Build every derived column in one ``assign`` call. Keyword arguments are
# evaluated in order, so a later lambda can read a column created by an
# earlier one (e.g. ``datetimeformat`` reads ``date``).
df = df.assign(
    # First 10 characters are the YYYY-MM-DD text; stop=11 also kept the
    # separator space after the date.
    date=lambda x: x.record.str.slice(start=0, stop=10),
    datetimeformat=lambda x: pd.to_datetime(x.date),
    # Whitespace stays outside the capture group so the name has no
    # leading/trailing space; ``.+`` matches the pattern used in the
    # step-by-step section above. expand=False yields a Series.
    store_name=lambda x: x.record.str.extract(r'\s(.+)\s', expand=False),
    # Extract the amount text and convert it to a number in one step.
    price=lambda x: x.record.str.extract(r'(\d*\.\d{2})', expand=False).astype(float),
    # Left unrounded here (the step-by-step section rounds to 2 decimals).
    euro_price=lambda x: x.price * 1.08,
)

df


Unnamed: 0,record,date,datetimeformat,store_name,price,euro_price
0,2019-09-10 COSTCO 78.20,2019-09-10,2019-09-10,COSTCO,78.2,84.456
1,2019-09-30 KFC 8.21,2019-09-30,2019-09-30,KFC,8.21,8.8668
2,2019-10-01 SHELL 23.25,2019-10-01,2019-10-01,SHELL,23.25,25.11


In [11]:
# Drop the raw text columns and let the parsed timestamp column take over
# the 'date' name.
df = (
    df
    .drop(columns=['record', 'date'])
    .rename(columns={'datetimeformat': 'date'})
)
df

Unnamed: 0,date,store_name,price,euro_price
0,2019-09-10,COSTCO,78.2,84.456
1,2019-09-30,KFC,8.21,8.8668
2,2019-10-01,SHELL,23.25,25.11
