# Chapter 6: Performing String Manipulations

## Filtering rows based on conditions 

### How to do it...

In [1]:
import polars as pl

In [2]:
# Load the Google Play Store reviews CSV and preview its first five rows.
reviews_path = '../data/google_store_reviews.csv'
df = pl.read_csv(reviews_path)
df.head(5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYd…","""Eric Tie""","""https://play-lh.googleusercont…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_Ym…","""john alpha""","""https://play-lh.googleusercont…","""I have been begging for a refu…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking…","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYa…","""Sudhakar .S""","""https://play-lh.googleusercont…","""Very costly for the premium ve…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uw…","""SKGflorida@bellsouth.net DAVID…","""https://play-lh.googleusercont…","""Used to keep me organized, but…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome a…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFk…","""Louann Stoker""","""https://play-lh.googleusercont…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [3]:
# Show up to 50 characters of each string value in printed frames.
# The Config API spelling of the same setting is: pl.Config.set_fmt_str_lengths(50)
import os

os.environ['POLARS_FMT_STR_LEN'] = '50'

In [4]:
# Preview the reviews whose text begins with "Very".
begins_with_very = pl.col('content').str.starts_with('Very')
df.filter(begins_with_very).select('content').head()

content
str
"""Very costly for the premium version (approx Indian…"
"""Very bad"""
"""Very bad"""
"""Very slow app it took me 10 min just to start the …"
"""Very average app, not intuitive . Will probably sp…"


In [5]:
# Preview the user names that end with "Smith".
ends_with_smith = pl.col('userName').str.ends_with('Smith')
df.filter(ends_with_smith).select('userName').head()

userName
str
"""James Smith"""
"""J Smith"""
"""Fj Smith"""
"""Martyn Smith"""
"""D Smith"""


In [6]:
# literal=True searches for 'happy' as a plain substring instead of a regex.
mentions_happy = pl.col('content').str.contains('happy', literal=True)
df.filter(mentions_happy).select('content').head()

content
str
"""I love this app, but I do have one major gripe - I…"
"""Not happy, app just asked me to 'sign in' and now …"
"""Will be happy if this app comes with time duration…"
"""V usefull app i love it v much I use it daily work…"
"""I was super happy to download this app but that I …"


In [7]:
# Without literal=True the pattern is a regex: "|" accepts any one of the phrases.
praise_pattern = r'very happy|best app|I love'
has_praise = pl.col('content').str.contains(praise_pattern)
df.filter(has_praise).select('content').head()

content
str
"""I love this app, but I do have one major gripe - I…"
"""Why are random items popping up on our Grocery Lis…"
"""I love using this app however when I installed it …"
"""There are certain things I love like the fact that…"
"""I love this app, but recently the app keeps crashi…"


In [8]:
# Number of reviews that contain at least one of the listed words.
keywords = ['happy', 'love', 'best']
has_keyword = pl.col('content').str.contains_any(keywords)
df.filter(has_keyword).height

1237

### There is more...

In [9]:
# Keep only the reviews in which the pattern matches more than twice.
praise_hits = pl.col('content').str.count_matches(r'very happy|best app|I love')
df.filter(praise_hits > 2).select('content')

content
str
"""I love it :D I met great and fun people and this i…"
"""I love Habitica! I've used it for several years, b…"
"""A lot of work was put into this. I love the idea o…"
"""Very nice app I downloaded many app but it is the …"


In [10]:
# Preview the user names that are longer than 10 characters.
name_length = pl.col('userName').str.len_chars()
df.filter(name_length > 10).select('userName').head()

userName
str
"""Sudhakar .S"""
"""SKGflorida@bellsouth.net DAVID S"""
"""Louann Stoker"""
"""Jon Clemens"""
"""I Dewa Gede Nopi Ariana"""


## Converting strings into date, time, and datetime 

### How to do it...

In [11]:
# Reload the reviews so this recipe starts from the raw CSV; preview five rows.
reviews_path = '../data/google_store_reviews.csv'
df = pl.read_csv(reviews_path)
df.head(5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-fgH3…","""Eric Tie""","""https://play-lh.googleusercontent.com/a-/AOh14GiGE…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3fAX6…","""john alpha""","""https://play-lh.googleusercontent.com/a-/AOh14Gjpf…","""I have been begging for a refund from this app for…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking our records, your e…","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azuuTvq…","""Sudhakar .S""","""https://play-lh.googleusercontent.com/a-/AOh14GidH…","""Very costly for the premium version (approx Indian…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z7hfG…","""SKGflorida@bellsouth.net DAVID S""","""https://play-lh.googleusercontent.com/-75aK0WFniac…","""Used to keep me organized, but all the 2020 UPDATE…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome about the update? We'…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZEQ0Q2…","""Louann Stoker""","""https://play-lh.googleusercontent.com/-pBcY_Z-qfB4…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [12]:
# Parse the 'at' strings with the full timestamp format into a Date column,
# shown next to the raw strings for comparison.
at_as_date = pl.col('at').str.to_date(format='%Y-%m-%d %H:%M:%S')
df.select('at', at_as_date.alias('at(date)')).head()

at,at(date)
str,date
"""2020-10-27 21:24:41""",2020-10-27
"""2020-10-27 14:03:28""",2020-10-27
"""2020-10-27 08:18:40""",2020-10-27
"""2020-10-26 13:28:07""",2020-10-26
"""2020-10-26 06:10:50""",2020-10-26


In [13]:
# Parse the 'at' strings with the full timestamp format into a Time column,
# shown next to the raw strings for comparison.
at_as_time = pl.col('at').str.to_time(format='%Y-%m-%d %H:%M:%S')
df.select('at', at_as_time.alias('at(time)')).head()

at,at(time)
str,time
"""2020-10-27 21:24:41""",21:24:41
"""2020-10-27 14:03:28""",14:03:28
"""2020-10-27 08:18:40""",08:18:40
"""2020-10-26 13:28:07""",13:28:07
"""2020-10-26 06:10:50""",06:10:50


In [14]:
# Parse the 'at' strings into a Datetime column, shown next to the raw strings.
at_as_datetime = pl.col('at').str.to_datetime(format='%Y-%m-%d %H:%M:%S')
df.select('at', at_as_datetime.alias('at(datetime)')).head()

at,at(datetime)
str,datetime[μs]
"""2020-10-27 21:24:41""",2020-10-27 21:24:41
"""2020-10-27 14:03:28""",2020-10-27 14:03:28
"""2020-10-27 08:18:40""",2020-10-27 08:18:40
"""2020-10-26 13:28:07""",2020-10-26 13:28:07
"""2020-10-26 06:10:50""",2020-10-26 06:10:50


In [15]:
# strptime takes the target dtype as an argument, so the same format string
# is reused for the Date, Time and Datetime conversions.
timestamp_format = '%Y-%m-%d %H:%M:%S'
at_strings = pl.col('at').str
df.select(
    'at',
    at_strings.strptime(pl.Date, timestamp_format).alias('at(date)'),
    at_strings.strptime(pl.Time, timestamp_format).alias('at(time)'),
    at_strings.strptime(pl.Datetime, timestamp_format).alias('at(datetime)'),
).head()

at,at(date),at(time),at(datetime)
str,date,time,datetime[μs]
"""2020-10-27 21:24:41""",2020-10-27,21:24:41,2020-10-27 21:24:41
"""2020-10-27 14:03:28""",2020-10-27,14:03:28,2020-10-27 14:03:28
"""2020-10-27 08:18:40""",2020-10-27,08:18:40,2020-10-27 08:18:40
"""2020-10-26 13:28:07""",2020-10-26,13:28:07,2020-10-26 13:28:07
"""2020-10-26 06:10:50""",2020-10-26,06:10:50,2020-10-26 06:10:50


## Extracting substrings

### How to do it...

In [16]:
import polars as pl

In [17]:
# Reload the reviews so this recipe starts from the raw CSV; preview five rows.
reviews_path = '../data/google_store_reviews.csv'
df = pl.read_csv(reviews_path)
df.head(5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-fgH3…","""Eric Tie""","""https://play-lh.googleusercontent.com/a-/AOh14GiGE…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3fAX6…","""john alpha""","""https://play-lh.googleusercontent.com/a-/AOh14Gjpf…","""I have been begging for a refund from this app for…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking our records, your e…","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azuuTvq…","""Sudhakar .S""","""https://play-lh.googleusercontent.com/a-/AOh14GidH…","""Very costly for the premium version (approx Indian…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z7hfG…","""SKGflorida@bellsouth.net DAVID S""","""https://play-lh.googleusercontent.com/-75aK0WFniac…","""Used to keep me organized, but all the 2020 UPDATE…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome about the update? We'…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZEQ0Q2…","""Louann Stoker""","""https://play-lh.googleusercontent.com/-pBcY_Z-qfB4…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [18]:
# Offset 3 with no length: skip the first three characters and keep the rest.
from_fourth_char = pl.col('userName').str.slice(offset=3)
df.select('userName', from_fourth_char.alias('4thCharAndAfter')).head()

userName,4thCharAndAfter
str,str
"""Eric Tie""","""c Tie"""
"""john alpha""","""n alpha"""
"""Sudhakar .S""","""hakar .S"""
"""SKGflorida@bellsouth.net DAVID S""","""florida@bellsouth.net DAVID S"""
"""Louann Stoker""","""ann Stoker"""


In [19]:
# Offset 3, length 5: up to five characters starting at the fourth character.
five_from_fourth = pl.col('userName').str.slice(offset=3, length=5)
df.select('userName', five_from_fourth.alias('5CharsAfter4thChar')).head()

userName,5CharsAfter4thChar
str,str
"""Eric Tie""","""c Tie"""
"""john alpha""","""n alp"""
"""Sudhakar .S""","""hakar"""
"""SKGflorida@bellsouth.net DAVID S""","""flori"""
"""Louann Stoker""","""ann S"""


In [20]:
# A negative offset counts from the end of the string: offset -2 with length 1
# picks the single character just before the last one (the second-to-last).
second_to_last = pl.col('userName').str.slice(offset=-2, length=1)
df.select('userName', second_to_last.alias('TheLastToSecondChar')).head()

userName,TheLastToSecondChar
str,str
"""Eric Tie""","""i"""
"""john alpha""","""h"""
"""Sudhakar .S""","""."""
"""SKGflorida@bellsouth.net DAVID S""",""" """
"""Louann Stoker""","""e"""


In [22]:
# extract returns capture group 1 of the first match by default;
# with this pattern that is the first run of ASCII letters in each review.
first_letters_run = pl.col('content').str.extract(r'([A-Za-z]+)')
df.select('content', first_letters_run.alias('extract')).head(5)

content,extract
str,str
"""I cannot open the app anymore""","""I"""
"""I have been begging for a refund from this app for…","""I"""
"""Very costly for the premium version (approx Indian…","""Very"""
"""Used to keep me organized, but all the 2020 UPDATE…","""Used"""
"""Dan Birthday Oct 28""","""Dan"""


In [24]:
# The second argument selects what to return from the first match:
# 0 is the whole match, 1 and 2 are the first and second capture groups.
pattern = r'([A-Za-z]{3}) ([0-9]+)'
content_str = pl.col('content').str
df.select(
    'content',
    content_str.extract(pattern, 0).alias('extract whole matches specified'),
    content_str.extract(pattern, 1).alias('extract group 1 specified'),
    content_str.extract(pattern, 2).alias('extract group 2 specified'),
).head(5)

content,extract whole matches specified,extract group 1 specified,extract group 2 specified
str,str,str,str
"""I cannot open the app anymore""",,,
"""I have been begging for a refund from this app for…",,,
"""Very costly for the premium version (approx Indian…","""ees 910""","""ees""","""910"""
"""Used to keep me organized, but all the 2020 UPDATE…","""the 2020""","""the""","""2020"""
"""Dan Birthday Oct 28""","""Oct 28""","""Oct""","""28"""


In [26]:
# extract yields only the first match as a string;
# extract_all collects every match into a list of strings.
letters_pattern = r'([A-Za-z]+)'
content_str = pl.col('content').str
df.select(
    'content',
    content_str.extract(letters_pattern).alias('extract'),
    content_str.extract_all(letters_pattern).alias('extract_all'),
).head(5)

content,extract,extract_all
str,str,list[str]
"""I cannot open the app anymore""","""I""","[""I"", ""cannot"", … ""anymore""]"
"""I have been begging for a refund from this app for…","""I""","[""I"", ""have"", … ""me""]"
"""Very costly for the premium version (approx Indian…","""Very""","[""Very"", ""costly"", … ""better""]"
"""Used to keep me organized, but all the 2020 UPDATE…","""Used""","[""Used"", ""to"", … ""salary""]"
"""Dan Birthday Oct 28""","""Dan""","[""Dan"", ""Birthday"", ""Oct""]"


In [27]:
# NOTE(review): this cell repeats the preceding extract / extract_all comparison
# with the same pattern and aliases -- consider removing one of the two.
first_match = pl.col('content').str.extract(r'([A-Za-z]+)')
every_match = pl.col('content').str.extract_all(r'([A-Za-z]+)')
df.select(
    'content',
    first_match.alias('extract'),
    every_match.alias('extract_all'),
).head(5)

content,extract,extract_all
str,str,list[str]
"""I cannot open the app anymore""","""I""","[""I"", ""cannot"", … ""anymore""]"
"""I have been begging for a refund from this app for…","""I""","[""I"", ""have"", … ""me""]"
"""Very costly for the premium version (approx Indian…","""Very""","[""Very"", ""costly"", … ""better""]"
"""Used to keep me organized, but all the 2020 UPDATE…","""Used""","[""Used"", ""to"", … ""salary""]"
"""Dan Birthday Oct 28""","""Dan""","[""Dan"", ""Birthday"", ""Oct""]"


In [28]:
# extract with index 0 returns the whole first match as one string;
# extract_groups returns a struct with one field per capture group.
pattern = r'([A-Za-z]{3}) ([0-9]+)'
content_str = pl.col('content').str
df.select(
    'content',
    content_str.extract(pattern, 0).alias('extract'),
    content_str.extract_groups(pattern).alias('extract_groups'),
).head()

content,extract,extract_groups
str,str,struct[2]
"""I cannot open the app anymore""",,"{null,null}"
"""I have been begging for a refund from this app for…",,"{null,null}"
"""Very costly for the premium version (approx Indian…","""ees 910""","{""ees"",""910""}"
"""Used to keep me organized, but all the 2020 UPDATE…","""the 2020""","{""the"",""2020""}"
"""Dan Birthday Oct 28""","""Oct 28""","{""Oct"",""28""}"


### There is more...

In [29]:
# The inline (?i) flag makes the regex case-insensitive,
# so [A-Z] matches lowercase letters as well.
all_words = pl.col('content').str.extract_all(r'(?i)([A-Z]+)')
df.select('content', all_words.alias('extract_all')).head()

content,extract_all
str,list[str]
"""I cannot open the app anymore""","[""I"", ""cannot"", … ""anymore""]"
"""I have been begging for a refund from this app for…","[""I"", ""have"", … ""me""]"
"""Very costly for the premium version (approx Indian…","[""Very"", ""costly"", … ""better""]"
"""Used to keep me organized, but all the 2020 UPDATE…","[""Used"", ""to"", … ""salary""]"
"""Dan Birthday Oct 28""","[""Dan"", ""Birthday"", ""Oct""]"


## Cleaning strings

### How to do it...

In [30]:
import polars as pl

In [31]:
# Small sample frame: mixed casing plus leading, trailing and repeated spaces.
messy_text = [
    '  I aM a HUmAn.  ',
    'it is NOT   easy!  ',
    ' WHY are You cool',
]
df = pl.DataFrame({'text': messy_text})
df.head()

text
str
""" I aM a HUmAn. """
"""it is NOT easy! """
""" WHY are You cool"""


In [32]:
# strip_chars() with no argument removes leading and trailing whitespace.
stripped = pl.col('text').str.strip_chars()
df.select('text', stripped.alias('stripped_text'))

text,stripped_text
str,str
""" I aM a HUmAn. ""","""I aM a HUmAn."""
"""it is NOT easy! ""","""it is NOT easy!"""
""" WHY are You cool""","""WHY are You cool"""


In [33]:
# n=1 limits the replacement to the first literal 'a' in each string.
first_a_replaced = pl.col('text').str.replace('a', 'new_a', literal=True, n=1)
df.select('text', first_a_replaced.alias('replaced_text'))

text,replaced_text
str,str
""" I aM a HUmAn. """,""" I new_aM a HUmAn. """
"""it is NOT easy! ""","""it is NOT enew_asy! """
""" WHY are You cool""",""" WHY new_are You cool"""


In [34]:
# replace_all substitutes every literal 'a' in each string, not just the first.
every_a_replaced = pl.col('text').str.replace_all('a', 'new_a', literal=True)
df.select('text', every_a_replaced.alias('replaced_all_text'))

text,replaced_all_text
str,str
""" I aM a HUmAn. """,""" I new_aM new_a HUmAn. """
"""it is NOT easy! ""","""it is NOT enew_asy! """
""" WHY are You cool""",""" WHY new_are You cool"""


In [35]:
# Title-case each string and show it beside the original text.
title_cased = pl.col('text').str.to_titlecase()
df.select('text', title_cased.alias('title_case'))

text,title_case
str,str
""" I aM a HUmAn. """,""" I Am A Human. """
"""it is NOT easy! ""","""It Is Not Easy! """
""" WHY are You cool""",""" Why Are You Cool"""


In [79]:
# Lowercase each string and show it beside the original text.
lower_cased = pl.col('text').str.to_lowercase()
df.select('text', lower_cased.alias('lower_case'))

text,lower_case
str,str
""" I aM a HUmAn. """,""" i am a human. """
"""it is NOT easy! ""","""it is not easy! """
""" WHY are You cool""",""" why are you cool"""


In [80]:
# Uppercase each string and show it beside the original text.
upper_cased = pl.col('text').str.to_uppercase()
df.select('text', upper_cased.alias('upper_case'))

text,upper_case
str,str
""" I aM a HUmAn. """,""" I AM A HUMAN. """
"""it is NOT easy! ""","""IT IS NOT EASY! """
""" WHY are You cool""",""" WHY ARE YOU COOL"""


In [81]:
# Pad each string with '~' up to a length of 20: pad_start fills on the left,
# pad_end fills on the right.
text_str = pl.col('text').str
df.select(
    'text',
    text_str.pad_start(20, '~').alias('pad_start'),
    text_str.pad_end(20, '~').alias('pad_end'),
)

text,pad_start,pad_end
str,str,str
""" I aM a HUmAn. ""","""~~~ I aM a HUmAn. """,""" I aM a HUmAn. ~~~"""
"""it is NOT easy! ""","""~it is NOT easy! ""","""it is NOT easy! ~"""
""" WHY are You cool""","""~~~ WHY are You cool""",""" WHY are You cool~~~"""


In [82]:
# Left-pad each string with '0' up to a length of 20.
zero_padded = pl.col('text').str.pad_start(20, '0')
df.select('text', zero_padded.alias('pad_start'))

text,pad_start
str,str
""" I aM a HUmAn. ""","""000 I aM a HUmAn. """
"""it is NOT easy! ""","""0it is NOT easy! """
""" WHY are You cool""","""000 WHY are You cool"""


## Splitting strings into lists and structs 

### How to do it...

In [36]:
import polars as pl

# Reload the reviews so this recipe starts from the raw CSV; preview five rows.
reviews_path = '../data/google_store_reviews.csv'
df = pl.read_csv(reviews_path)
df.head(5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-fgH3…","""Eric Tie""","""https://play-lh.googleusercontent.com/a-/AOh14GiGE…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3fAX6…","""john alpha""","""https://play-lh.googleusercontent.com/a-/AOh14Gjpf…","""I have been begging for a refund from this app for…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking our records, your e…","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azuuTvq…","""Sudhakar .S""","""https://play-lh.googleusercontent.com/a-/AOh14GidH…","""Very costly for the premium version (approx Indian…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z7hfG…","""SKGflorida@bellsouth.net DAVID S""","""https://play-lh.googleusercontent.com/-75aK0WFniac…","""Used to keep me organized, but all the 2020 UPDATE…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome about the update? We'…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZEQ0Q2…","""Louann Stoker""","""https://play-lh.googleusercontent.com/-pBcY_Z-qfB4…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [37]:
# Split each review on single spaces; the result is a list[str] column.
words = pl.col('content').str.split(by=' ')
df.select('content', words.alias('split')).head()

content,split
str,list[str]
"""I cannot open the app anymore""","[""I"", ""cannot"", … ""anymore""]"
"""I have been begging for a refund from this app for…","[""I"", ""have"", … ""me""]"
"""Very costly for the premium version (approx Indian…","[""Very"", ""costly"", … ""better.""]"
"""Used to keep me organized, but all the 2020 UPDATE…","[""Used"", ""to"", … ""🤤🤤🤤""]"
"""Dan Birthday Oct 28""","[""Dan"", ""Birthday"", … ""28""]"


In [38]:
# Both methods return a struct instead of a list:
# - splitn(n=10) yields 10 fields, the last one holding the unsplit remainder;
# - split_exact(n=10) performs 10 splits, yielding 11 fields and dropping the rest.
content_str = pl.col('content').str
df.select(
    'content',
    content_str.splitn(by=' ', n=10).alias('splitn'),
    content_str.split_exact(by=' ', n=10).alias('split_exact'),
).head()

content,splitn,split_exact
str,struct[10],struct[11]
"""I cannot open the app anymore""","{""I"",""cannot"",""open"",""the"",""app"",""anymore"",null,null,null,null}","{""I"",""cannot"",""open"",""the"",""app"",""anymore"",null,null,null,null,null}"
"""I have been begging for a refund from this app for…","{""I"",""have"",""been"",""begging"",""for"",""a"",""refund"",""from"",""this"",""app for over a month and nobody is replying me""}","{""I"",""have"",""been"",""begging"",""for"",""a"",""refund"",""from"",""this"",""app"",""for""}"
"""Very costly for the premium version (approx Indian…","{""Very"",""costly"",""for"",""the"",""premium"",""version"",""(approx"",""Indian"",""Rupees"",""910 per year). Better to download the premium version of this app from apkmos website and use it. Microsoft to do list app is far more better.""}","{""Very"",""costly"",""for"",""the"",""premium"",""version"",""(approx"",""Indian"",""Rupees"",""910"",""per""}"
"""Used to keep me organized, but all the 2020 UPDATE…","{""Used"",""to"",""keep"",""me"",""organized,"",""but"",""all"",""the"",""2020"",""UPDATES have made a mess of things !!! Y cudn't u leave well enuf alone ??? Guess ur techies feel the need to keep making changes to justify continuing to collect their salary !!! 🤤🤤🤤""}","{""Used"",""to"",""keep"",""me"",""organized,"",""but"",""all"",""the"",""2020"",""UPDATES"",""have""}"
"""Dan Birthday Oct 28""","{""Dan"",""Birthday"",""Oct"",""28"",null,null,null,null,null,null}","{""Dan"",""Birthday"",""Oct"",""28"",null,null,null,null,null,null,null}"


## Concatenating strings

### How to do it...

In [39]:
import polars as pl

In [40]:
# Two small string columns to demonstrate concatenation.
col_a_values = ['a', 'b', 'c', 'd']
col_b_values = ['aa', 'bb', 'cc', 'dd']
df = pl.DataFrame({'colA': col_a_values, 'colB': col_b_values})
df

colA,colB
str,str
"""a""","""aa"""
"""b""","""bb"""
"""c""","""cc"""
"""d""","""dd"""


In [41]:
# "+" between a string column and a Python string appends the literal to every row.
col_b_with_suffix = pl.col('colB') + ' new'
df.select(pl.all(), col_b_with_suffix.alias('newColB'))

colA,colB,newColB
str,str,str
"""a""","""aa""","""aa new"""
"""b""","""bb""","""bb new"""
"""c""","""cc""","""cc new"""
"""d""","""dd""","""dd new"""


In [42]:
# "+" between two string columns concatenates them row by row.
col_a_plus_b = pl.col('colA') + pl.col('colB')
df.select(pl.all(), col_a_plus_b.alias('colC'))

colA,colB,colC
str,str,str
"""a""","""aa""","""aaa"""
"""b""","""bb""","""bbb"""
"""c""","""cc""","""ccc"""
"""d""","""dd""","""ddd"""


In [43]:
# concat_str joins all of its inputs row by row with the separator between them;
# the numeric expression (100 + 3) appears as text in the result.
parts = [
    pl.lit(100) + 3,
    pl.lit(' '),
    pl.col('colA'),
    pl.col('colB'),
]
joined_parts = pl.concat_str(parts, separator='::')
df.select(pl.all(), joined_parts.alias('newCol'))

colA,colB,newCol
str,str,str
"""a""","""aa""","""103:: ::a::aa"""
"""b""","""bb""","""103:: ::b::bb"""
"""c""","""cc""","""103:: ::c::cc"""
"""d""","""dd""","""103:: ::d::dd"""


In [44]:
# str.join collapses the whole column into one delimited string,
# which select then repeats on every row beside the other columns.
col_a_joined = pl.col('colA').str.join(delimiter=', ')
df.select(pl.all(), col_a_joined.alias('concatenatedColA'))

colA,colB,concatenatedColA
str,str,str
"""a""","""aa""","""a, b, c, d"""
"""b""","""bb""","""a, b, c, d"""
"""c""","""cc""","""a, b, c, d"""
"""d""","""dd""","""a, b, c, d"""
