# Performing String Manipulations

## Filtering rows based on conditions 

### How to do it...

In [1]:
import polars as pl

In [81]:
# Read the reviews CSV into a DataFrame and preview its first five rows.
df = pl.read_csv(source='../data/google_store_reviews.csv')
df.head(5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-fgH…","""Eric Tie""","""https://play-lh.googleusercontent.com/a-/AOh14GiG…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3fAX…","""john alpha""","""https://play-lh.googleusercontent.com/a-/AOh14Gjp…","""I have been begging for a refund from this app fo…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking our records, your …","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azuuTv…","""Sudhakar .S""","""https://play-lh.googleusercontent.com/a-/AOh14Gid…","""Very costly for the premium version (approx India…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z7hf…","""SKGflorida@bellsouth.net DAVID S""","""https://play-lh.googleusercontent.com/-75aK0WFnia…","""Used to keep me organized, but all the 2020 UPDAT…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome about the update? We…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZEQ0Q…","""Louann Stoker""","""https://play-lh.googleusercontent.com/-pBcY_Z-qfB…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [48]:
# Show up to 50 characters of each string value in displayed DataFrames so the
# review text in the cells below is readable.
# NOTE: set_fmt_str_lengths is a method and must be *called*. Writing
# `pl.Config.set_fmt_str_lengths = 50` would only overwrite the method and
# leave the display width unchanged. The call works in notebooks and scripts.
import os  # no longer needed by this cell; kept in case a later cell uses it
pl.Config.set_fmt_str_lengths(50)

In [49]:
# Keep the reviews whose text begins with the prefix 'Very', then show the
# first five values of the 'content' column.
df.filter(
    pl.col('content').str.starts_with('Very')
).select(pl.col('content')).head(5)

content
str
"""Very costly for the premium version (approx India…"
"""Very bad"""
"""Very bad"""
"""Very slow app it took me 10 min just to start the…"
"""Very average app, not intuitive . Will probably s…"


In [50]:
# Keep the rows whose 'userName' finishes with the suffix 'Smith', then show
# the first five matching names.
df.filter(
    pl.col('userName').str.ends_with('Smith')
).select(pl.col('userName')).head(5)

userName
str
"""James Smith"""
"""J Smith"""
"""Fj Smith"""
"""Martyn Smith"""
"""D Smith"""


In [56]:
# literal=True makes 'happy' a plain substring match instead of a regex.
# Show the first five reviews that contain it anywhere in the text.
df.filter(
    pl.col('content').str.contains('happy', literal=True)
).select(pl.col('content')).head(5)

content
str
"""I love this app, but I do have one major gripe - …"
"""Not happy, app just asked me to 'sign in' and now…"
"""Will be happy if this app comes with time duratio…"
"""V usefull app i love it v much I use it daily wor…"
"""I was super happy to download this app but that I…"


In [62]:
# Without literal=True the pattern is interpreted as a regular expression, so
# '|' separates alternatives: a review matches if it contains any one phrase.
df.filter(
    pl.col('content').str.contains('very happy|best app|I love')
).select(pl.col('content')).head(5)

content
str
"""I love this app, but I do have one major gripe - …"
"""Why are random items popping up on our Grocery Li…"
"""I love using this app however when I installed it…"
"""There are certain things I love like the fact tha…"
"""I love this app, but recently the app keeps crash…"


In [90]:
# Count how many times any of the three phrases occurs in each review and keep
# only the reviews with more than two occurrences. All matches are shown.
df.filter(
    pl.col('content').str.count_matches('very happy|best app|I love') > 2
).select(pl.col('content'))

content
str
"""I love it :D I met great and fun people and this …"
"""I love Habitica! I've used it for several years, …"
"""A lot of work was put into this. I love the idea …"
"""Very nice app I downloaded many app but it is the…"


In [91]:
# Keep the rows whose 'userName' is longer than 10 characters, then show the
# first five matching names.
df.filter(
    pl.col('userName').str.len_chars() > 10
).select(pl.col('userName')).head(5)

userName
str
"""Sudhakar .S"""
"""SKGflorida@bellsouth.net DAVID S"""
"""Louann Stoker"""
"""Jon Clemens"""
"""I Dewa Gede Nopi Ariana"""


## Converting strings into date, time, and datetime 

### How to do it...

In [92]:
# Read the reviews CSV again for this recipe and preview the first five rows.
df = pl.read_csv(
    '../data/google_store_reviews.csv',
)
df.head(n=5)

reviewId,userName,userImage,content,score,thumbsUpCount,reviewCreatedVersion,at,replyContent,repliedAt,sortOrder,appId
str,str,str,str,i64,i64,str,str,str,str,str,str
"""gp:AOqpTOEhZuqSqqWnaKRgv-9ABYdajFUB0WugPGh-SG-fgH…","""Eric Tie""","""https://play-lh.googleusercontent.com/a-/AOh14GiG…","""I cannot open the app anymore""",1,0,"""5.4.0.6""","""2020-10-27 21:24:41""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOH0WP4IQKBZ2LrdNmFy_YmpPCVrV3diEU9KGm3fAX…","""john alpha""","""https://play-lh.googleusercontent.com/a-/AOh14Gjp…","""I have been begging for a refund from this app fo…",1,0,,"""2020-10-27 14:03:28""","""Please note that from checking our records, your …","""2020-10-27 15:05:52""","""newest""","""com.anydo"""
"""gp:AOqpTOEMCkJB8Iq1p-r9dPwnSYadA5BkPWTf32Z1azuuTv…","""Sudhakar .S""","""https://play-lh.googleusercontent.com/a-/AOh14Gid…","""Very costly for the premium version (approx India…",1,0,,"""2020-10-27 08:18:40""",,,"""newest""","""com.anydo"""
"""gp:AOqpTOGFrUWuKGycpje8kszj3uwHN6tU_fd4gLVFy9z7hf…","""SKGflorida@bellsouth.net DAVID S""","""https://play-lh.googleusercontent.com/-75aK0WFnia…","""Used to keep me organized, but all the 2020 UPDAT…",1,0,,"""2020-10-26 13:28:07""","""What do you find troublesome about the update? We…","""2020-10-26 14:58:29""","""newest""","""com.anydo"""
"""gp:AOqpTOHls7DW8wmDFzTkHwxuqFkdNQtKHmO6Pt9jhZEQ0Q…","""Louann Stoker""","""https://play-lh.googleusercontent.com/-pBcY_Z-qfB…","""Dan Birthday Oct 28""",1,0,"""5.6.0.7""","""2020-10-26 06:10:50""",,,"""newest""","""com.anydo"""


In [98]:
# Parse the 'at' strings with a full date-and-time format, keeping only the
# date part, and show the result beside the raw text.
df.select(
    pl.col('at'),
    pl.col('at').str.to_date('%Y-%m-%d %H:%M:%S').alias('at(date)'),
).head(5)

at,at(date)
str,date
"""2020-10-27 21:24:41""",2020-10-27
"""2020-10-27 14:03:28""",2020-10-27
"""2020-10-27 08:18:40""",2020-10-27
"""2020-10-26 13:28:07""",2020-10-26
"""2020-10-26 06:10:50""",2020-10-26


In [99]:
# Parse the same 'at' strings into a Time column (time of day only) and show
# the result beside the raw text.
df.select(
    pl.col('at'),
    pl.col('at').str.to_time('%Y-%m-%d %H:%M:%S').alias('at(time)'),
).head(5)

at,at(time)
str,time
"""2020-10-27 21:24:41""",21:24:41
"""2020-10-27 14:03:28""",14:03:28
"""2020-10-27 08:18:40""",08:18:40
"""2020-10-26 13:28:07""",13:28:07
"""2020-10-26 06:10:50""",06:10:50


In [111]:
# Parse the 'at' strings into a full Datetime column and show the result
# beside the raw text.
df.select(
    pl.col('at'),
    pl.col('at').str.to_datetime('%Y-%m-%d %H:%M:%S').alias('at(datetime)'),
).head(5)

at,at(datetime)
str,datetime[μs]
"""2020-10-27 21:24:41""",2020-10-27 21:24:41
"""2020-10-27 14:03:28""",2020-10-27 14:03:28
"""2020-10-27 08:18:40""",2020-10-27 08:18:40
"""2020-10-26 13:28:07""",2020-10-26 13:28:07
"""2020-10-26 06:10:50""",2020-10-26 06:10:50


In [110]:
# The same three conversions through the generic strptime method, where the
# target dtype is passed explicitly instead of being implied by the method
# name. One expression is built per (dtype, output column name) pair.
df.select(
    pl.col('at'),
    *(
        pl.col('at').str.strptime(dtype, '%Y-%m-%d %H:%M:%S').alias(label)
        for dtype, label in (
            (pl.Date, 'at(date)'),
            (pl.Time, 'at(time)'),
            (pl.Datetime, 'at(datetime)'),
        )
    ),
).head(5)

at,at(date),at(time),at(datetime)
str,date,time,datetime[μs]
"""2020-10-27 21:24:41""",2020-10-27,21:24:41,2020-10-27 21:24:41
"""2020-10-27 14:03:28""",2020-10-27,14:03:28,2020-10-27 14:03:28
"""2020-10-27 08:18:40""",2020-10-27,08:18:40,2020-10-27 08:18:40
"""2020-10-26 13:28:07""",2020-10-26,13:28:07,2020-10-26 13:28:07
"""2020-10-26 06:10:50""",2020-10-26,06:10:50,2020-10-26 06:10:50
