In [1]:
import pandas as pd

# String Operations in Pandas

In [2]:
# Sample DataFrame used throughout the notebook.
# The values are deliberately messy: some names carry leading/trailing
# spaces and the city names use inconsistent letter case.
sample_columns = {
    "Name": [" Onkar ", "Amit", "Sara ", " Rohit"],
    "City": ["pune", "MUMBAI", "NaShik", "PUNE"],
    "Email": ["onkar@gmail.com", "amit@yahoo.com", "sara@hotmail.com", "rohit@gmail.com"],
}
df = pd.DataFrame(sample_columns)

df

Unnamed: 0,Name,City,Email
0,Onkar,pune,onkar@gmail.com
1,Amit,MUMBAI,amit@yahoo.com
2,Sara,NaShik,sara@hotmail.com
3,Rohit,PUNE,rohit@gmail.com


## 1. Remove extra spaces

Points:
1. Remove both leading and trailing spaces - `.str.strip()`
2. Remove leading (left-side) spaces only - `.str.lstrip()`
3. Remove trailing (right-side) spaces only - `.str.rstrip()`

In [7]:
# Before: the raw Name column; some values carry leading/trailing spaces
df["Name"]

0     Onkar 
1       Amit
2      Sara 
3      Rohit
Name: Name, dtype: object

In [8]:
# After: strip() removes leading and trailing whitespace from every value.
# The result is only displayed, not assigned, so df["Name"] keeps its spaces.
df["Name"].str.strip()

0    Onkar
1     Amit
2     Sara
3    Rohit
Name: Name, dtype: object

## 2. Change letter case

In [9]:
# Convert every city name to lower case (e.g. "MUMBAI" -> "mumbai")
df["City"].str.lower()

0      pune
1    mumbai
2    nashik
3      pune
Name: City, dtype: object

In [10]:
# Convert every city name to upper case (e.g. "NaShik" -> "NASHIK")
df["City"].str.upper()

0      PUNE
1    MUMBAI
2    NASHIK
3      PUNE
Name: City, dtype: object

In [11]:
# Convert every city name to title case (e.g. "NaShik" -> "Nashik")
df["City"].str.title()

0      Pune
1    Mumbai
2    Nashik
3      Pune
Name: City, dtype: object

## 3. String contains

In [18]:
# Keep only the rows whose e-mail address contains the text "gmail".
# regex=False matches the text literally instead of as a regular expression.
# na=False treats a missing e-mail as "no match", so the boolean mask never
# contains NaN (boolean indexing rejects masks that contain missing values).
is_gmail = df["Email"].str.contains("gmail", regex=False, na=False)
df[is_gmail]

Unnamed: 0,Name,City,Email
0,Onkar,pune,onkar@gmail.com
3,Rohit,PUNE,rohit@gmail.com


## 4. Replace string value

In [19]:
# Replace "pune" with "Pune". case=False makes the match case-insensitive,
# so both "pune" and "PUNE" are rewritten; other cities are left untouched.
df["City"].str.replace(pat="pune", repl="Pune", case=False)

0      Pune
1    MUMBAI
2    NaShik
3      Pune
Name: City, dtype: object

## 5. Extract substring

In [24]:
# First two characters of each city name
df["City"].str.slice(stop=2)

0    pu
1    MU
2    Na
3    PU
Name: City, dtype: object

In [25]:
# First four characters of each city name
df["City"].str.slice(stop=4)

0    pune
1    MUMB
2    NaSh
3    PUNE
Name: City, dtype: object

## 6. Length of each string

In [26]:
# Number of characters in each name. Spaces are counted too: " Onkar " has
# length 7 because the column still holds the original, unstripped values.
df["Name"].str.len()

0    7
1    4
2    5
3    6
Name: Name, dtype: int64

## 7. Split string

In [28]:
# Split each address at "@". expand=True returns the pieces as separate
# columns: column 0 holds the part before "@", column 1 the part after it.
df["Email"].str.split(pat="@", expand=True)

Unnamed: 0,0,1
0,onkar,gmail.com
1,amit,yahoo.com
2,sara,hotmail.com
3,rohit,gmail.com


## 8. Extract a particular part after splitting

In [30]:
# Split each address at "@" (giving a list per row), then keep only the
# first element of each list, i.e. the part before the "@".
email_parts = df["Email"].str.split(pat="@")
email_parts.str.get(0)

0    onkar
1     amit
2     sara
3    rohit
Name: Email, dtype: object

## 9. Remove symbols and numbers

In [31]:
# Overwrite the Name column with sample values that contain a symbol ("@"),
# digits ("10") and an underscore, so there is something to clean up below.
# NOTE: this replaces the whitespace-padded names used in the earlier sections.
df["Name"] = ["@Onkar", "Amit10", "Sara", "Rohit_"]

In [32]:
# Show the Name column after it was overwritten with the noisy sample values
df["Name"]

0    @Onkar
1    Amit10
2      Sara
3    Rohit_
Name: Name, dtype: object

In [40]:
# Delete every character that is not an ASCII letter (a-z or A-Z). This
# removes the "@", the digits and the underscore from the sample names.
df["Name"].str.replace(pat=r"[^a-zA-Z]", repl="", regex=True)

0    Onkar
1     Amit
2     Sara
3    Rohit
Name: Name, dtype: object