In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Demo dataset: ten people, each with an age, a gender code and a score.
data = dict(
    age=[25, 22, 18, 30, 45, 50, 35, 20, 55, 40],
    gender=['M', 'F', 'F', 'M', 'M', 'F', 'M', 'F', 'M', 'M'],
    score=[90, 80, 75, 95, 70, 85, 75, 90, 95, 85],
)
df = pd.DataFrame(data=data)

## value_counts()

In [3]:
df

Unnamed: 0,age,gender,score
0,25,M,90
1,22,F,80
2,18,F,75
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
7,20,F,90
8,55,M,95
9,40,M,85


In [4]:
df['gender'].value_counts()

M    6
F    4
Name: gender, dtype: int64

In [5]:
df['gender'].value_counts(normalize=True)

M    0.6
F    0.4
Name: gender, dtype: float64

## where()

In [6]:
# Rows that fail the condition are kept in place but filled with NaN.
df.where(cond=df['age'] > 30)

Unnamed: 0,age,gender,score
0,,,
1,,,
2,,,
3,,,
4,45.0,M,70.0
5,50.0,F,85.0
6,35.0,M,75.0
7,,,
8,55.0,M,95.0
9,40.0,M,85.0


In [7]:
# Same condition, but rows that fail it are filled with 0 instead of NaN.
df.where(cond=df['age'] > 30, other=0)

Unnamed: 0,age,gender,score
0,0,0,0
1,0,0,0
2,0,0,0
3,0,0,0
4,45,M,70
5,50,F,85
6,35,M,75
7,0,0,0
8,55,M,95
9,40,M,85


In [8]:
# Collapse each row to one boolean: True only if every cell in the row is truthy.
df.where(cond=df['age'] > 30, other=0).all(axis=1)

0    False
1    False
2    False
3    False
4     True
5     True
6     True
7    False
8     True
9     True
dtype: bool

In [9]:
# Filter rows by indexing with the boolean condition itself.
# The where(..., other=0).all(axis=1) round-trip derives the mask from the
# truthiness of the cell values, so a row that satisfies the condition but
# holds a legitimate 0 (or an empty string) would be silently dropped.
df[df['age'] > 30]

Unnamed: 0,age,gender,score
4,45,M,70
5,50,F,85
6,35,M,75
8,55,M,95
9,40,M,85


In [10]:
# Reusable boolean masks: f1 marks ages above 30, f2 marks gender 'F'.
f1, f2 = df['age'] > 30, df['gender'] == 'F'

In [11]:
df.where(f1 & f2)

Unnamed: 0,age,gender,score
0,,,
1,,,
2,,,
3,,,
4,,,
5,50.0,F,85.0
6,,,
7,,,
8,,,
9,,,


In [12]:
# One boolean per row: True only where both masks hold and every cell is truthy.
df.where(cond=f1 & f2, other=0).all(axis=1)

0    False
1    False
2    False
3    False
4    False
5     True
6    False
7    False
8    False
9    False
dtype: bool

In [13]:
# Index with the combined mask directly. Going through
# where(..., other=0).all(axis=1) rebuilds the mask from cell truthiness and
# would drop a matching row that contains a legitimate 0 or empty string.
df[f1 & f2]

Unnamed: 0,age,gender,score
5,50,F,85


## isin()

In [14]:
df[df['age'].isin([25,35])]

Unnamed: 0,age,gender,score
0,25,M,90
6,35,M,75


In [15]:
df[['age','gender']]

Unnamed: 0,age,gender
0,25,M
1,22,F
2,18,F
3,30,M
4,45,M
5,50,F
6,35,M
7,20,F
8,55,M
9,40,M


In [16]:
df[['age','gender']].isin({'age':[25,35],'gender':['M']})

Unnamed: 0,age,gender
0,True,True
1,False,False
2,False,False
3,False,True
4,False,True
5,False,False
6,True,True
7,False,False
8,False,True
9,False,True


In [17]:
df[df[['age','gender']].isin({'age':[25,35],'gender':['M']})]

Unnamed: 0,age,gender,score
0,25.0,M,
1,,,
2,,,
3,,M,
4,,M,
5,,,
6,35.0,M,
7,,,
8,,M,
9,,M,


In [18]:
# Keep only the rows where BOTH columns match one of their allowed values.
df[
    df[['age', 'gender']]
    .isin({'age': [25, 35], 'gender': ['M']})
    .all(axis=1)
]

Unnamed: 0,age,gender,score
0,25,M,90
6,35,M,75


## cut and qcut

In [19]:
df

Unnamed: 0,age,gender,score
0,25,M,90
1,22,F,80
2,18,F,75
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
7,20,F,90
8,55,M,95
9,40,M,85


In [20]:
pd.cut(df['score'],bins=[60,70,80,85,90,100])

0     (85, 90]
1     (70, 80]
2     (70, 80]
3    (90, 100]
4     (60, 70]
5     (80, 85]
6     (70, 80]
7     (85, 90]
8    (90, 100]
9     (80, 85]
Name: score, dtype: category
Categories (5, interval[int64, right]): [(60, 70] < (70, 80] < (80, 85] < (85, 90] < (90, 100]]

In [21]:
df['score_bins']=pd.cut(df['score'],bins=[60,70,80,85,90,100])

In [22]:
df

Unnamed: 0,age,gender,score,score_bins
0,25,M,90,"(85, 90]"
1,22,F,80,"(70, 80]"
2,18,F,75,"(70, 80]"
3,30,M,95,"(90, 100]"
4,45,M,70,"(60, 70]"
5,50,F,85,"(80, 85]"
6,35,M,75,"(70, 80]"
7,20,F,90,"(85, 90]"
8,55,M,95,"(90, 100]"
9,40,M,85,"(80, 85]"


In [23]:
# One label per score interval, ordered from the lowest interval to the highest.
bin_names = ['very low', 'low', 'medium', 'high', 'very high']
df['score_bins'] = pd.cut(
    x=df['score'],
    bins=[60, 70, 80, 85, 90, 100],
    labels=bin_names,
)

In [24]:
df

Unnamed: 0,age,gender,score,score_bins
0,25,M,90,high
1,22,F,80,low
2,18,F,75,low
3,30,M,95,very high
4,45,M,70,very low
5,50,F,85,medium
6,35,M,75,low
7,20,F,90,high
8,55,M,95,very high
9,40,M,85,medium


In [25]:
pd.cut(df['age'],bins=[10,19,30,50,90])

0    (19, 30]
1    (19, 30]
2    (10, 19]
3    (19, 30]
4    (30, 50]
5    (30, 50]
6    (30, 50]
7    (19, 30]
8    (50, 90]
9    (30, 50]
Name: age, dtype: category
Categories (4, interval[int64, right]): [(10, 19] < (19, 30] < (30, 50] < (50, 90]]

In [26]:
df['age_group']=pd.cut(df['age'],bins=[10,19,30,50,90])

In [27]:
df

Unnamed: 0,age,gender,score,score_bins,age_group
0,25,M,90,high,"(19, 30]"
1,22,F,80,low,"(19, 30]"
2,18,F,75,low,"(10, 19]"
3,30,M,95,very high,"(19, 30]"
4,45,M,70,very low,"(30, 50]"
5,50,F,85,medium,"(30, 50]"
6,35,M,75,low,"(30, 50]"
7,20,F,90,high,"(19, 30]"
8,55,M,95,very high,"(50, 90]"
9,40,M,85,medium,"(30, 50]"


In [28]:
# One label per age interval; six bin edges produce five labelled intervals.
bins_names = ['child', 'teenage', 'adult', 'old', 'very old']
df['age_group'] = pd.cut(
    x=df['age'],
    bins=[0, 10, 19, 30, 50, 90],
    labels=bins_names,
)

In [29]:
df

Unnamed: 0,age,gender,score,score_bins,age_group
0,25,M,90,high,adult
1,22,F,80,low,adult
2,18,F,75,low,teenage
3,30,M,95,very high,adult
4,45,M,70,very low,old
5,50,F,85,medium,old
6,35,M,75,low,old
7,20,F,90,high,adult
8,55,M,95,very high,very old
9,40,M,85,medium,old


In [30]:
pd.qcut(df['age'],4)

0      (22.75, 32.5]
1    (17.999, 22.75]
2    (17.999, 22.75]
3      (22.75, 32.5]
4      (43.75, 55.0]
5      (43.75, 55.0]
6      (32.5, 43.75]
7    (17.999, 22.75]
8      (43.75, 55.0]
9      (32.5, 43.75]
Name: age, dtype: category
Categories (4, interval[float64, right]): [(17.999, 22.75] < (22.75, 32.5] < (32.5, 43.75] < (43.75, 55.0]]

In [31]:
pd.qcut(df['score'],5)

0      (87.0, 91.0]
1      (75.0, 83.0]
2    (69.999, 75.0]
3      (91.0, 95.0]
4    (69.999, 75.0]
5      (83.0, 87.0]
6    (69.999, 75.0]
7      (87.0, 91.0]
8      (91.0, 95.0]
9      (83.0, 87.0]
Name: score, dtype: category
Categories (5, interval[float64, right]): [(69.999, 75.0] < (75.0, 83.0] < (83.0, 87.0] < (87.0, 91.0] < (91.0, 95.0]]

In [32]:
# Quantile-based binning: four buckets of (roughly) equal size, labelled in ascending order.
df['age_bins'] = pd.qcut(
    df['age'],
    q=4,
    labels=['young', 'mid-young', 'mid-adult', 'senior'],
)

In [33]:
df

Unnamed: 0,age,gender,score,score_bins,age_group,age_bins
0,25,M,90,high,adult,mid-young
1,22,F,80,low,adult,young
2,18,F,75,low,teenage,young
3,30,M,95,very high,adult,mid-young
4,45,M,70,very low,old,senior
5,50,F,85,medium,old,senior
6,35,M,75,low,old,mid-adult
7,20,F,90,high,adult,young
8,55,M,95,very high,very old,senior
9,40,M,85,medium,old,mid-adult


## groupby()

In [34]:
# Rebuild the original three-column frame, discarding the bin columns added above.
data = dict(
    age=[25, 22, 18, 30, 45, 50, 35, 20, 55, 40],
    gender=['M', 'F', 'F', 'M', 'M', 'F', 'M', 'F', 'M', 'M'],
    score=[90, 80, 75, 95, 70, 85, 75, 90, 95, 85],
)
df = pd.DataFrame(data=data)

In [35]:
df

Unnamed: 0,age,gender,score
0,25,M,90
1,22,F,80
2,18,F,75
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
7,20,F,90
8,55,M,95
9,40,M,85


In [36]:
# Group the rows by gender; the aggregation cells that follow reuse this object.
grouped = df.groupby(by='gender')

In [37]:
grouped

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x000001CA61EE3950>

In [38]:
grouped['score'].mean()

gender
F    82.5
M    85.0
Name: score, dtype: float64

In [39]:
grouped.agg(['mean','sum','count','max','min'])

Unnamed: 0_level_0,age,age,age,age,age,score,score,score,score,score
Unnamed: 0_level_1,mean,sum,count,max,min,mean,sum,count,max,min
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2
F,27.5,110,4,50,18,82.5,330,4,90,75
M,38.333333,230,6,55,25,85.0,510,6,95,70


In [40]:
grouped['score'].agg(['mean','sum','count','max','min'])

Unnamed: 0_level_0,mean,sum,count,max,min
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
F,82.5,330,4,90,75
M,85.0,510,6,95,70


In [41]:
grouped['age'].agg(['mean','sum','count','max','min'])

Unnamed: 0_level_0,mean,sum,count,max,min
gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
F,27.5,110,4,50,18
M,38.333333,230,6,55,25


In [42]:
# Per-gender summary with a different aggregation per column:
# the maximum age and the mean score.
df.groupby('gender').agg(
    age=('age', 'max'),
    score=('score', 'mean'),
)

Unnamed: 0_level_0,age,score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,50,82.5
M,55,85.0


## pivot_table()

In [43]:
df.pivot_table(index='gender',
              aggfunc='mean')

Unnamed: 0_level_0,age,score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,27.5,82.5
M,38.333333,85.0


In [44]:
df.pivot_table(index='gender',
               values='score',
              aggfunc='mean')

Unnamed: 0_level_0,score
gender,Unnamed: 1_level_1
F,82.5
M,85.0


In [45]:
df.pivot_table(index=['gender','age'],
              aggfunc='mean')

Unnamed: 0_level_0,Unnamed: 1_level_0,score
gender,age,Unnamed: 2_level_1
F,18,75
F,20,90
F,22,80
F,50,85
M,25,90
M,30,95
M,35,75
M,40,85
M,45,70
M,55,95


In [46]:
df.pivot_table(index='gender',
               values=['age','score'],
              aggfunc='mean')

Unnamed: 0_level_0,age,score
gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,27.5,82.5
M,38.333333,85.0


In [47]:
df.pivot_table(index='gender',
               values=['age','score'],
              aggfunc=['mean','sum','count'])

Unnamed: 0_level_0,mean,mean,sum,sum,count,count
Unnamed: 0_level_1,age,score,age,score,age,score
gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
F,27.5,82.5,110,330,4,4
M,38.333333,85.0,230,510,6,6


## nlargest and nsmallest

In [48]:
df['score'].nlargest(3)

3    95
8    95
0    90
Name: score, dtype: int64

In [49]:
df.nlargest(3,'score')

Unnamed: 0,age,gender,score
3,30,M,95
8,55,M,95
0,25,M,90


In [50]:
df.nsmallest(3,'score')

Unnamed: 0,age,gender,score
4,45,M,70
2,18,F,75
6,35,M,75


## query

In [51]:
df.query('age>25')

Unnamed: 0,age,gender,score
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
8,55,M,95
9,40,M,85


In [52]:
df.query('age>25 and gender=="F"')

Unnamed: 0,age,gender,score
5,50,F,85


## sort_values()

In [53]:
df.sort_values(by='age')

Unnamed: 0,age,gender,score
2,18,F,75
7,20,F,90
1,22,F,80
0,25,M,90
3,30,M,95
6,35,M,75
9,40,M,85
4,45,M,70
5,50,F,85
8,55,M,95


## apply()

In [54]:
df


Unnamed: 0,age,gender,score
0,25,M,90
1,22,F,80
2,18,F,75
3,30,M,95
4,45,M,70
5,50,F,85
6,35,M,75
7,20,F,90
8,55,M,95
9,40,M,85


In [55]:
df['gender']

0    M
1    F
2    F
3    M
4    M
5    F
6    M
7    F
8    M
9    M
Name: gender, dtype: object

In [56]:
df['gender'].apply(str.lower)

0    m
1    f
2    f
3    m
4    m
5    f
6    m
7    f
8    m
9    m
Name: gender, dtype: object

In [57]:
# Apply an anonymous function element-wise: square every age.
df['age'].apply(lambda years: years ** 2)

0     625
1     484
2     324
3     900
4    2025
5    2500
6    1225
7     400
8    3025
9    1600
Name: age, dtype: int64

In [58]:
def age(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

In [59]:
df['age'].apply(age)

0     625
1     484
2     324
3     900
4    2025
5    2500
6    1225
7     400
8    3025
9    1600
Name: age, dtype: int64

In [60]:
# Second demo dataset, used by the string-method sections.
# Note that the City values carry stray leading/trailing spaces.
dict1 = dict(
    Name=['Priyang Bhatt', 'Rashmin Panchal', 'Anil Rana', 'Rahul Patel'],
    City=[' New York ', ' Los Angeles ', ' Chicago', ' Houston'],
    State=['NY', 'CA', 'IL', 'TX'],
)
data = pd.DataFrame(data=dict1)

In [61]:
data

Unnamed: 0,Name,City,State
0,Priyang Bhatt,New York,NY
1,Rashmin Panchal,Los Angeles,CA
2,Anil Rana,Chicago,IL
3,Rahul Patel,Houston,TX


## str.lower()

In [62]:
data['Name'].str.lower()

0      priyang bhatt
1    rashmin panchal
2          anil rana
3        rahul patel
Name: Name, dtype: object

In [63]:
data['name_lower']=data['Name'].str.lower()

In [64]:
data

Unnamed: 0,Name,City,State,name_lower
0,Priyang Bhatt,New York,NY,priyang bhatt
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal
2,Anil Rana,Chicago,IL,anil rana
3,Rahul Patel,Houston,TX,rahul patel


In [65]:
data['name_upper']=data['Name'].str.upper()

In [66]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL


## str.len()

In [67]:
data['Name'].str.len()

0    13
1    15
2     9
3    11
Name: Name, dtype: int64

In [68]:
data['Name_len']=data['Name'].str.len()

In [69]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11


## str.strip()

### `str.strip()` removes leading and trailing whitespace from each string

In [70]:
data['City'][0]

' New York '

In [71]:
data['City']

0        New York 
1     Los Angeles 
2          Chicago
3          Houston
Name: City, dtype: object

In [72]:
data["city_strip"]=data['City'].str.strip()

In [73]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston


In [74]:
data['City'][0]

' New York '

In [75]:
data['city_strip'][0]

'New York'

In [76]:
# str.lstrip() removes leading (left-side) whitespace only

In [77]:
data['city_lstrip']=data['City'].str.lstrip()

In [78]:
data['city_lstrip'][0]

'New York '

In [79]:
data['city_rstrip']=data['City'].str.rstrip()

In [80]:
data['city_rstrip'][0]

' New York'

## str.split()

In [81]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston


In [82]:
data['Name'].str.split(' ')

0      [Priyang, Bhatt]
1    [Rashmin, Panchal]
2          [Anil, Rana]
3        [Rahul, Patel]
Name: Name, dtype: object

In [83]:
# Split each name into exactly two parts. n=1 splits on the first space only,
# so a name with more than two words still produces two columns (everything
# after the first space lands in Last_name) instead of producing extra
# columns that would make the two-column assignment fail.
data[['First_name', 'Last_name']] = data['Name'].str.split(' ', n=1, expand=True)

In [84]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles,Rashmin,Panchal
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel


## str.contains()

In [85]:
data['Name'].str.contains('Bhatt')

0     True
1    False
2    False
3    False
Name: Name, dtype: bool

In [86]:
data[data['Name'].str.contains('Bhatt')]

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt


## str.replace()

In [87]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles,Rashmin,Panchal
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel


In [88]:
# Use the .str accessor so this cell actually demonstrates str.replace()
# (substring replacement) rather than Series.replace() (whole-value match).
# regex=False makes 'NY' a literal substring, not a regular expression.
data['State'].str.replace('NY', 'New York', regex=False)

0    New York
1          CA
2          IL
3          TX
Name: State, dtype: object

## str.startswith() and str.endswith()

In [89]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles,Rashmin,Panchal
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel


In [90]:
data['First_name'].str.startswith('P')

0     True
1    False
2    False
3    False
Name: First_name, dtype: bool

In [91]:
data[data['First_name'].str.startswith('P')]

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt


In [92]:
data['First_name'].str.endswith('l')

0    False
1    False
2     True
3     True
Name: First_name, dtype: bool

In [93]:
data[data['First_name'].str.endswith('l')]

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel


## str.cat()

In [94]:
data['First_name'].str.cat(data['Last_name'],sep=' ')

0      Priyang Bhatt
1    Rashmin Panchal
2          Anil Rana
3        Rahul Patel
Name: First_name, dtype: object

In [95]:
data['Full_name']=data['First_name'].str.cat(data['Last_name'],sep=' ')

In [96]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name,Full_name
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt,Priyang Bhatt
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles,Rashmin,Panchal,Rashmin Panchal
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana,Anil Rana
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel,Rahul Patel


## str.get()

In [97]:
data['Name'].str.get(0)

0    P
1    R
2    A
3    R
Name: Name, dtype: object

In [98]:
data['Name'].str.get(-1)

0    t
1    l
2    a
3    l
Name: Name, dtype: object

In [99]:
data['Name'].str.split()

0      [Priyang, Bhatt]
1    [Rashmin, Panchal]
2          [Anil, Rana]
3        [Rahul, Patel]
Name: Name, dtype: object

In [100]:
data['Name'].str.split().str.get(0)

0    Priyang
1    Rashmin
2       Anil
3      Rahul
Name: Name, dtype: object

In [101]:
data['Name'].str.split().str.get(1)

0      Bhatt
1    Panchal
2       Rana
3      Patel
Name: Name, dtype: object

In [102]:
data['Name'].str.split().str[0]

0    Priyang
1    Rashmin
2       Anil
3      Rahul
Name: Name, dtype: object

## str.slice()

In [103]:
data['Name'].str.slice(-3)

0    att
1    hal
2    ana
3    tel
Name: Name, dtype: object

In [104]:
data['Name'].str.slice(0,3)

0    Pri
1    Ras
2    Ani
3    Rah
Name: Name, dtype: object

In [105]:
data['Name'].str.slice(0,3)+' '+data['State']

0    Pri NY
1    Ras CA
2    Ani IL
3    Rah TX
dtype: object

In [106]:
# Build a short tag: the first three characters of the name, a space, then the state code.
data['name_state'] = data['Name'].str[:3].str.cat(data['State'], sep=' ')

## str.find()

In [107]:
data['Name']

0      Priyang Bhatt
1    Rashmin Panchal
2          Anil Rana
3        Rahul Patel
Name: Name, dtype: object

In [108]:
data['Name'].str.find('i')     # returns the index of the first occurrence of the substring, or -1 if it is not found

0    2
1    5
2    2
3   -1
Name: Name, dtype: int64

In [109]:
data['Name'].str.rfind('a')

0    10
1    13
2     8
3     7
Name: Name, dtype: int64

In [110]:
data

Unnamed: 0,Name,City,State,name_lower,name_upper,Name_len,city_strip,city_lstrip,city_rstrip,First_name,Last_name,Full_name,name_state
0,Priyang Bhatt,New York,NY,priyang bhatt,PRIYANG BHATT,13,New York,New York,New York,Priyang,Bhatt,Priyang Bhatt,Pri NY
1,Rashmin Panchal,Los Angeles,CA,rashmin panchal,RASHMIN PANCHAL,15,Los Angeles,Los Angeles,Los Angeles,Rashmin,Panchal,Rashmin Panchal,Ras CA
2,Anil Rana,Chicago,IL,anil rana,ANIL RANA,9,Chicago,Chicago,Chicago,Anil,Rana,Anil Rana,Ani IL
3,Rahul Patel,Houston,TX,rahul patel,RAHUL PATEL,11,Houston,Houston,Houston,Rahul,Patel,Rahul Patel,Rah TX
