In [1]:
import numpy as np
import pandas as pd

In [2]:
# Deliberately messy sample names: stray whitespace, symbols and digits to clean up.
data = [
    'Pritam   ',   # trailing spaces
    'Kir    an',   # spaces in the middle
    '    pooja',   # leading spaces, lower-case
    '**sarang##',  # wrapped in symbols
    'Suji123$t',   # digits and a symbol inside the name
]
data

['Pritam   ', 'Kir    an', '    pooja', '**sarang##', 'Suji123$t']

## Preprocessing text data

In [3]:
# Wrap the raw list in a pandas Series so the vectorised .str methods can be applied to it.
s = pd.Series(data)
s

0     Pritam   
1     Kir    an
2         pooja
3    **sarang##
4     Suji123$t
dtype: object

In [4]:
# Use strip() from the .str accessor to remove leading and trailing whitespace.
# Spaces inside a value (e.g. 'Kir    an') are not affected.
s = s.str.strip()
s

0        Pritam
1     Kir    an
2         pooja
3    **sarang##
4     Suji123$t
dtype: object

In [5]:
# strip() also accepts the characters to remove: here every leading/trailing
# '*' and '#' is trimmed from both ends of each value.

s = s.str.strip('#*')
s

0       Pritam
1    Kir    an
2        pooja
3       sarang
4    Suji123$t
dtype: object

In [6]:
# strip() only touches the ends; characters in the middle of a value are
# removed with replace(). The space is matched as a literal string.
s = s.str.replace(' ', '', regex=False)
s

0       Pritam
1        Kiran
2        pooja
3       sarang
4    Suji123$t
dtype: object

In [7]:
# Remove the literal substring '123' from every value.
s = s.str.replace('123', '', regex=False)
s

0    Pritam
1     Kiran
2     pooja
3    sarang
4    Suji$t
dtype: object

In [8]:
# '$' is a regular-expression metacharacter (the end-of-string anchor), so ask for
# a literal match explicitly. This removes the dollar sign the same way on every
# pandas version and avoids the FutureWarning raised when `regex` is left unset.
s = s.str.replace('$', '', regex=False)
s

  s = s.str.replace('$','')


0    Pritam
1     Kiran
2     pooja
3    sarang
4     Sujit
dtype: object

In [9]:
# The original list is unchanged: every step above rebinds `s` and never touches `data`.
data

['Pritam   ', 'Kir    an', '    pooja', '**sarang##', 'Suji123$t']

In [10]:
# Convert the names to title case (first letter upper-case, the rest lower-case).
s = s.str.title()
s

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

### Sort the names present in the Series

In [11]:
s.sort_values()  # returns a new, sorted Series; s itself is not modified
# By default the values are sorted in ascending order (ascending=True).

1     Kiran
2     Pooja
0    Pritam
3    Sarang
4     Sujit
dtype: object

In [12]:
# To sort in descending order pass ascending=False (the default is ascending=True).
s.sort_values(ascending=False)

4     Sujit
3    Sarang
0    Pritam
2     Pooja
1     Kiran
dtype: object

In [13]:
# Keep the descending-sorted result under its own name so it can be reused below.
z = s.sort_values(ascending=False)
z

4     Sujit
3    Sarang
0    Pritam
2     Pooja
1     Kiran
dtype: object

In [14]:
# After sorting by value, the index labels of z are no longer in sequence.
# reset_index() assigns a fresh 0..n-1 index and moves the old labels into
# a column named "index".
z.reset_index()
# The result is a new DataFrame; z itself is not changed.

Unnamed: 0,index,0
0,4,Sujit
1,3,Sarang
2,0,Pritam
3,2,Pooja
4,1,Kiran


In [15]:
# drop=True discards the old index labels instead of adding them as an extra column.
z.reset_index(drop=True)
# The result is a new Series; z itself is not changed.

0     Sujit
1    Sarang
2    Pritam
3     Pooja
4     Kiran
dtype: object

In [16]:
# z still carries its original labels: the reset_index() calls above returned new objects.
z

4     Sujit
3    Sarang
0    Pritam
2     Pooja
1     Kiran
dtype: object

In [17]:
# sort_index() sorts by index label rather than by value.
z.sort_index()  # returns a new Series; z itself is not changed

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

In [18]:
# To make the change persist in z itself, pass the inplace=True parameter:
# the Series is modified in place and the call returns None, so this cell
# displays nothing. (Reassigning, z = z.sort_index(), achieves the same result.)
z.sort_index(inplace=True)

In [19]:
# z is now ordered by index label because the previous cell sorted it in place.
z

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

In [20]:
# A Series with explicit, non-sequential integer labels to contrast
# reset_index() with sort_index().
e = pd.Series(data=[10, 20, 30, 40], index=[0, 10, 4, 5])
e

0     10
10    20
4     30
5     40
dtype: int64

In [21]:
# reset_index(drop=True) does not sort anything: it discards the existing labels
# (0, 10, 4, 5) and numbers the values 0..n-1 in their current order.
e.reset_index(drop=True)

0    10
1    20
2    30
3    40
dtype: int64

In [22]:
# sort_index() orders the entries by index label; each value stays with its label.
e.sort_index()

0     10
4     30
5     40
10    20
dtype: int64

# ---------------------------------------------------

In [23]:
# Back to the cleaned, title-cased names.
s

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

In [24]:
# Solution 1: plain loop - print the number of characters in each name.
for name in s:
    print(len(name))

6
5
5
6
5


In [25]:
# Solution 2: the same lengths collected into a list with a comprehension.
[len(name) for name in s]

[6, 5, 5, 6, 5]

## apply(func)

In [27]:
def l(nm):
    """Return the number of characters in the name `nm`."""
    return len(nm)


# apply() calls the function once per element and returns the results as a Series.
s.apply(l)

0    6
1    5
2    5
3    6
4    5
dtype: int64

In [28]:
# s is unchanged by apply(); it still holds the title-cased names.
s

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

In [29]:
# Convert the names to upper case with the vectorised .str accessor.
s.str.upper()

0    PRITAM
1     KIRAN
2     POOJA
3    SARANG
4     SUJIT
dtype: object

In [31]:
# Alternative: apply() with the unbound str.upper method, called once per element.
s.apply(str.upper)

0    PRITAM
1     KIRAN
2     POOJA
3    SARANG
4     SUJIT
dtype: object

In [33]:
# Alternative: Python's built-in map(func, iterable), materialised as a plain list.
list(map(str.upper, s))

['PRITAM', 'KIRAN', 'POOJA', 'SARANG', 'SUJIT']

## map()

In [34]:
# Series.map() applies the function element-wise and returns a new Series.
s.map(str.upper)

0    PRITAM
1     KIRAN
2     POOJA
3    SARANG
4     SUJIT
dtype: object

In [35]:
# Interview question: given full names, extract the surnames.
k = pd.Series([
    'Arti Patil',
    'Deepali Bhosale',
    'Gaurav Desai',
])
k

0         Arti Patil
1    Deepali Bhosale
2       Gaurav Desai
dtype: object

### Problem statement - fetch a Series of surnames

In [36]:
# Split each full name on whitespace and keep the last token (the surname).
k.apply(lambda full_name: full_name.split()[-1])

0      Patil
1    Bhosale
2      Desai
dtype: object

In [37]:
# Same extraction with Series.map(): last whitespace-separated token of each name.
k.map(lambda full_name: full_name.split()[-1])

0      Patil
1    Bhosale
2      Desai
dtype: object

In [38]:
# Plain-Python alternative: print the last whitespace-separated token of each name.
for full_name in k:
    print(full_name.split()[-1])

Patil
Bhosale
Desai


In [39]:
# List-comprehension alternative: returns the surnames as a plain list.
[full_name.split()[-1] for full_name in k]

['Patil', 'Bhosale', 'Desai']

In [42]:
# Interview question: encode the labels as numbers -
# 'Male' becomes 0, anything else becomes 1.
m = pd.Series(data=['Male', 'Female', 'Female', 'Male'])
m

0      Male
1    Female
2    Female
3      Male
dtype: object

In [44]:
# Vectorised encoding: 0 where the value equals 'Male', 1 everywhere else.
# np.where returns a NumPy array, not a Series.
# (numpy is imported with the other imports in the first cell.)
np.where(m == 'Male', 0, 1)

array([0, 1, 1, 0])

In [45]:
# List-comprehension alternative: 0 for 'Male', 1 for any other value.
[0 if gender == 'Male' else 1 for gender in m]

[0, 1, 1, 0]

### Convert a Series to a list, dict, or NumPy array

In [46]:
# The cleaned names that the conversions below operate on.
s

0    Pritam
1     Kiran
2     Pooja
3    Sarang
4     Sujit
dtype: object

In [48]:
list(s)  # solution 1: the built-in list() iterates over the Series values

['Pritam', 'Kiran', 'Pooja', 'Sarang', 'Sujit']

In [49]:
s.to_list()  # solution 2: the Series' own conversion method

['Pritam', 'Kiran', 'Pooja', 'Sarang', 'Sujit']

In [50]:
# to_dict() maps each index label to its value.
s.to_dict()

{0: 'Pritam', 1: 'Kiran', 2: 'Pooja', 3: 'Sarang', 4: 'Sujit'}

In [51]:
# to_numpy() returns the values as a NumPy array (object dtype here, as the values are strings).
s.to_numpy()

array(['Pritam', 'Kiran', 'Pooja', 'Sarang', 'Sujit'], dtype=object)

In [52]:
# Amounts stored as text with a trailing 'Cr' or 'Dr' marker (presumably credit / debit).
d = pd.Series([
    '10000.00 Cr',
    '4500.00 Dr',
    '6000.00 Cr',
])
d

0    10000.00 Cr
1     4500.00 Dr
2     6000.00 Cr
dtype: object

In [53]:
# Keep only the 'Cr' entries: build the boolean mask once, then use it to filter.
# (A bare expression that is not the last line of a cell is computed and thrown
# away, so the mask is given a name instead of being evaluated twice.)
ends_with_cr = d.str.endswith('Cr')
d = d[ends_with_cr]
d

0    10000.00 Cr
2     6000.00 Cr
dtype: object

In [54]:
# Remove the trailing 'Cr' marker (and the whitespace before it) so only the
# numeric text remains. str.strip(' Cr') would treat ' ', 'C' and 'r' as a SET of
# characters to trim from BOTH ends, which gives the right answer here only by
# coincidence; anchoring the pattern to the end of the string states the intent.
d = d.str.replace(r'\s*Cr$', '', regex=True)
d

0    10000.00
2     6000.00
dtype: object

In [55]:
# Convert d from object dtype (numeric text) to float dtype.
d = d.astype(float)
d

0    10000.0
2     6000.0
dtype: float64