In [1]:
import pandas as pd
import numpy as np

In [2]:
# Sample strings in mixed case plus two missing entries (np.nan and None);
# the .str accessor examples in the following cells operate on this Series.
s = pd.Series(
    [
        'X', 'Y', 'Z',
        'Aaba', 'Baca',
        np.nan,
        'CABA',
        None,
        'bird', 'horse', 'dog',
    ]
)
s

0         X
1         Y
2         Z
3      Aaba
4      Baca
5       NaN
6      CABA
7      None
8      bird
9     horse
10      dog
dtype: object

In [3]:
# Upper-case every string element; the missing entries (NaN / None) are
# passed through unchanged, as the output below shows.
s.str.upper()

0         X
1         Y
2         Z
3      AABA
4      BACA
5       NaN
6      CABA
7      None
8      BIRD
9     HORSE
10      DOG
dtype: object

In [4]:
# Lower-case every string element; the missing entries (NaN / None) are
# passed through unchanged, as the output below shows.
s.str.lower()

0         x
1         y
2         z
3      aaba
4      baca
5       NaN
6      caba
7      None
8      bird
9     horse
10      dog
dtype: object

In [5]:
# Length of each string element. Missing entries yield NaN, which is why the
# recorded result below is float64 rather than an integer dtype.
s.str.len()

0     1.0
1     1.0
2     1.0
3     4.0
4     4.0
5     NaN
6     4.0
7     NaN
8     4.0
9     5.0
10    3.0
dtype: float64

In [6]:
# Labels with stray leading and/or trailing spaces, used to contrast
# strip / lstrip / rstrip on an Index via the .str accessor.
color1 = pd.Index([' Green', 'Black ', ' Red ', 'White', ' Pink '])

# Each call prints a heading and the value on separate lines (sep="\n").
print("Original series:", color1, sep="\n")
print("\nRemove whitespace", color1.str.strip(), sep="\n")
print("\nRemove left sided whitespace", color1.str.lstrip(), sep="\n")
print("\nRemove Right sided whitespace", color1.str.rstrip(), sep="\n")

Original series:
Index([' Green', 'Black ', ' Red ', 'White', ' Pink '], dtype='object')

Remove whitespace
Index(['Green', 'Black', 'Red', 'White', 'Pink'], dtype='object')

Remove left sided whitespace
Index(['Green', 'Black ', 'Red ', 'White', 'Pink '], dtype='object')

Remove Right sided whitespace
Index([' Green', 'Black', ' Red', 'White', ' Pink'], dtype='object')


In [7]:
# Left-pad integer amounts with zeros to a fixed width of 8 characters.
nums = {'amount': [10, 250, 3000, 40000, 500000]}
print("Original dataframe:")
df = pd.DataFrame(nums)
print(df)
print("\nAdd leading zeros:")
# Convert to text, then pad with the vectorized .str.zfill instead of a
# per-row apply/format. zfill is sign-aware ('-10' -> '-0000010'), whereas the
# '{0:0>8}' format spec would pad to the left of the sign ('00000-10').
df['amount'] = df['amount'].astype(str).str.zfill(8)
print(df)

Original dataframe:
   amount
0      10
1     250
2    3000
3   40000
4  500000

Add leading zeros:
     amount
0  00000010
1  00000250
2  00003000
3  00040000
4  00500000


In [8]:
# Left-pad string amounts with zeros to a fixed width of 10 characters.
nums = {'amount': ['10', '250', '3000', '40000', '500000']}
print("Original dataframe:")
df = pd.DataFrame(nums)
print(df)
print("\nAdd leading zeros:")
# Vectorized .str.zfill replaces list(map(lambda ...)): same result for these
# values, and a missing entry would propagate as NaN instead of raising
# AttributeError inside the lambda.
df['amount'] = df['amount'].str.zfill(10)
print(df)

Original dataframe:
   amount
0      10
1     250
2    3000
3   40000
4  500000

Add leading zeros:
       amount
0  0000000010
1  0000000250
2  0000003000
3  0000040000
4  0000500000


In [9]:
# Capitalize the first letter of every entry in the 'name' column.
# NOTE(review): the 'date_of_birth ' key carries a trailing space; it is left
# untouched here so the recorded output stays valid.
df = pd.DataFrame({
    'name': ['alberto','gino','ryan', 'Eesha', 'syed'],
    'date_of_birth ': ['17/05/2002','16/02/1999','25/09/1998','11/05/2002','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})
print("Original DataFrame:")
print(df)
print("\nAfter capitalizing name column:")
# Vectorized .str.capitalize replaces list(map(lambda ...)): same result for
# these values, and missing names would propagate as NaN instead of raising.
df['name'] = df['name'].str.capitalize()
print(df)

Original DataFrame:
      name date_of_birth    age
0  alberto     17/05/2002  18.5
1     gino     16/02/1999  21.2
2     ryan     25/09/1998  22.5
3    Eesha     11/05/2002  22.0
4     syed     15/09/1997  23.0

After capitalizing name column:
      name date_of_birth    age
0  Alberto     17/05/2002  18.5
1     Gino     16/02/1999  21.2
2     Ryan     25/09/1998  22.5
3    Eesha     11/05/2002  22.0
4     Syed     15/09/1997  23.0


In [13]:
# Count how many times the character "2" occurs in each date_of_birth string.
df = pd.DataFrame({
    'name_code': ['c001','c002','c022', 'c2002', 'c2222'],
    'date_of_birth': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})
print("Original DataFrame:")
print(df)
print("\nCount occurrence of 2 in date_of_birth column:")
# Vectorized .str.count replaces list(map(lambda ...)). Its argument is a
# regular expression; the plain digit "2" has no special meaning, so the
# counts match str.count, and missing values would propagate as NaN.
df['count'] = df['date_of_birth'].str.count("2")
print(df)

Original DataFrame:
  name_code date_of_birth   age
0      c001    12/05/2002  18.5
1      c002    16/02/1999  21.2
2      c022    25/09/1998  22.5
3     c2002    12/02/2022  22.0
4     c2222    15/09/1997  23.0

Count occurrence of 2 in date_of_birth column:
  name_code date_of_birth   age  count
0      c001    12/05/2002  18.5      3
1      c002    16/02/1999  21.2      1
2      c022    25/09/1998  22.5      1
3     c2002    12/02/2022  22.0      5
4     c2222    15/09/1997  23.0      0


In [14]:
# Locate the substring '22' inside each name_code value.
df = pd.DataFrame({
    'name_code': ['c001', 'c002', 'c022', 'c2002', 'c2222'],
    'date_of_birth': ['12/05/2002', '16/02/1999', '25/09/1998', '12/02/2022', '15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})

print("Original DataFrame:")
print(df)

# Position of the first '22' in name_code, or -1 when it is absent.
# Vectorized .str.find replaces list(map(lambda ...)): same result for these
# values, and missing values would propagate as NaN instead of raising.
df['Index'] = df['name_code'].str.find('22')
# NOTE(review): the label below says "count", but the column holds the index
# returned by find, not a count. The string is kept unchanged so the recorded
# output stays valid; reword it when the notebook is next re-run.
print("\nDataFrame with count of '22' in name_code column:")
print(df)

Original DataFrame:
  name_code date_of_birth   age
0      c001    12/05/2002  18.5
1      c002    16/02/1999  21.2
2      c022    25/09/1998  22.5
3     c2002    12/02/2022  22.0
4     c2222    15/09/1997  23.0

DataFrame with count of '22' in name_code column:
  name_code date_of_birth   age  Index
0      c001    12/05/2002  18.5     -1
1      c002    16/02/1999  21.2     -1
2      c022    25/09/1998  22.5      2
3     c2002    12/02/2022  22.0     -1
4     c2222    15/09/1997  23.0      1


In [15]:
# Find the position of the character 'c' within the first five characters of
# each name_code value.
# NOTE(review): the 'date_of_birth ' key carries a trailing space; it is left
# untouched here so the recorded output stays valid.
df = pd.DataFrame({
    'name_code': ['c0001','1000c','b00c2', 'b2c02', 'c2222'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})
print("Original DataFrame:")
print(df)
print("\nIndex of a substring in a specified column of a dataframe:")
# Vectorized .str.find(sub, start, end) replaces list(map(lambda ...)): the
# search window is [0, 5) and the result is -1 when 'c' is not found; missing
# values would propagate as NaN instead of raising.
df['Index'] = df['name_code'].str.find('c', 0, 5)
print(df)

Original DataFrame:
  name_code date_of_birth    age
0     c0001     12/05/2002  18.5
1     1000c     16/02/1999  21.2
2     b00c2     25/09/1998  22.5
3     b2c02     12/02/2022  22.0
4     c2222     15/09/1997  23.0

Index of a substring in a specified column of a dataframe:
  name_code date_of_birth    age  Index
0     c0001     12/05/2002  18.5      0
1     1000c     16/02/1999  21.2      4
2     b00c2     25/09/1998  22.5      3
3     b2c02     12/02/2022  22.0      2
4     c2222     15/09/1997  23.0      0


In [19]:
# Flag which name_code values consist solely of alphanumeric characters.
# NOTE(review): the 'date_of_birth ' key carries a trailing space; it is left
# untouched here so the recorded output stays valid.
df = pd.DataFrame({
    'name_code': ['Company','Company a001','Company123', '1234', 'Company 12'],
    'date_of_birth ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'age': [18.5, 21.2, 22.5, 22, 23]
})
print("Original DataFrame:")
print(df)
print("\nWhether all characters in the string are alphanumeric?")
# Vectorized .str.isalnum replaces list(map(lambda ...)): values containing a
# space evaluate to False; missing values would not raise as they would
# inside the lambda.
df['name_code_is_alphanumeric'] = df['name_code'].str.isalnum()
print(df)

Original DataFrame:
      name_code date_of_birth    age
0       Company     12/05/2002  18.5
1  Company a001     16/02/1999  21.2
2    Company123     25/09/1998  22.5
3          1234     12/02/2022  22.0
4    Company 12     15/09/1997  23.0

Whether all characters in the string are alphanumeric?
      name_code date_of_birth    age  name_code_is_alphanumeric
0       Company     12/05/2002  18.5                       True
1  Company a001     16/02/1999  21.2                      False
2    Company123     25/09/1998  22.5                       True
3          1234     12/02/2022  22.0                       True
4    Company 12     15/09/1997  23.0                      False


In [21]:
# Flag which company_code values consist solely of alphabetic characters.
# NOTE(review): the 'date_of_sale ' key carries a trailing space; it is left
# untouched here so the recorded output stays valid.
df = pd.DataFrame({
    'company_code': ['Company','Company a001','123', 'abcd', 'Company 12'],
    'date_of_sale ': ['12/05/2002','16/02/1999','25/09/1998','12/02/2022','15/09/1997'],
    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]})

print("Original DataFrame:")
print(df)
print("\nWhether Alphabetic values present in company_code column?")
# Vectorized .str.isalpha replaces list(map(lambda ...)): True only when every
# character is a letter; missing values would not raise as they would inside
# the lambda.
df['company_code_is_alpha'] = df['company_code'].str.isalpha()
print(df)

Original DataFrame:
   company_code date_of_sale   sale_amount
0       Company    12/05/2002      12348.5
1  Company a001    16/02/1999     233331.2
2           123    25/09/1998         22.5
3          abcd    12/02/2022    2566552.0
4    Company 12    15/09/1997         23.0

Whether Alphabetic values present in company_code column?
   company_code date_of_sale   sale_amount  company_code_is_alpha
0       Company    12/05/2002      12348.5                   True
1  Company a001    16/02/1999     233331.2                  False
2           123    25/09/1998         22.5                  False
3          abcd    12/02/2022    2566552.0                   True
4    Company 12    15/09/1997         23.0                  False
