In [1]:
import pandas as pd
import re as re

In [2]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'sale_amount': [12348.5, 233331.2, 22.5, 2566552.0, 23.0]
})
print("Original DataFrame:")
print(df)

def find_valid_dates(dt):
    #format: mm-dd-yyyy
    result = re.findall(r'\b(1[0-2]|0[1-9])/(3[01]|[12][0-9]|0[1-9])/([0-9]{4})\b',dt)
    return result
df['valid_dates']=df['date_of_sale'].apply(lambda dt : find_valid_dates(dt))
print("\nValid dates (format: mm-dd-yyyy):")
print(df)

Original DataFrame:
  company_code date_of_sale  sale_amount
0         Abcd   12/05/2002      12348.5
1         EFGF   16/02/1999     233331.2
2      zefsalf   05/09/1998         22.5
3      sdfslew   12/02/2022    2566552.0
4      zekfsdf   15/09/1997         23.0

Valid dates (format: mm-dd-yyyy):
  company_code date_of_sale  sale_amount       valid_dates
0         Abcd   12/05/2002      12348.5  [(12, 05, 2002)]
1         EFGF   16/02/1999     233331.2                []
2      zefsalf   05/09/1998         22.5  [(05, 09, 1998)]
3      sdfslew   12/02/2022    2566552.0  [(12, 02, 2022)]
4      zekfsdf   15/09/1997         23.0                []


In [3]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'address': ['9910 Surrey Ave.','92 N. Bishop Ave.','9910 Golden Star Ave.', '102 Dunbar St.', '17 West Livingston Court']
})
print("Original DataFrame:")
print(df)

def search_words(text):
    result = re.findall(r'\b[^\d\W]+\b', text)
    return " ".join(result)

df['only_words']=df['address'].apply(lambda x : search_words(x))
print("\nOnly words:")
print(df)

Original DataFrame:
  company_code date_of_sale                   address
0         Abcd   12/05/2002          9910 Surrey Ave.
1         EFGF   16/02/1999         92 N. Bishop Ave.
2      zefsalf   05/09/1998     9910 Golden Star Ave.
3      sdfslew   12/02/2022            102 Dunbar St.
4      zekfsdf   15/09/1997  17 West Livingston Court

Only words:
  company_code date_of_sale                   address             only_words
0         Abcd   12/05/2002          9910 Surrey Ave.             Surrey Ave
1         EFGF   16/02/1999         92 N. Bishop Ave.           N Bishop Ave
2      zefsalf   05/09/1998     9910 Golden Star Ave.        Golden Star Ave
3      sdfslew   12/02/2022            102 Dunbar St.              Dunbar St
4      zekfsdf   15/09/1997  17 West Livingston Court  West Livingston Court


In [4]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'address': ['9910 Surrey Avenue','92 N. Bishop Avenue','9910 Golden Star Avenue', '102 Dunbar St.', '17 West Livingston Court']
})
print("Original DataFrame:")
print(df)

def pick_only_key_sentence(str1, word):
    result = re.findall(r'([^.]*'+word+'[^.]*)', str1)
    return result
df['filter_sentence']=df['address'].apply(lambda x : pick_only_key_sentence(x,'Avenue'))
print("\nText with the word 'Avenue':")
print(df)

Original DataFrame:
  company_code date_of_sale                   address
0         Abcd   12/05/2002        9910 Surrey Avenue
1         EFGF   16/02/1999       92 N. Bishop Avenue
2      zefsalf   05/09/1998   9910 Golden Star Avenue
3      sdfslew   12/02/2022            102 Dunbar St.
4      zekfsdf   15/09/1997  17 West Livingston Court

Text with the word 'Avenue':
  company_code date_of_sale                   address  \
0         Abcd   12/05/2002        9910 Surrey Avenue   
1         EFGF   16/02/1999       92 N. Bishop Avenue   
2      zefsalf   05/09/1998   9910 Golden Star Avenue   
3      sdfslew   12/02/2022            102 Dunbar St.   
4      zekfsdf   15/09/1997  17 West Livingston Court   

             filter_sentence  
0       [9910 Surrey Avenue]  
1           [ Bishop Avenue]  
2  [9910 Golden Star Avenue]  
3                         []  
4                         []  


In [5]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'address': ['9910 Surrey Avenue\n9910 Surrey Avenue','92 N. Bishop Avenue','9910 Golden Star Avenue', '102 Dunbar St.\n102 Dunbar St.', '17 West Livingston Court']
})

print("Original DataFrame:")
print(df)

def find_unique_sentence(str1):
    result = re.findall(r'(?sm)(^[^\r\n]+$)(?!.*^\1$)', str1)
    return result

df['unique_sentence']=df['address'].apply(lambda st : find_unique_sentence(st))
print("\nExtract unique sentences :")
print(df)

Original DataFrame:
  company_code date_of_sale                                 address
0         Abcd   12/05/2002  9910 Surrey Avenue\n9910 Surrey Avenue
1         EFGF   16/02/1999                     92 N. Bishop Avenue
2      zefsalf   05/09/1998                 9910 Golden Star Avenue
3      sdfslew   12/02/2022          102 Dunbar St.\n102 Dunbar St.
4      zekfsdf   15/09/1997                17 West Livingston Court

Extract unique sentences :
  company_code date_of_sale                                 address  \
0         Abcd   12/05/2002  9910 Surrey Avenue\n9910 Surrey Avenue   
1         EFGF   16/02/1999                     92 N. Bishop Avenue   
2      zefsalf   05/09/1998                 9910 Golden Star Avenue   
3      sdfslew   12/02/2022          102 Dunbar St.\n102 Dunbar St.   
4      zekfsdf   15/09/1997                17 West Livingston Court   

              unique_sentence  
0        [9910 Surrey Avenue]  
1       [92 N. Bishop Avenue]  
2   [9910 Golden Star

In [6]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'address': ['9910 Surrey Avenue','92 N. Bishop Avenue','9910 Golden Star Avenue', '102 Dunbar St.', '17 West Livingston Court']
})

print("Original DataFrame:")
print(df)

def find_capital_word(str1):
    result = re.findall(r'\b[A-Z]\w+', str1)
    return result

df['caps_word_in']=df['address'].apply(lambda cw : find_capital_word(cw))
print("\nExtract words starting with capital words from the sentences':")
print(df)

Original DataFrame:
  company_code date_of_sale                   address
0         Abcd   12/05/2002        9910 Surrey Avenue
1         EFGF   16/02/1999       92 N. Bishop Avenue
2      zefsalf   05/09/1998   9910 Golden Star Avenue
3      sdfslew   12/02/2022            102 Dunbar St.
4      zekfsdf   15/09/1997  17 West Livingston Court

Extract words starting with capital words from the sentences':
  company_code date_of_sale                   address  \
0         Abcd   12/05/2002        9910 Surrey Avenue   
1         EFGF   16/02/1999       92 N. Bishop Avenue   
2      zefsalf   05/09/1998   9910 Golden Star Avenue   
3      sdfslew   12/02/2022            102 Dunbar St.   
4      zekfsdf   15/09/1997  17 West Livingston Court   

                caps_word_in  
0           [Surrey, Avenue]  
1           [Bishop, Avenue]  
2     [Golden, Star, Avenue]  
3               [Dunbar, St]  
4  [West, Livingston, Court]  


In [7]:
df = pd.DataFrame({
    'company_code': ['Abcd','EFGF', 'zefsalf', 'sdfslew', 'zekfsdf'],
    'date_of_sale': ['12/05/2002','16/02/1999','05/09/1998','12/02/2022','15/09/1997'],
    'address': ['9910 Surrey <b>Avenue</b>','92 N. Bishop Avenue','9910 <br>Golden Star Avenue', '102 Dunbar <i></i>St.', '17 West Livingston Court']
})
print("Original DataFrame:")
print(df)
def remove_tags(string):
    result = re.sub('<.*?>','',string)
    return result
df['with_out_tags']=df['address'].apply(lambda cw : remove_tags(cw))
print("\nSentences without tags':")
print(df)

Original DataFrame:
  company_code date_of_sale                      address
0         Abcd   12/05/2002    9910 Surrey <b>Avenue</b>
1         EFGF   16/02/1999          92 N. Bishop Avenue
2      zefsalf   05/09/1998  9910 <br>Golden Star Avenue
3      sdfslew   12/02/2022        102 Dunbar <i></i>St.
4      zekfsdf   15/09/1997     17 West Livingston Court

Sentences without tags':
  company_code date_of_sale                      address  \
0         Abcd   12/05/2002    9910 Surrey <b>Avenue</b>   
1         EFGF   16/02/1999          92 N. Bishop Avenue   
2      zefsalf   05/09/1998  9910 <br>Golden Star Avenue   
3      sdfslew   12/02/2022        102 Dunbar <i></i>St.   
4      zekfsdf   15/09/1997     17 West Livingston Court   

              with_out_tags  
0        9910 Surrey Avenue  
1       92 N. Bishop Avenue  
2   9910 Golden Star Avenue  
3            102 Dunbar St.  
4  17 West Livingston Court  
