## 1. Regular Expressions

### <font color ='brown'> Notes </font>

#### Format: match = re.search(pat, str) 

+ a, X, 9, < -- ordinary characters just match themselves exactly. 
+ The meta-characters which do not match themselves because they have special meanings are: . ^ $ * + ? { [ ] \ | ( ) 
+ . (a period) -- matches any single character except newline '\n'
+ . (a period) -- matches any
+ \w -- (lowercase w) matches a "word" character: a letter or digit or underbar [a-zA-Z0-9_]. 
+ \b -- boundary between word and non-word
+ \s -- (lowercase s) matches a single whitespace character -- space, newline, return, tab, form [ \n\r\t\f]. 
+ \S (upper case S) matches any non-whitespace character.
+ \t, \n, \r -- tab, newline, return
+ \d -- decimal digit [0-9] 

+ ^ = start, $ = end -- match the start or end of the string
+ \ -- inhibit the "specialness" of a character. 

In [1]:
import re

In [2]:
match = re.search('ix','six')
print(match.group())
#print(match)

ix


In [3]:
match = re.search('jx','six')

if match:
    print('Found ', match.group())
else:
    print('No Match')

No Match


In [5]:
search_str = 'Patronising'
match = re.search('^Pat',search_str)

if match:
    print('Found ', match.group())
else:
    print('No Match')

Found  Pat


In [6]:
## End with
match = re.search('sing$',search_str)

if match:
    print('Found ', match.group())
else:
    print('No Match')

Found  sing


In [7]:
### Starts with
search_str = 'PPatronising'

match = re.search('^Patr',search_str)

if match:
    print('Found ', match.group())
else:
    print('No Match')

No Match


In [9]:
## . = any char but \n

match =  re.search('....','xyzabc')
if match:
    print('Found ', match.group())
else:
    print('No Match')

Found  xyza


+ \w -- (lowercase w) matches a "word" character: a letter or digit or underbar [a-zA-Z0-9_]. 
+ \d -- decimal digit [0-9] 

In [12]:
## \d = digit , \w =  char
#match = re.search('\d\d\d', 'A678B.C') 
match = re.search('\w\w\w@', 'rrabc@xy.com') 

if match:
    print('Found ', match.group())
else:
    print('No Match')

Found  abc@


In [13]:
## \s = space 
match = re.search('\s\d\d\d\s', 'B753 B.C 123 ') 

if match:
    print('Found ', match.group())
else:
    print('No Match')

Found   123 


#### Repetition

+ '+'   -- 1 or more occurrences of the pattern to its left, e.g. 'i+' = one or more i's
+ '*'   -- 0 or more occurrences of the pattern to its left
+ '?'   -- match 0 or 1 occurrences of the pattern to its left

In [20]:
match = re.search('hello+', 'helloooooo') 
if match:
    print('Found ', match.group())

Found  helloooooo


In [21]:
#match = re.search('hello*', 'hell') 
match = re.search('hello*', 'helloooo') 
if match:
    print('Found ', match.group())

Found  helloooo


In [22]:
## \s* = zero or more whitespace chars
## Here look for 3 digits, possibly separated by whitespace.
match = re.search('\d\s*\d\s*\d', 'xx1  23xx')
if match:
    print('Found ', match.group())

Found  1  23


#### Matching emails

In [23]:
str = 'My email is pink-elephant11@google.com. Thank you'

match = re.search('\w+-\w+@\w+\.com', str)
if match:
    print(match.group()  )


pink-elephant11@google.com


In [24]:
str = 'My email is pink-elephant11@google.com. You can also use white-elephant@gmail.com. Thank you'

match = re.search('\w+-\w+@\w+\.com', str)
if match:
    print(match.group()  )

pink-elephant11@google.com


In [25]:
## Suppose we have a text with many email addresses
my_str = 'My email is pink-elephant11@google.com. You can also use white-elephant@gmail.com. Thank you'


  ## re.findall() returns a list of all the found email strings
emails = re.findall('\w+-\w+@\w+\.com', my_str) 
for x in emails:
    print (x)
#print(emails)

pink-elephant11@google.com
white-elephant@gmail.com


In [26]:
# Match any one character
str = 'My email is pink-ele-phant11@google.com. Thank you'

match = re.search('[\w-]+@\w+\.com', str)
#match = re.search('\w+.\w+@\w+\.com', str)
if match:
    print(match.group()  )

pink-ele-phant11@google.com


In [27]:
match = re.search('\w+-\w+@\w+\.com', str)
if match:
    print(match.group()  )

ele-phant11@google.com


#### Group Extraction

In [29]:
#my_str = 'My email is pink-elephant@gmail.com. Thank you'
my_str = 'Please contact support@gmail.com for any queries. You can also reach me at pink-elephant@gmail.com. Thank you'


## Remove specialness of a character
match = re.search('([\w-]+)@([\w-]+\.com)', my_str)
if match:
    print('user name: ', match.group(1) )
    print('email provider: ', match.group(2) )

user name:  support
email provider:  gmail.com


In [30]:
## Suppose we have a text with many email addresses
my_str = 'Please contact support@gmail.com for any queries. You can also reach me at pink.elephant@gmail.com. Thank you'

  ## re.findall() returns a list of all the found email strings
emails = re.findall('[\w\.]+@[\w\.]+', my_str) 
for x in emails:
    print (x)
#print(emails)


support@gmail.com
pink.elephant@gmail.com.


### String Substitution

In [31]:
my_str = 'Anarkali'
print(re.sub('Anar','Champa',my_str))

Champakali


# 2. Math

In [34]:
import math

In [35]:
#dir(math)

In [36]:
math.pow(10,3)

1000.0

In [37]:
math.ceil(3.2)

4

In [38]:
math.floor(3.8)

3

In [39]:
math.trunc(4.7676)

4

In [40]:
round(4.22777,2)

4.23

In [41]:
round(4.6)

5

In [42]:
# log - base 2
math.log(3,2)

1.5849625007211563

In [43]:
# natural log - base e
math.log(3)

1.0986122886681098

In [44]:
math.sqrt(9)

3.0

In [45]:
math.pi

3.141592653589793

In [46]:
math.cos(0)

1.0

In [47]:
math.cos(90)

-0.4480736161291702

In [48]:
math.factorial(5)

120

# 3. Calendar

In [49]:
import calendar
#dir(calendar)

In [50]:
print ("The calendar of year 2023 is : ")
print(calendar.calendar(2023,1,1,5))

The calendar of year 2023 is : 
                                 2023

      January                  February                  March
Mo Tu We Th Fr Sa Su     Mo Tu We Th Fr Sa Su     Mo Tu We Th Fr Sa Su
                   1            1  2  3  4  5            1  2  3  4  5
 2  3  4  5  6  7  8      6  7  8  9 10 11 12      6  7  8  9 10 11 12
 9 10 11 12 13 14 15     13 14 15 16 17 18 19     13 14 15 16 17 18 19
16 17 18 19 20 21 22     20 21 22 23 24 25 26     20 21 22 23 24 25 26
23 24 25 26 27 28 29     27 28                    27 28 29 30 31
30 31

       April                     May                      June
Mo Tu We Th Fr Sa Su     Mo Tu We Th Fr Sa Su     Mo Tu We Th Fr Sa Su
                1  2      1  2  3  4  5  6  7               1  2  3  4
 3  4  5  6  7  8  9      8  9 10 11 12 13 14      5  6  7  8  9 10 11
10 11 12 13 14 15 16     15 16 17 18 19 20 21     12 13 14 15 16 17 18
17 18 19 20 21 22 23     22 23 24 25 26 27 28     19 20 21 22 23 24 25
24 25 26 27 28 29 30 

In [51]:
print ("The Dec month of 2023 is :")
print (calendar.month(2023,12,2,1))

The Dec month of 2023 is :
   December 2023
Mo Tu We Th Fr Sa Su
             1  2  3
 4  5  6  7  8  9 10
11 12 13 14 15 16 17
18 19 20 21 22 23 24
25 26 27 28 29 30 31



In [52]:
calendar.monthcalendar(2023,12)

[[0, 0, 0, 0, 1, 2, 3],
 [4, 5, 6, 7, 8, 9, 10],
 [11, 12, 13, 14, 15, 16, 17],
 [18, 19, 20, 21, 22, 23, 24],
 [25, 26, 27, 28, 29, 30, 31]]

In [53]:
calendar.isleap(2023)

False

# 4. datetime

In [54]:
import pandas as pd

In [55]:
df = pd.read_csv('date_format.csv')
df

Unnamed: 0,Directive,Description,Example
0,%a,"Weekday, short version",Wed
1,%A,"Weekday, full version",Wednesday
2,%w,"Weekday as a number 0-6, 0 is Sunday",3
3,%d,Day of month 01-31,31
4,%b,"Month name, short version",Dec
5,%B,"Month name, full version",December
6,%m,Month as a number 01-12,12
7,%y,"Year, short version, without century",18
8,%Y,"Year, full version",2018
9,%H,Hour 00-23,17


In [56]:
import datetime

In [57]:
x = datetime.datetime.now()
x

datetime.datetime(2023, 10, 20, 22, 44, 27, 799761)

In [58]:
type(x)

datetime.datetime

In [59]:
print(x)

2023-10-20 22:44:27.799761


In [60]:
print(x.year)

2023


In [61]:
print(x.month)

10


In [62]:
print(x.strftime("%B"))

October


In [63]:
print(x.strftime("%d-%b-%Y %I:%M %p"))

20-Oct-2023 10:44 PM


In [64]:
print(x.strftime("%dth %B %Y %H:%M %p"))

20th October 2023 22:44 PM


In [65]:
print(x.strftime("%d-%m-%y"))

20-10-23


##### a. Create your own datetime object

In [66]:
x_datetime=datetime.datetime(2024,1,1,20,25,0,0)
print(x_datetime)

2024-01-01 20:25:00


In [75]:
print(x_datetime.strftime('%A %dst %b %H:%M %p'))

Monday 01st Jan 20:25 PM


##### b. Create a date object

In [68]:
d = datetime.date(2022,12,6)
print(d)

2022-12-06


In [73]:
print("today's date: ", datetime.date.today())

today's date:  2023-10-20


##### c. Create a time object

In [69]:
t = datetime.time(18,50,12)
print(t)

18:50:12


In [70]:
print(t.strftime("%I:%M:%S %p"))

06:50:12 PM


##### Date difference

In [72]:
date1 = datetime.date(2018, 12, 13)
date2 = datetime.date(2019, 2, 25)

diff = date2 - date1
diff.days

74