# Regex : re module

In [55]:
import re
pattern = 'this'
text = 'Does this text match the pattern?'
match = re.search(pattern, text)
s = match.start()
e = match.end()
print(f'Found "{match.re.pattern}" in the string : {match.string}\nfrom {s} to {e} ({text[s:e]})') 

Found "this" in the string : Does this text match the pattern?
from 5 to 9 (this)


In [56]:
import re
text = 'abbaaabbbbaaaaa'
pattern = 'ab'
for match in re.findall(pattern, text):
    print(f"{match}")

ab
ab


In [50]:
pattern = re.compile("d")
pattern.search("dog")

<re.Match object; span=(0, 1), match='d'>

In [49]:
'''
split() splits a string into a list delimited by the passed pattern.
The method is invaluable for converting textual data into data structures
that can be easily read and modified by Python as demonstrated in the following example that creates a phonebook.
'''
text = """Ross McFluff: 834.345.1254 155 Elm Street

Ronald Heathmore: 892.345.3428 436 Finley Avenue
Frank Burger: 925.541.7625 662 South Dogwood Way

Heather Albrecht: 548.326.4584 919 Park Place"""
entries = re.split("\n+", text)    
print(entries)
re.split(r'\W+', 'Words, words, words.')

['Ross McFluff: 834.345.1254 155 Elm Street', 'Ronald Heathmore: 892.345.3428 436 Finley Avenue', 'Frank Burger: 925.541.7625 662 South Dogwood Way', 'Heather Albrecht: 548.326.4584 919 Park Place']


['Words', 'words', 'words', '']

In [57]:
'''sub() replaces every occurrence of a pattern with a string or the result of a function.
This example demonstrates using sub() with a function to “munge” text, or randomize the
order of all the characters in each word of a sentence except for the first and last characters:'''

import re ,random
def repl(m):
    inner_word = list(m.group(2))
    print(inner_word)
    random.shuffle(inner_word)
    return m.group(1) + "".join(inner_word) + m.group(3)

text = "Professor Abdolmalek, please report your absences promptly."
re.sub(r"(\w)(\w+)(\w)", repl, text)

['r', 'o', 'f', 'e', 's', 's', 'o']
['b', 'd', 'o', 'l', 'm', 'a', 'l', 'e']
['l', 'e', 'a', 's']
['e', 'p', 'o', 'r']
['o', 'u']
['b', 's', 'e', 'n', 'c', 'e']
['r', 'o', 'm', 'p', 't', 'l']


'Poesfosrr Aeldbalmok, paelse rpreot yuor absneecs pmpotlry.'

In [58]:
# findall() matches all occurrences of a pattern
text = "He was carefully disguised but captured quickly by police."
re.findall(r"\w+ly", text)


['carefully', 'quickly']

In [59]:
matched_obj=re.finditer(r"\w+ly", text)



<callable_iterator at 0x7c109e8>

# Datetime

In [60]:
import datetime

# get Current Time
print(datetime.time)

# Get Todays Date
print(datetime.date.today())


<class 'datetime.time'>
2019-05-16


In [61]:
# Date Arithmetic
import datetime
today = datetime.date.today()
print ('Today :', today)
one_day = datetime.timedelta(days=1)
print ('One day :', one_day)
yesterday = today - one_day
print ('Yesterday:', yesterday)

# Comparing Values
import datetime
import time
print('Times:')
t1 = datetime.time(12, 55, 0)
print (' t1:', t1)
t2 = datetime.time(13, 5, 0)
print (' t2:', t2)
print (' t1 < t2:', t1 < t2)
print ('Dates:')
d1 = datetime.date.today()
print( ' d1:', d1)
d2 = datetime.date.today() + datetime.timedelta(days=1)
print (' d2:', d2)
print (' d1 > d2:', d1 > d2)

Today : 2019-05-16
One day : 1 day, 0:00:00
Yesterday: 2019-05-15
Times:
 t1: 12:55:00
 t2: 13:05:00
 t1 < t2: True
Dates:
 d1: 2019-05-16
 d2: 2019-05-17
 d1 > d2: False


In [19]:
#strftime() and strptime()

dt=datetime.datetime.strptime("21/11/06 16:30", "%d/%m/%y %H:%M")
print(dt)

dt.strftime("%A, %d. %B %Y %I:%M%p")


#Refer http://strftime.org/ for More Details regarding formatting

2006-11-21 16:30:00


'Tuesday, 21. November 2006 04:30PM'

# Json Module

In [62]:
import json
data = [ { 'a':'A', 'b':(2, 4), 'c':3.0 } ]

#Note :  repr() --> returns a printable representational string of the given object.
print('DATA:', repr(data))
data_string = json.dumps(data)
print('JSON:', data_string)

DATA: [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
JSON: [{"a": "A", "b": [2, 4], "c": 3.0}]


In [63]:
#Encoding, and then redecoding, may not give exactly the same type of object.
import json
data = [ { 'a':'A', 'b':(2, 4), 'c':3.0 } ]
print('DATA :', data)
data_string = json.dumps(data)
print('ENCODED:', data_string)
decoded = json.loads(data_string)
print('DECODED:', decoded)
print('ORIGINAL:', type(data[0]['b']))
print('DECODED :', type(decoded[0]['b']))

DATA : [{'a': 'A', 'b': (2, 4), 'c': 3.0}]
ENCODED: [{"a": "A", "b": [2, 4], "c": 3.0}]
DECODED: [{'a': 'A', 'b': [2, 4], 'c': 3.0}]
ORIGINAL: <class 'tuple'>
DECODED : <class 'list'>


In [64]:
import json

data = {}  
data['people'] = []  
data['people'].append({  
    'name': 'Scott',
    'website': 'stackabuse.com',
    'from': 'Nebraska'
})
data['people'].append({  
    'name': 'Larry',
    'website': 'google.com',
    'from': 'Michigan'
})
data['people'].append({  
    'name': 'Tim',
    'website': 'apple.com',
    'from': 'Alabama'
})

with open('data.txt', 'w') as outfile:  
    json.dump(data, outfile)

In [65]:
import json

with open('data.txt') as json_file:  
    data = json.load(json_file)
    for p in data['people']:
        print('Name: ' + p['name'])
        print('Website: ' + p['website'])
        print('From: ' + p['from'])
        print('')

Name: Scott
Website: stackabuse.com
From: Nebraska

Name: Larry
Website: google.com
From: Michigan

Name: Tim
Website: apple.com
From: Alabama



# csv Module

In [66]:
# Reading CSV Files

import csv
import sys
with open('sample.csv', 'rt') as f:
    reader = csv.reader(f)
    for row in reader:
        print(row)

['record1', 'col1', 'col2', 'col3', 'col4']
['record2', 'col1', 'col2', 'col3', 'col4']
['record3', 'col1', 'col2', 'col3', 'col4']
['record4', 'col1', 'col2', 'col3', 'col4']
['record5', 'col1', 'col2', 'col3', 'col4']


In [68]:
# Writing CSV Filesimport csv

# 'rt' --> read + text mode
# 'wt' --> write + text mode
# note : text mode is not mandatory
# Character   Meaning
# 'r'     open for reading (default)
# 'w'     open for writing, truncating the file first
# 'x'     open for exclusive creation, failing if the file already exists
# 'a'     open for writing, appending to the end of the file if it exists
# 'b'     binary mode
# 't'     text mode (default)
# '+'     open a disk file for updating (reading and writing)
# 'U'     universal newlines mode (deprecated)
import sys
with open('out.csv', 'wt') as f:
    writer = csv.writer(f)
    writer.writerow( ('Title 1', 'Title 2', 'Title 3') )
    for i in range(3):
        writer.writerow( (i+1,chr(ord('a') + i),'08/%02d/07' % (i+1),))
    
print(open('out.csv', 'rt').read())

Title 1,Title 2,Title 3

1,a,08/01/07

2,b,08/02/07

3,c,08/03/07




In [69]:
# list existing dialects
print(csv.list_dialects())

# using Custom Delimiters :
import csv
csv.register_dialect('pipes', delimiter='|')
with open('testpipe.txt', 'r') as f:
    reader = csv.reader(f, dialect='pipes')
    for row in reader:
        print(row)

['excel', 'excel-tab', 'unix']
['Title 1', 'Title 2', 'Title 3']
['1', 'first line\nsecond line', '08/18/07']


# glob Module


In [71]:
import glob
print('Named explicitly:')
for name in glob.glob('*.txt'):
    print('\t', name)
print('Named with wildcard:')
for name in glob.glob('../*/*'):
    print('\t', name)

Named explicitly:
	 23.txt
	 common.txt
	 data.txt
	 README.txt
	 testpipe.txt
Named with wildcard:
	 ..\Misc\(Treading on Python 2) Matt Harrison-Treading on Python Volume 2_ Intermediate Python. 2-hairysun (2013).pdf
	 ..\Misc\(Treading on Python 2) Matt Harrison-Treading on Python Volume 2_ Intermediate Python. 2-hairysun (2013)1.pdf
	 ..\Misc\Misc
	 ..\Misc\Python standard library by example (2).pdf
	 ..\Misc\Python standard library by example.pdf
	 ..\Misc\Steven Lott - Functional Python Programming (2015, Packt Publishing).pdf
	 ..\Session 1  Python Bootstrap\Code Walkthrough.ipynb
	 ..\Session 1  Python Bootstrap\int_id.png
	 ..\Session 1  Python Bootstrap\list_id.png
	 ..\Session 1  Python Bootstrap\Python Environment Setup.pptx
	 ..\Session 1  Python Bootstrap\Python- Bootstrap Zero to Intermediate.pptx
	 ..\Session 1  Python Bootstrap\string_id.png
	 ..\Session 1  Python Bootstrap\tuple_id.png
	 ..\Session 1  Python Bootstrap\Untitled.ipynb
	 ..\Session 1  Python Bootstrap\Un

In [72]:
# Use character range ([a-z]) instead of a question mark to match one of several characters.
import glob
for name in glob.glob('*[0-9].txt'):
    print(name)

23.txt


# shutil module

In [73]:
import shutil

shutil.copy('23.txt','misc/')

'misc/23.txt'

In [74]:
shutil.move('misc/23.txt','misc/24.txt')

'misc/24.txt'

In [75]:
usage=shutil.disk_usage('.') # Usage show in Bytes
usage

usage(total=467526479872, used=287651012608, free=179875467264)

In [66]:
usage.total/1024**2 # Total Disk Size in GB

445867.99609375

In [67]:
usage.used/1024**2 # Used Disk Size in GB

274543.33203125

# filecmp module

In [76]:
import filecmp
print(filecmp.cmp('misc/24.txt','23.txt')) # True --> Same File Contents
print(filecmp.cmp('data.txt','23.txt')) # False --> Different File Contents
# By default shallow = True
filecmp.cmp('data.txt','23.txt',shallow=False) # shallow = False --> compare based on File Contents

True
False


False

(['data.txt', '23.txt'], [], [])

In [82]:
import os
os.stat('data.txt')

os.stat_result(st_mode=33206, st_ino=15199648742533941, st_dev=2020546160, st_nlink=1, st_uid=0, st_gid=0, st_size=204, st_atime=1557942225, st_mtime=1557986328, st_ctime=1557942225)

In [77]:
import filecmp
filecmp.dircmp(r'../Session 3 Iterator,Generators and Applications/',r'../Session 4 Python In Built Modules/').report()

diff ../Session 3 Iterator,Generators and Applications/ ../Session 4 Python In Built Modules/
Only in ../Session 3 Iterator,Generators and Applications/ : ['Iterator,Generators and Applications.ipynb', 'Iterator,Generators and Applications.pptx', '~$Functional Programming.pptx']
Only in ../Session 4 Python In Built Modules/ : ['23.txt', 'Built in Modules.ipynb', 'Python In-built Modules.pptx', 'README.txt', 'append.zip', 'data.txt', 'misc', 'out.csv', 'pyzipfile.zip', 'sample.csv', 'sample.zip', 'testpipe.txt', 'write.zip', '~$Python In-built Modules.pptx']
Identical files : ['common.txt']
Common subdirectories : ['.ipynb_checkpoints']


# Zipfile module

In [85]:
import zipfile

# Check file is a zip or not
print(zipfile.is_zipfile('sample.zip'))

zipfile.is_zipfile('sample.csv')

True


False

In [10]:
# Reading Metadata from an Archive
import zipfile
with zipfile.ZipFile('sample.zip', 'r') as zf:
    print(zf.namelist())

['sample.csv', 'testpipe.txt']


In [85]:
# access all the metadata about the ZIP contents, use the infolist() or getinfo()
with zipfile.ZipFile('sample.zip') as zf:
    for fn in zf.namelist():
        print(zf.getinfo(fn))

<ZipInfo filename='sample.csv' compress_type=deflate external_attr=0x20 file_size=139 compress_size=37>
<ZipInfo filename='testpipe.txt' compress_type=deflate external_attr=0x20 file_size=65 compress_size=52>


In [86]:
# Extracting Archived Files from an Archive
import zipfile
with zipfile.ZipFile('sample.zip') as zf:
    for filename in [ 'sample.csv', 'notthere.txt' ]:
        try:
            data = zf.read(filename)
        except KeyError:
            print(f'ERROR: Did not find {filename} in zip file ')
        else:
            print(filename, ':')
            print(data)

        

sample.csv :
b'record1,col1,col2,col3,col4\nrecord2,col1,col2,col3,col4\nrecord3,col1,col2,col3,col4\nrecord4,col1,col2,col3,col4\nrecord5,col1,col2,col3,col4'
ERROR: Did not find notthere.txt in zip file 


In [87]:
#Creating New Archives

with zipfile.ZipFile('write.zip', mode='w') as zf:
    print('adding README.txt')
    zf.write('README.txt')
with zipfile.ZipFile('write.zip', 'r') as zf:
    print(zf.namelist())


adding README.txt
['README.txt']


In [12]:
import zipfile
print('creating archive')

with zipfile.ZipFile('append.zip', mode='w') as zf:
    zf.write('README.txt',arcname='readme1.txt')
with zipfile.ZipFile('append.zip', mode='a') as zf:
    zf.write('README.txt',arcname='readme2.txt')

creating archive


In [13]:
with zipfile.ZipFile('append.zip', 'r') as zf:
    print(zf.namelist())


['readme1.txt', 'readme2.txt']


In [None]:
# Note : zipfile module does not support ZIP files with appended comments or multidisk
# archives. It does support ZIP files larger than 4 GB that use the ZIP64 extensions.

# getpass

In [88]:
import getpass
try:
    p = getpass.getpass()
except Exception as err:
    print(f'ERROR:{err}')
else:
    print(f'You entered:{p}')

········
You entered:Test


In [89]:
# Adding text to input prompt 
p = getpass.getpass(prompt='What is your favorite color?')
print(p)

What is your favorite color?········
blue


# sys Module 

In [1]:
import sys
print(sys.executable)

C:\Users\egokkul\AppData\Local\Continuum\anaconda3\envs\MLAI\python.exe


In [2]:
print(sys.prefix)
print(sys.getrecursionlimit())


C:\Users\egokkul\AppData\Local\Continuum\anaconda3\envs\MLAI
3000


In [28]:
print(sys.getfilesystemencoding())

utf-8


In [3]:
print(sys.path)

['C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI\\python36.zip', 'C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI\\DLLs', 'C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI\\lib', 'C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI', '', 'C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI\\lib\\site-packages', 'C:\\Users\\egokkul\\AppData\\Local\\Continuum\\anaconda3\\envs\\MLAI\\lib\\site-packages\\IPython\\extensions', 'C:\\Users\\egokkul\\.ipython']


# platform Module


In [6]:
import platform

platform.python_version()

'3.6.8'

In [7]:
print(f'Compiler Version : {platform.python_compiler()}')
print(f'Build : {platform.python_build()}')

Compiler Version : MSC v.1916 64 bit (AMD64)
Build : ('default', 'Feb 21 2019 18:30:04')


In [8]:
print('uname: ',platform.uname())
print('system: ',platform.system())
print('node: ',platform.node())
print('release: ',platform.release())
print('version: ',platform.version())
print('processor: ',platform.processor())

uname:  uname_result(system='Windows', node='5CD7033NH4', release='7', version='6.1.7601', machine='AMD64', processor='Intel64 Family 6 Model 78 Stepping 3, GenuineIntel')
system:  Windows
node:  5CD7033NH4
release:  7
version:  6.1.7601
processor:  Intel64 Family 6 Model 78 Stepping 3, GenuineIntel


# logging

In [9]:
import logging
LOG_FILENAME = 'logging_example.out'
logging.basicConfig(filename=LOG_FILENAME,level=logging.DEBUG)

logging.debug('This message should go to the log file')

with open(LOG_FILENAME, 'rt') as f:
    body = f.read()
print('FILE:')
print(body)

FILE:
DEBUG:root:This message should go to the log file



In [11]:
import glob
import logging
import logging.handlers
LOG_FILENAME = 'logging_rotatingfile_example.log'
# Set up a specific logger with our desired output level
my_logger = logging.getLogger('MyLogger')
my_logger.setLevel(logging.INFO)

# Add the log message handler to the logger
handler = logging.handlers.RotatingFileHandler(LOG_FILENAME,
maxBytes=20,
backupCount=5,
)
my_logger.addHandler(handler)
# Log some messages
for i in range(20):
    my_logger.debug('i = %d' % i)
    # See what files are created
    
logfiles = glob.glob('%s*' % LOG_FILENAME)
for filename in logfiles:
    print(filename)

logging_rotatingfile_example.log
logging_rotatingfile_example.log.1
logging_rotatingfile_example.log.2
logging_rotatingfile_example.log.3
logging_rotatingfile_example.log.4
logging_rotatingfile_example.log.5


In [1]:
import logging
import sys
LEVELS = { 'debug':logging.DEBUG,
'info':logging.INFO,
'warning':logging.WARNING,
'error':logging.ERROR,
'critical':logging.CRITICAL,
}

# if a message is CRITICAL and the loggeris set to ERROR, the message is emitted (50 > 40).
# If a message is a WARNING and the logger is set to produce only messages set to ERROR, the message is not emitted (30 < 40).
level_name=input('Enter the log Level: ')
level = LEVELS.get(level_name, logging.NOTSET)
print(level)
logging.basicConfig(level=level)


Enter the log Level: info
20


In [2]:
logging.debug('This is a debug message')
logging.info('This is an info message')
logging.warning('This is a warning message')
logging.error('This is an error message')
logging.critical('This is a critical error message')

INFO:root:This is an info message
ERROR:root:This is an error message
CRITICAL:root:This is a critical error message


# Subprocess Module

In [3]:
import subprocess

print(subprocess.call(['ipconfig'])) # 0 Process was Execute Successfully
subprocess.call("echo $HOME", shell=True)

0


0

In [6]:
import subprocess
import pprint
output = subprocess.check_output(['ipconfig'])
pprint.pprint(output)

(b'\r\nWindows IP Configuration\r\n\r\n\r\nEthernet adapter Bluetooth Network C'
 b'onnection:\r\n\r\n   Media State . . . . . . . . . . . : Media disconnec'
 b'ted\r\n   Connection-specific DNS Suffix  . : \r\n\r\nWireless LAN adapter'
 b' Wireless Network Connection 3:\r\n\r\n   Media State . . . . . . . . . . . '
 b': Media disconnected\r\n   Connection-specific DNS Suffix  . : \r\n\r\nWir'
 b'eless LAN adapter Wireless Network Connection:\r\n\r\n   Connection-specific'
 b' DNS Suffix  . : in.ao.ericsson.se\r\n   Link-local IPv6 Address . . . . .'
 b' : fe80::8c9a:e680:1933:1f85%14\r\n   IPv4 Address. . . . . . . . . . . : '
 b'100.97.124.86\r\n   Subnet Mask . . . . . . . . . . . : 255.255.240.0\r\n   '
 b'Default Gateway . . . . . . . . . : 100.97.112.1\r\n\r\nEthernet adapter Loc'
 b'al Area Connection:\r\n\r\n   Media State . . . . . . . . . . . : Media disc'
 b'onnected\r\n   Connection-specific DNS Suffix  . : ericsson.se\r\n\r\nEthe'
 b'rnet adapter VirtualBox Host-Only Netwo

In [11]:
import pdb 
LOG_FILENAME='logging_example.out'
with open(LOG_FILENAME, 'rt') as f:
    body = f.read()
    key
print('FILE:')
print(body)

NameError: name 'key' is not defined

In [12]:
%debug

None
> [1;32m<ipython-input-11-68fddba34909>[0m(5)[0;36m<module>[1;34m()[0m
[1;32m      3 [1;33m[1;32mwith[0m [0mopen[0m[1;33m([0m[0mLOG_FILENAME[0m[1;33m,[0m [1;34m'rt'[0m[1;33m)[0m [1;32mas[0m [0mf[0m[1;33m:[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m      4 [1;33m    [0mbody[0m [1;33m=[0m [0mf[0m[1;33m.[0m[0mread[0m[1;33m([0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m----> 5 [1;33m    [0mkey[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m      6 [1;33m[0mprint[0m[1;33m([0m[1;34m'FILE:'[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m[1;32m      7 [1;33m[0mprint[0m[1;33m([0m[0mbody[0m[1;33m)[0m[1;33m[0m[1;33m[0m[0m
[0m
ipdb> body
ipdb> q
