In [1]:
# The following is to know when this notebook has been run and with which python version.
import time, sys
print(time.ctime())
print(sys.version.split('|')[0])

Wed Jul  1 13:29:17 2020
3.7.6 (default, Jan  8 2020, 13:42:34) 
[Clang 4.0.1 (tags/RELEASE_401/final)]


In [2]:
import numpy as np

# F Some useful libraries that make life easier

This is part of the Python lecture given by Christophe Morisset at IA-UNAM.

This lecture will give some insights to the most useful python libraries. It is NOT exhaustive, you have to read the corresponding manual pages to find the best use you can have of them. The list of all python-included libraries is here: https://docs.python.org/2/library/

### time, datetime and timeit

*  https://docs.python.org/2/library/time.html
*  https://docs.python.org/2/library/datetime.html

In [3]:
import time
import datetime

In [4]:
print(time.ctime()) # current time, in a string format

Wed Jul  1 13:29:24 2020


In [5]:
lt = time.localtime()
print(lt)

time.struct_time(tm_year=2020, tm_mon=7, tm_mday=1, tm_hour=13, tm_min=29, tm_sec=25, tm_wday=2, tm_yday=183, tm_isdst=1)


In [6]:
time.strftime("%a, %d %b %Y %H:%M:%S",lt)

'Wed, 01 Jul 2020 13:29:25'

In [7]:
print(datetime.datetime.today())

2020-07-01 13:29:26.085318


In [8]:
today = datetime.date.today()
philae_landing = datetime.date(2014, 11, 11)

In [9]:
time_to_landing = philae_landing - today

In [10]:
if philae_landing > today:
    print('Philae landing in {} days.'.format(time_to_landing.days))
else:
    print('Philae landed since {} days.'.format(-time_to_landing.days))

Philae landed since 2059 days.


In [11]:
start = time.time()
for i in np.arange(1000):
    t = np.math.factorial(i)
end = time.time()
print('1000 factorials done in {0:.3f} secs.'.format(end-start))

1000 factorials done in 0.021 secs.


In [12]:
from timeit import Timer
command = """\
for i in np.arange(1000):
    t = np.math.factorial(i)
"""
t = Timer(command, setup='import numpy as np')
print(t.timeit(number=10))

0.1761178519999973


in ipython, one can use the magic timeit function:

In [13]:
%timeit np.math.factorial(50)

709 ns ± 6.22 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


or for the whole cell:

In [14]:
%%timeit
for i in np.arange(1000):
    t = np.math.factorial(i)

17.8 ms ± 576 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


### os

https://docs.python.org/2/library/os.html 
This module provides a portable way of using operating system dependent functionality. If you just want to read or write a file see open(), if you want to manipulate paths, see the os.path module, and if you want to read all the lines in all the files on the command line see the fileinput module. For creating temporary files and directories see the tempfile module, and for high-level file and directory handling see the shutil module.

In [15]:
import os

In [16]:
# os.environ is a dictionnary containing the environment variables and values
if 'HOME' in os.environ:
    print(os.environ['HOME'])
else:
    print('This OS is too limited to understand HOME')

/Users/christophemorisset


In [17]:
# if variable doesn't exists, getenv returns None
print(os.getenv('HOMMMMMME'))

None


In [18]:
# get current working directory
cwd = os.getcwd()
print(cwd)
print(cwd.split('/')[-1])

/Users/christophemorisset/Google Drive/Pro/Python-MySQL/Notebooks/Notebooks
Notebooks


In [19]:
new_dir = '/tmp/test3456'
if not os.path.exists(new_dir):
    os.mkdir(new_dir)
    print('Dir {} created'.format(new_dir))
else:
    print('Dir {} exists'.format(new_dir))

Dir /tmp/test3456 created


In [20]:
# list all the files from a directory
os.listdir(new_dir)

[]

In [21]:
os.chdir(cwd)
os.getcwd()

'/Users/christophemorisset/Google Drive/Pro/Python-MySQL/Notebooks/Notebooks'

In [24]:
# Renaming files in a directory
cwd = os.getcwd()
os.chdir('/tmp')
for filename in os.listdir('./'):
    #print(filename)
    base_file, ext = os.path.splitext(filename)
    #print(base_file, ext)
    if ext == '.txt':
        newname = base_file + '.exe'
        os.rename(filename, newname)
        print('file {0} renamed to {1}'.format(filename, newname))
os.chdir(cwd)

file test.txt renamed to test.exe


### glob

https://docs.python.org/2/library/glob.html

No tilde expansion is done, but *, ?, and character ranges expressed with [] will be correctly matched.

In [22]:
from glob import glob

In [23]:
ipynb_files = glob('*.ipynb')
print(ipynb_files)

['Calling Fortran.ipynb', 'intro_Scipy.ipynb', 'Ex1_with_res.ipynb', 'Parallel.ipynb', 'OOP.ipynb', 'Using_astropy.ipynb', 'Interact with files.ipynb', 'Useful_libraries.ipynb', 'intro_numpy.ipynb', 'intro_Python.ipynb', 'Ex1_done.ipynb', 'Ex1.ipynb', 'intro_Matplotlib.ipynb', 'Optimization.ipynb', 'Using_PyMySQL.ipynb']


### pathlib

In [25]:
from pathlib import Path

In [44]:
new_dir = Path('/tmp/test1234')
print(new_dir)
print(new_dir.name)
print(new_dir.parent)

/tmp/test1234
test1234
/tmp


In [45]:
new_dir = Path('/tmp') / Path('test1234')
print(new_dir)
print(new_dir.name)
print(new_dir.parent)

/tmp/test1234
test1234
/tmp


In [46]:
if not new_dir.exists():
    new_dir.mkdir() # may use exist_ok=True
    print('Dir {} created'.format(new_dir))
else:
    print('Dir {} exists'.format(new_dir))

Dir /tmp/test1234 exists


In [47]:
current = Path('.')
for f in current.glob('*.ipynb'):
    print(f, f.stem, f.suffix)

Calling Fortran.ipynb Calling Fortran .ipynb
intro_Scipy.ipynb intro_Scipy .ipynb
Ex1_with_res.ipynb Ex1_with_res .ipynb
Parallel.ipynb Parallel .ipynb
OOP.ipynb OOP .ipynb
Using_astropy.ipynb Using_astropy .ipynb
Interact with files.ipynb Interact with files .ipynb
Useful_libraries.ipynb Useful_libraries .ipynb
intro_numpy.ipynb intro_numpy .ipynb
intro_Python.ipynb intro_Python .ipynb
Ex1_done.ipynb Ex1_done .ipynb
Ex1.ipynb Ex1 .ipynb
intro_Matplotlib.ipynb intro_Matplotlib .ipynb
Optimization.ipynb Optimization .ipynb
Using_PyMySQL.ipynb Using_PyMySQL .ipynb


In [62]:
new_dir = Path('/tmp') / Path('test1234')
for f in new_dir.glob('test*.txt*'):
    f.unlink()

f = new_dir / Path('test1234.txt')
f.touch()
f = new_dir / Path('test5678.txt')
f.touch()
print(list(new_dir.glob('test*.txt*')))

for f in new_dir.glob('test*.txt'):
    new_f = f.with_suffix('.txt2')
    if not new_f.exists():
        f.rename(new_f)
    else:
        print('{} exists'.format(new_f))

print(list(new_dir.glob('test*.txt*')))

[PosixPath('/tmp/test1234/test5678.txt'), PosixPath('/tmp/test1234/test1234.txt')]
[PosixPath('/tmp/test1234/test1234.txt2'), PosixPath('/tmp/test1234/test5678.txt2')]


### sys

https://docs.python.org/2/library/sys.html
This module provides access to some variables used or maintained by the interpreter and to functions that interact strongly with the interpreter. It is always available.

The sys.argv list contains the arguments passed to the script, when the interpreter was started. The first item contains the name of the script itself.

In [37]:
%%writefile sys_text.py
import sys
for arg in sys.argv:
    print(arg)



Overwriting sys_text.py


In [38]:
!cat sys_text.py

import sys
for arg in sys.argv:
    print(arg)


In [39]:
! python sys_text.py tralala

sys_text.py
tralala


In [40]:
%%writefile fact.py
import sys
import numpy
print numpy.math.factorial(int(sys.argv[1]))



Overwriting fact.py


In [41]:
! python fact.py 6

720


A more complete managment of the argument (especially those like --v) is available using the argparse library. https://docs.python.org/2/library/argparse.html

### Talking to the OS subprocess

The os.popen method is deprecated, subprocess is prefered. A good tutorial is here: http://pymotw.com/2/subprocess/

In [1]:
import subprocess

In [2]:
ls -l

total 74656
-rwxr-xr-x  1 christophemorisset  staff    89353 Oct 26  2016 [31m0040000_6.00_33_50_02_15.bin_0.1.gz[m[m*
-rw-------  1 christophemorisset  staff    85600 Jun 30  2017 0050000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86018 Jun 30  2017 0050000_7.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86741 Jun 30  2017 0060000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    88843 Jun 30  2017 0070000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    89360 Jun 30  2017 0080000_6.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    89971 Oct 26  2016 [31m0090000_6.00_33_50_02_15.bin_0.1.gz[m[m*
-rw-------  1 christophemorisset  staff    89722 Jun 30  2017 0100000_5.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    90544 Oct 26  2016 [31m0100000_6.00_33_50_02_15.bin_0.1.gz[m[m*
-rw-------  1 christophemorisset  staff    91192 Jun 30  2017 0100000_

In [3]:
subprocess.call(['ls -l'], shell=True)
# The results is not visible in Notebook. In command line, the files are listed

0

In [5]:
output = subprocess.check_output(['ls', '-l'])
print(type(output))
print(output.decode())

<class 'bytes'>
total 74656
-rwxr-xr-x  1 christophemorisset  staff    89353 Oct 26  2016 0040000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    85600 Jun 30  2017 0050000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86018 Jun 30  2017 0050000_7.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86741 Jun 30  2017 0060000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    88843 Jun 30  2017 0070000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    89360 Jun 30  2017 0080000_6.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    89971 Oct 26  2016 0090000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    89722 Jun 30  2017 0100000_5.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    90544 Oct 26  2016 0100000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    91192 Jun 30  2017 0100000_7.00_33_50_02_15.bin

In [6]:
command = 'ls -l'
popen = subprocess.Popen(command, shell=True)
# The same apply here, no output in Notebook
print(popen)

<subprocess.Popen object at 0x103a5d2e8>


In [7]:
popen = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
res = popen.communicate()[0]
print(type(res))
print(res.decode())

<class 'bytes'>
total 74656
-rwxr-xr-x  1 christophemorisset  staff    89353 Oct 26  2016 0040000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    85600 Jun 30  2017 0050000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86018 Jun 30  2017 0050000_7.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    86741 Jun 30  2017 0060000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    88843 Jun 30  2017 0070000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    89360 Jun 30  2017 0080000_6.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    89971 Oct 26  2016 0090000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    89722 Jun 30  2017 0100000_5.00_33_50_02_15.bin_0.1.gz
-rwxr-xr-x  1 christophemorisset  staff    90544 Oct 26  2016 0100000_6.00_33_50_02_15.bin_0.1.gz
-rw-------  1 christophemorisset  staff    91192 Jun 30  2017 0100000_7.00_33_50_02_15.bin

In [8]:
popen = subprocess.Popen(command, stdout=subprocess.PIPE, shell=True)
for line in iter(popen.stdout.readline, ""):
    lspl = line.split()
    if len(lspl)> 4:
        if int(lspl[4]) > 1000000:
            print(lspl[8])

b'CALIFA_ah7.dat.gz'
b'Demo2.pickle'
b'Demo3.pickle'
b'MySQL.pdf.gz'
b'intro_Matplotlib.ipynb'
b'intro_Matplotlib.pdf'
b'n10017o.fits'
b'swp04345.mxhi'


KeyboardInterrupt: 

### re

The re module provides regular expression tools for advanced string processing. For complex matching and manipulation, regular expressions offer succinct, optimized solutions:

In [1]:
import re
# Search the words starting with "f"
re.findall(r'\bf[a-z]*', 'whifch foOt or hand fell fastest')

['fo', 'fell', 'fastest']

In [32]:
# Remove duplicate words
re.sub(r'(\b[a-z]+) \1', r'\1', 'cat in the the hat')

'cat in the hat'

More in https://docs.python.org/2/library/re.html

### urllib2

In [34]:
# from urllib2 import urlopen # python 2
from urllib.request import urlopen

In [35]:
for line in urlopen('http://dev.on-rev.com/myip.irev'):
    print(line)
    if 'IP' in str(line):
        IP = str(line).split(':')[1][:-7]
print('-------')
print(IP)

b'<html>\n'
b'<body>\n'
b'<p>\n'
b'Remote IP Address:132.248.3.201</p>\n'
b'</body>\n'
b'</html>'
-------
132.248.3.201
