Convert two lists into one dict

In [1]:
# Pair each key with the value at the same position and collect the pairs in a dict.
key = ['a', 'b']
values = ['life', 'hard']
dict_new = {k: v for k, v in zip(key, values)}

In [2]:
print(dict_new)

{'a': 'life', 'b': 'hard'}


# Sequence

# Function

1. Avoid modifying global variables inside a function.
2. A Python function always returns a value (None when there is no explicit return statement).

In [48]:
# A function with no explicit return statement returns None by default
def hello():
    """Print a greeting; there is no return statement, so the call evaluates to None."""
    print('Hello~')
print(hello())

Hello~
None


Lambda function

In [49]:
lambda x: 2*x + 1

<function __main__.<lambda>(x)>

BIF: filter() and map()

In [50]:
help(filter)

Help on class filter in module builtins:

class filter(object)
 |  filter(function or None, iterable) --> filter object
 |  
 |  Return an iterator yielding those items of iterable for which function(item)
 |  is true. If function is None, return the items that are true.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



filter function example

In [53]:
temp = filter(None, [1, 0, False, True])
list(temp)

[1, True]

In [54]:
def odd(x):
    """Return x % 2: 1 (truthy) for odd integers, 0 (falsy) for even ones."""
    return x % 2

temp = filter(odd, range(10))
list(temp)

[1, 3, 5, 7, 9]

In [56]:
temp = filter(lambda x: x % 2, range(10))
list(temp)

[1, 3, 5, 7, 9]

map function

In [55]:
help(map)

Help on class map in module builtins:

class map(object)
 |  map(func, *iterables) --> map object
 |  
 |  Make an iterator that computes the function using arguments from
 |  each of the iterables.  Stops when the shortest iterable is exhausted.
 |  
 |  Methods defined here:
 |  
 |  __getattribute__(self, name, /)
 |      Return getattr(self, name).
 |  
 |  __iter__(self, /)
 |      Implement iter(self).
 |  
 |  __next__(self, /)
 |      Implement next(self).
 |  
 |  __reduce__(...)
 |      Return state information for pickling.
 |  
 |  ----------------------------------------------------------------------
 |  Static methods defined here:
 |  
 |  __new__(*args, **kwargs) from builtins.type
 |      Create and return a new object.  See help(type) for accurate signature.



In [57]:
temp = map(lambda x: x%2, range(10))
list(temp)

[0, 1, 0, 1, 0, 1, 0, 1, 0, 1]

# Recursive Function

In [59]:
def factorial(n):
    """Return n! (the product 1 * 2 * ... * n) computed recursively.

    Args:
        n: a non-negative whole number.

    Returns:
        1 for n equal to 0 or 1, otherwise n * factorial(n - 1).

    Raises:
        ValueError: if n is negative or not a whole number; the recursion
            would otherwise never reach its base case.
    """
    if n < 0 or n != int(n):
        raise ValueError('factorial() needs a non-negative whole number')
    # Base case: 0! and 1! are both 1. Testing `n <= 1` instead of `n == 1`
    # is what lets factorial(0) terminate.
    if n <= 1:
        return 1
    return n * factorial(n - 1)

print(factorial(5))

120


# Python Object Fundamental

private and public variable in Class

In [2]:
class Person:
    # Public class attribute: readable from any instance as p.name
    name = 'littlefish'

In [3]:
p = Person()
p.name

'littlefish'

In [4]:
# name mangling
class Person:
    # A double leading underscore makes Python store this attribute under the
    # mangled name _Person__name, so p.name no longer finds it.
    __name = 'littlefish'

In [5]:
p = Person()
p.name

AttributeError: 'Person' object has no attribute 'name'

In [6]:
p._Person__name

'littlefish'

Inheritance

In [8]:
class Parent:
    """Base class that provides hello()."""
    def hello(self):
        print('calling parent method')

class Child(Parent):
    # Defines nothing of its own, so hello() is inherited from Parent.
    pass

In [11]:
p = Parent()
p.hello()
c = Child()
c.hello()

calling parent method
calling parent method


In [12]:
class Child(Parent):
    # Overrides Parent.hello(); Parent instances keep the original behaviour.
    def hello(self):
        print('calling child method')

In [14]:
c = Child()
c.hello()
p.hello()

calling child method
calling parent method


In [16]:
import random
class Fish:
    """A fish placed at a random (x, y) position on a 0..10 grid."""

    def __init__(self):
        # x is drawn first, then y; both are integers from 0 to 10 inclusive.
        self.x, self.y = random.randint(0, 10), random.randint(0, 10)

    def move(self):
        """Move one step towards smaller x and print the new position."""
        self.x = self.x - 1
        report = 'position: {} and {}'.format(self.x, self.y)
        print(report)

class Carp(Fish):
    # Adds nothing: inherits __init__ and move() from Fish unchanged.
    pass

class Shark(Fish):
    # NOTE: this __init__ replaces Fish.__init__ and does not call it, so a
    # Shark never gets the x / y attributes and the inherited move() fails
    # with AttributeError. The omission is deliberate: it sets up the
    # demonstration that follows.
    def __init__(self):
        self.hungry = True
    
    # Eats once while hungry; every later call only reports being full.
    def eat(self):
        if self.hungry:
            print('keep eating')
            self.hungry = False
        else:
            print('Enough eating')

In [19]:
fish = Fish()
fish.move()
shark = Shark()
shark.eat()
shark.move()

position: 9 and 0
keep eating


AttributeError: 'Shark' object has no attribute 'x'

The Shark class overrides __init__ without calling Fish.__init__, so the attributes x and y are never created on a Shark. Shark.__init__ therefore has to run the parent initializer as well.

In [20]:
# Method 1: call the parent initializer explicitly through the parent class
class Shark(Fish):
    """Fish that also tracks whether it is still hungry."""

    def __init__(self):
        # Run Fish's initializer on this instance first so x and y exist.
        Fish.__init__(self)
        self.hungry = True

    def eat(self):
        """Eat once while hungry; afterwards only report being full."""
        if not self.hungry:
            print('Enough eating')
            return
        print('keep eating')
        self.hungry = False

In [21]:
shark = Shark()
shark.eat()
shark.move()

keep eating
position: 0 and 1


In [22]:
# Method 2: let super() locate the parent initializer
class Shark(Fish):
    """Fish that also tracks whether it is still hungry."""

    def __init__(self):
        # super() resolves to Fish here without naming the base class.
        super().__init__()
        self.hungry = True

    def eat(self):
        """Eat once while hungry; afterwards only report being full."""
        message = 'keep eating' if self.hungry else 'Enough eating'
        print(message)
        if self.hungry:
            self.hungry = False

In [23]:
shark = Shark()
shark.eat()
shark.move()

keep eating
position: 8 and 8


Multi-inheritance

In [24]:
class Base1:
    """First base class; contributes foo1()."""
    def foo1(self):
        print('foo1 for base1')

class Base2:
    """Second base class; contributes foo2()."""
    def foo2(self):
        print('foo2 for base2')

class C(Base1, Base2):
    # Inherits from both bases, so instances offer foo1() and foo2().
    pass

In [25]:
c = C()
c.foo1()
c.foo2()

foo1 for base1
foo2 for base2


Multi-inheritance is not recommended due to its complexity.

Composition (combining objects)

In [27]:
# A Pool contains turtles and fish: instead of inheriting from them,
# it owns one Turtle object and one Fish object (composition).
class Turtle:
    """Records how many turtles there are."""

    def __init__(self, x):
        self.num = x


class Fish:
    """Records how many fish there are."""

    def __init__(self, x):
        self.num = x


class Pool:
    """Groups a Turtle count (x) and a Fish count (y)."""

    def __init__(self, x, y):
        self.turtle, self.fish = Turtle(x), Fish(y)

    def print_num(self):
        """Print how many turtles and fish the pool holds."""
        counts = (self.turtle.num, self.fish.num)
        print('Pool has {} turtles and {} fish'.format(*counts))
        

In [28]:
pool = Pool(1, 10)
pool.print_num()

Pool has 1 turtles and 10 fish


In [29]:
pool.__dict__

{'turtle': <__main__.Turtle at 0x1088509e8>,
 'fish': <__main__.Fish at 0x108850940>}

In [30]:
Pool.__dict__

mappingproxy({'__module__': '__main__',
              '__init__': <function __main__.Pool.__init__(self, x, y)>,
              'print_num': <function __main__.Pool.print_num(self)>,
              '__dict__': <attribute '__dict__' of 'Pool' objects>,
              '__weakref__': <attribute '__weakref__' of 'Pool' objects>,
              '__doc__': None})

Related BIF

issubclass(class, classinfo)

isinstance(object, classinfo)

hasattr(object, name)

getattr(object, name[ , default])

setattr(object, name, value)

delattr(object, name)

property(fget=None, fset=None, fdel=None, doc=None)

In [31]:
class C:
    """Object with a ``size`` attribute that is also reachable as ``x``."""

    def __init__(self, size=10):
        self.size = size

    def getsize(self):
        """Return the current size."""
        return self.size

    def setsize(self, value):
        """Store a new size."""
        self.size = value

    def delsize(self):
        """Remove the size attribute from this instance."""
        delattr(self, 'size')

    # Reading, assigning and deleting x are forwarded to the accessors above.
    x = property(fget=getsize, fset=setsize, fdel=delsize)

In [33]:
c1 = C()
c1.getsize()

10

In [34]:
c1.x

10

In [35]:
c1.x = 18

In [36]:
c1.x

18

In [37]:
c1.size

18

In [38]:
del c1.x
c1.size

AttributeError: 'C' object has no attribute 'size'

The property function makes it easy to wrap an attribute with getter, setter and deleter functions.

# Magic Methods

In [40]:
# Example 1: __init__ is defined so that every instance starts with its own x and y
class Rectangle:
    """Rectangle described by its two side lengths x and y."""

    def __init__(self, x, y):
        self.x, self.y = x, y

    def getPeri(self):
        """Return the perimeter, 2 * (x + y)."""
        side_sum = self.x + self.y
        return 2 * side_sum

    def getArea(self):
        """Return the area, x * y."""
        width, height = self.x, self.y
        return width * height

In [42]:
rect.getArea()

12

In [43]:
# Example: when __new__() needs to be overridden

# str is immutable, so the text cannot be changed once the object exists;
# it has to be upper-cased in __new__, before the instance is created.
class CapStr(str):
    """str subclass whose value is always stored in upper case."""

    def __new__(cls, string):
        shouted = string.upper()
        return super().__new__(cls, shouted)

In [44]:
a = CapStr("I love money")

In [45]:
a

'I LOVE MONEY'

In [46]:
class C:
    # Runs when an instance is created.
    def __init__(self):
        print('calling __init__() method!')
    
    # Runs when the object itself is destroyed, i.e. once the last reference
    # to it is gone -- not on every `del` of a name that refers to it.
    def __del__(self):
        print('calling __del__() method!')

In [47]:
c1 = C()
c2 = c1
c3 = c2
del c3
print('---')
del c2
print('---')
del c1
print('---')

calling __init__() method!
---
---
calling __del__() method!
---


Basic __Magic Method__: Factory Function (int, tuple, list)

In [60]:
type(len)

builtin_function_or_method

In [61]:
type(int)

type

int('123'):

creates an instance of the int class by parsing the string '123'

In [62]:
class New_int(int):
    """int whose ``+`` and ``-`` operators are deliberately swapped."""

    def __add__(self, other):
        # '+' is routed to the built-in subtraction.
        return super().__sub__(other)

    def __sub__(self, other):
        # '-' is routed to the built-in addition.
        return super().__add__(other)

In [63]:
a = New_int(3)
b = New_int(5)

In [64]:
a + b

-2

In [69]:
class Try_int(int):
    """int subclass whose + and - convert both operands to plain int first."""

    def __add__(self, other):
        # Writing `self + other` here would call this method again and never
        # finish; plain ints use the built-in addition instead.
        augend, addend = int(self), int(other)
        return augend + addend

    def __sub__(self, other):
        minuend, subtrahend = int(self), int(other)
        return minuend - subtrahend

In [70]:
a = Try_int(3)
b = Try_int(5)

In [71]:
a - b

-2

In [68]:
help(int)

Help on class int in module builtins:

class int(object)
 |  int([x]) -> integer
 |  int(x, base=10) -> integer
 |  
 |  Convert a number or string to an integer, or return 0 if no arguments
 |  are given.  If x is a number, return x.__int__().  For floating point
 |  numbers, this truncates towards zero.
 |  
 |  If x is not a number or if base is given, then x must be a string,
 |  bytes, or bytearray instance representing an integer literal in the
 |  given base.  The literal can be preceded by '+' or '-' and be surrounded
 |  by whitespace.  The base defaults to 10.  Valid bases are 0 and 2-36.
 |  Base 0 means to interpret the base from the string as an integer literal.
 |  >>> int('0b100', base=0)
 |  4
 |  
 |  Methods defined here:
 |  
 |  __abs__(self, /)
 |      abs(self)
 |  
 |  __add__(self, value, /)
 |      Return self+value.
 |  
 |  __and__(self, value, /)
 |      Return self&value.
 |  
 |  __bool__(self, /)
 |      self != 0
 |  
 |  __ceil__(...)
 |      Ceiling of

Magic Method 2:

In [72]:
# NOTE: this class statement rebinds the name `int` to a subclass of the
# built-in int, so every later use of `int` in this namespace refers to this class.
class int(int):
    # `+` is implemented with subtraction; __sub__ itself is not overridden here.
    def __add__(self, other):
        return int.__sub__(self, other)

In [73]:
a = int('5')
a

5

In [74]:
class Nint(int):
    # Reflected addition: Python tries this for `other + self`, typically only
    # when other's own __add__ cannot handle the operation.
    def __radd__(self, other):
        # self is the right-hand operand here, so this returns self - other.
        return int.__sub__(self, other)

In [75]:
a = Nint(5)
b = Nint(3)
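# a + b is handled by a.__add__(b); __radd__ is not involved here.
# The result 2 comes from the `int` class redefined in In [72], whose __add__ subtracts (5 - 3).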
a + b

2

__Inplace Operator__

__Unary Operator__

__Experimental Class Custom__

Requirements:

1. a timer class that records elapsed time
2. methods: start() and stop()
3. both print(t1) and evaluating t1 at the prompt show the result
4. calling stop() before the timer has been started triggers a warning
5. the results of two timers can be added: t1 + t2
6. work within the limited resources available

In [79]:
# __str__ supplies the text shown when an instance is printed
class A():
    def __str__(self):
        return 'for printing display'

In [80]:
a = A()
print(a)

for printing display


In [78]:
# __repr__ supplies the text shown when an instance is echoed at the interactive prompt
class B():
    def __repr__(self):
        return 'for display'

In [81]:
b = B()
b

for display

MyTimer:

In [1]:
import time as t

In [14]:
class MyTimer():
    """Stopwatch measuring the wall-clock time between start() and stop().

    Attributes:
        unit: labels for the six duration fields (years, months, days,
            hours, minutes, seconds).
        prompt: text returned by str() / repr(); holds the last result or a
            status message.
        lasted: the last measured duration as six integers in ``unit`` order;
            empty until a measurement has finished.
        begin, end: struct_time stamps of the current measurement, 0 when idle.
    """

    # Attribute and method names must differ: an instance attribute with the
    # same name would hide the method on that instance.

    def __init__(self):
        self.unit = ['Y', 'M', 'D', 'H', 'M', 'S']
        self.prompt = "not start yet"
        self.lasted = []
        self.begin = 0
        self.end = 0

    def __str__(self):
        return self.prompt

    __repr__ = __str__

    def __add__(self, other):
        """Return a text with the combined duration of two finished timers.

        A timer that has not completed a measurement counts as zero.
        """
        prompt = 'whole running time:'
        mine = self.lasted or [0] * 6
        theirs = other.lasted or [0] * 6
        total = [a + b for a, b in zip(mine, theirs)]
        # Carry overflow upwards (seconds -> minutes -> hours -> days) so that
        # 40S + 30S reads 1M10S rather than 70S.
        for index, base in ((5, 60), (4, 60), (3, 24)):
            carry, total[index] = divmod(total[index], base)
            total[index - 1] += carry
        for amount, unit in zip(total, self.unit):
            if amount:
                prompt += str(amount) + unit
        return prompt

    # start recording
    def start(self):
        self.begin = t.localtime()
        self.prompt = 'warning: please call stop() first'
        print("start recording...")

    # stop recording
    def stop(self):
        if not self.begin:
            print('warning: please call start() first')
        else:
            self.end = t.localtime()
            self.__calc()
            print('stop recording...')

    # internal helper for the time calculation
    def __calc(self):
        # Subtracting the struct_time fields one by one gives negative parts
        # whenever a field wraps (10:00:59 -> 10:01:01 would be 1M-58S), so
        # the difference is taken in whole seconds and split up afterwards.
        elapsed = max(0, int(t.mktime(self.end) - t.mktime(self.begin)))
        days, rest = divmod(elapsed, 86400)
        hours, rest = divmod(rest, 3600)
        minutes, seconds = divmod(rest, 60)
        # Years and months have no fixed length in seconds; long runs are
        # reported in days and those two fields stay 0.
        self.lasted = [0, 0, days, hours, minutes, seconds]
        self.prompt = 'Whole time running: '
        for amount, unit in zip(self.lasted, self.unit):
            if amount:
                self.prompt += str(amount) + unit
        # reset for the next round of recording
        self.begin = 0
        self.end = 0

In [15]:
t1 = MyTimer()

In [16]:
t1.stop()



In [17]:
t1.start()

start recording...


In [18]:
t1



In [19]:
t1.stop()

stop recording...


In [20]:
t1

Whole time running: 1S

In [22]:
t2 = MyTimer()
t2.start()

start recording...


In [23]:
t2.stop()

stop recording...


In [24]:
t2

Whole time running: 4S

In [25]:
t1 + t2

'whole running time:5S'

# Attributes

In [26]:
class C:
    # Every instance gets its own attribute x, set to 'X-man'.
    def __init__(self):
        self.x = 'X-man'

In [27]:
c = C()

In [28]:
c.x

'X-man'

In [29]:
getattr(c, 'x', 'non-exist')

'X-man'

In [30]:
getattr(c, 'y', 'non-exist')

'non-exist'

In [34]:
# use property to expose the `size` attribute under the alias `x`

class C:
    """Object whose ``size`` attribute can also be read, set and deleted as ``x``."""

    def __init__(self, size=10):
        self.size = size

    # The first parameter of each accessor must be the instance (self): the
    # body refers to `self`, so naming that parameter `size` raised NameError
    # as soon as c.x was read or deleted.
    def getSize(self):
        """Return the current size."""
        return self.size

    def setSize(self, value):
        """Store a new size."""
        self.size = value

    def delSize(self):
        """Remove the size attribute from this instance."""
        del self.size

    x = property(getSize, setSize, delSize)

In [35]:
c = C()

In [36]:
# use x to represent size in class C
c.x = 1

In [37]:
c.size

1

In [7]:
class C:
    # Called for every attribute read; delegates to the default lookup.
    def __getattribute__(self, name):
        print('getattribute')
        return super().__getattribute__(name)
    
    # Called only after the normal lookup above has failed. There is no
    # return statement, so a missing attribute evaluates to None instead of
    # raising AttributeError.
    def __getattr__(self, name):
        print('getattr')
        
    # Called for every attribute assignment; delegates to the default behaviour.
    def __setattr__(self, name, value):
        print('setattr')
        return super().__setattr__(name, value)
    
    # Called for `del obj.name`; delegates to the default behaviour.
    def __delattr__(self, name):
        print('delattr')
        super().__delattr__(name)

In [8]:
c = C()
c.x

getattribute
getattr


In [9]:
c.x = 1

setattr


In [10]:
c.x

getattribute


1

In [11]:
del c.x

delattr


In [16]:
# Infinite-recursion risk when overriding __setattr__
# Exercise: Rectangle
class Rectangle:
    """Rectangle where assigning ``square`` sets width and height together."""

    def __init__(self, width=0, height=0):
        # Both assignments go through __setattr__ below.
        self.width, self.height = width, height

    def __setattr__(self, name, value):
        if name != 'square':
            # Solution 1: hand the assignment to the base class. A plain
            # `self.<name> = value` here would call this method again forever.
            # Solution 2 (equivalent for ordinary attributes):
            # self.__dict__[name] = value
            super().__setattr__(name, value)
            return
        # 'square' is never stored; it fans out to the two real attributes.
        for side in ('width', 'height'):
            setattr(self, side, value)

    def getArea(self):
        """Return width * height."""
        return self.width * self.height

In [17]:
r1 = Rectangle(4, 5)

In [18]:
r1.getArea()

20

In [19]:
r1.square = 10

In [20]:
r1.height

10

In [21]:
r1.getArea()

100

In [23]:
r1.__dict__

{'width': 10, 'height': 10}

__Property__ Function

Descriptor:

assign __an instance of a special class__ (one that defines `__get__`, `__set__` or `__delete__`) as __a class attribute of another class__

In [1]:
class MyDescriptor:
    """Descriptor that only reports which hook was called and with what arguments."""

    # Called on attribute read. self is the descriptor, instance is the object
    # the attribute was read from, owner is that object's class. Nothing is
    # returned, so the read evaluates to None.
    def __get__(self, instance, owner):
        print('getting...', self, instance, owner)
    
    # Called on attribute assignment; the value is printed, not stored.
    def __set__(self, instance, value):
        print('setting... ', self, instance, value)
        
    # Called on `del instance.attr`.
    def __delete__(self, instance):
        print('deleting...', self, instance)

In [2]:
class Test:
    x = MyDescriptor()

In [3]:
test = Test()

In [4]:
test.x

getting... <__main__.MyDescriptor object at 0x1078e9908> <__main__.Test object at 0x1078f44e0> <class '__main__.Test'>


In [5]:
test

<__main__.Test at 0x1078f44e0>

In [6]:
Test

__main__.Test

In [7]:
test.x = 'x-Man'

setting...  <__main__.MyDescriptor object at 0x1078e9908> <__main__.Test object at 0x1078f44e0> x-Man


In [8]:
del test.x

deleting... <__main__.MyDescriptor object at 0x1078e9908> <__main__.Test object at 0x1078f44e0>


In [15]:
class MyProperty:
    """Minimal re-implementation of the built-in property as a descriptor.

    Args:
        fget: function called with the instance to read the value.
        fset: function called with the instance and the new value.
        fdel: function called with the instance to delete the value.

    An accessor left as None makes the matching operation raise
    AttributeError, as the built-in property does.
    """

    def __init__(self, fget=None, fset=None, fdel=None):
        self.fget = fget
        self.fset = fset
        self.fdel = fdel

    def __get__(self, instance, owner):
        # Accessed on the class (Owner.attr): there is no instance to read
        # from, so hand back the descriptor itself.
        if instance is None:
            return self
        if self.fget is None:
            raise AttributeError('unreadable attribute')
        return self.fget(instance)

    def __set__(self, instance, value):
        if self.fset is None:
            raise AttributeError("can't set attribute")
        self.fset(instance, value)

    def __delete__(self, instance):
        if self.fdel is None:
            raise AttributeError("can't delete attribute")
        self.fdel(instance)

In [16]:
class C:
    """Stores a value in _x and exposes it as x through MyProperty."""

    def __init__(self):
        self._x = None
    
    # Accessor functions handed to MyProperty below.
    def getX(self):
        return self._x

    def setX(self, value):
        self._x = value
    
    def delX(self):
        del self._x
    
    # Reads, writes and deletes of x are routed to getX / setX / delX.
    x = MyProperty(getX, setX, delX)

In [17]:
c = C()
c.x = 'x-man'

In [18]:
c._x

'x-man'

In [19]:
del c.x

In [21]:
# Practice

# define a temperature class with a Celsius and a Fahrenheit attribute
# setting either attribute automatically updates the other

In [37]:
class Celsic:
    """Descriptor holding a Celsius reading separately for each owner instance.

    The reading used to live on the descriptor object itself; because one
    descriptor is shared by the whole owner class, setting the temperature on
    one instance changed it for every instance.
    """

    def __init__(self, value = 26.0):
        # Default reported for instances that have not been assigned a reading.
        self.value = float(value)
        # Name of the per-instance attribute that stores the reading;
        # refined by __set_name__ once the owner class is created.
        self._slot = '_celsius'

    def __set_name__(self, owner, name):
        # Derive the storage name from the attribute name (cel -> _cel).
        self._slot = '_' + name

    def __get__(self, instance, owner):
        # Class-level access has no instance: report the default.
        if instance is None:
            return self.value
        return getattr(instance, self._slot, self.value)

    def __set__(self, instance, value):
        setattr(instance, self._slot, float(value))


class Fahrenheit:
    """Descriptor presenting the owner's ``cel`` attribute in Fahrenheit."""

    def __get__(self, instance, owner):
        # self: this descriptor
        # instance: the object the attribute was read from (None on the class)
        if instance is None:
            return self
        return instance.cel * 1.8 + 32

    def __set__(self, instance, value):
        # Store the reading in Celsius; Fahrenheit keeps no state of its own.
        instance.cel = (float(value) - 32)/1.8


class Temperature:
    """Temperature readable and writable in Celsius (cel) or Fahrenheit (fah)."""
    cel = Celsic()
    fah = Fahrenheit()

In [38]:
temp = Temperature()

In [39]:
temp.cel

26.0

In [40]:
temp.cel = 30

In [41]:
temp.fah

86.0

In [42]:
temp.fah = 100

In [43]:
temp.cel

37.77777777777778

__Customise SEQUENCE__

In [79]:
# define a customised list
# 1. immutable (no item assignment or deletion is offered)
# 2. counts how many times each position is accessed

class CList:
    """Read-only sequence that counts how often each position is read.

    Attributes:
        values: the stored items.
        count: maps every position (0 .. len - 1) to its number of reads.
    """

    def __init__(self, *args):
        self.values = list(args)
        self.count = dict.fromkeys(range(len(self.values)), 0)

    def __len__(self):
        return len(self.values)

    def __getitem__(self, key):
        """Return the item (or list of items for a slice) and count the read.

        Raises:
            IndexError: if an integer key is out of range. Raising IndexError
                rather than KeyError is what lets ``for`` loops and list()
                stop at the end of the sequence.
        """
        # Indexing a range validates the key and normalises negative indices
        # before any counter is touched.
        positions = range(len(self.values))[key]
        if isinstance(key, slice):
            for position in positions:
                self.count[position] += 1
            return self.values[key]
        self.count[positions] += 1
        return self.values[positions]

In [80]:
x = CList(1,2,3,4,5,5,6,7,7)

In [81]:
x[1]

2

In [82]:
x.count

{0: 0, 1: 1, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 0, 8: 0}

# Iterator

In [84]:
# example:
# a for loop fetches the elements one at a time from the object it iterates over
for i in 'FishC':
    print(i)

F
i
s
h
C


In [85]:
string = "FishC"
it = iter(string)

In [86]:
next(it)

'F'

In [87]:
next(it)

'i'

In [91]:
# define an iterator
class Fibs:
    """Iterator over the Fibonacci numbers 1, 1, 2, 3, ... that do not exceed n."""

    def __init__(self, n=20):
        self.a, self.b, self.n = 0, 1, n

    def __iter__(self):
        # The object is its own iterator.
        return self

    def __next__(self):
        # Advance the pair (a, b) by one step, then decide whether to stop.
        upcoming = self.a + self.b
        self.a = self.b
        self.b = upcoming
        if self.a <= self.n:
            return self.a
        raise StopIteration

In [92]:
fibs = Fibs()

In [93]:
for each in fibs:
    if each < 100:
        print(each)
    else:
        break

1
1
2
3
5
8
13


In [94]:
fibs = Fibs(50)
for each in fibs:
    print(each)

1
1
2
3
5
8
13
21
34


# Generator

In [95]:
def myGen():
    """Generator that yields 1 and then 2.

    Calling myGen() only creates the generator object; the body, including
    the print below, starts running on the first next().
    """
    print('generator executing!')
    yield 1
    yield 2

In [97]:
myG = myGen()

In [98]:
next(myG)

generator executing!


1

In [99]:
next(myG)

2

In [100]:
next(myG)

StopIteration: 

In [102]:
for i in myGen():
    print(i)

generator executing!
1
2


In [103]:
def libs():
    """Yield the Fibonacci numbers 1, 1, 2, 3, 5, ... without end.

    The generator never stops on its own; the caller decides when to break.
    """
    previous, current = 0, 1
    while True:
        yield current
        previous, current = current, previous + current

In [105]:
for each in libs():
    if each > 100:
        break
    print(each, end = ' ')

1 1 2 3 5 8 13 21 34 55 89 

In [106]:
a = [i for i in range(100) if not (i%2) and i%3]

In [107]:
a

[2,
 4,
 8,
 10,
 14,
 16,
 20,
 22,
 26,
 28,
 32,
 34,
 38,
 40,
 44,
 46,
 50,
 52,
 56,
 58,
 62,
 64,
 68,
 70,
 74,
 76,
 80,
 82,
 86,
 88,
 92,
 94,
 98]

In [108]:
b = {i:i%2 == 0 for i in range(10)}

In [109]:
b

{0: True,
 1: False,
 2: True,
 3: False,
 4: True,
 5: False,
 6: True,
 7: False,
 8: True,
 9: False}

In [110]:
c = {i for i in [1, 1, 2, 3, 4, 5, 6, 5]}

In [111]:
c

{1, 2, 3, 4, 5, 6}

In [112]:
d = "i for i in 'I love code'"

In [113]:
d

"i for i in 'I love code'"

In [114]:
e = (i for i in range(10))

In [115]:
e

<generator object <genexpr> at 0x107e95a20>

In [116]:
next(e)

0

In [117]:
sum(range(10))

45

# Module

# Copy

deep copy: copy.deepcopy(x)

shallow copy: copy.copy(x) or the slice x[:]

In [119]:
__name__

'__main__'

module.__name__: module_name

__name__ tells a file whether it is being run as the main script (then it is '__main__') or imported as a module (then it is the module's name)

In [120]:
def c2f(cel):
    """Convert a temperature from degrees Celsius to degrees Fahrenheit."""
    # The result has to be returned; without a return the function yields None.
    return cel*1.8 + 32

def f2c(fah):
    """Convert a temperature from degrees Fahrenheit to degrees Celsius."""
    return (fah-32)/1.8
    
def test():
    """Print a sample conversion as a quick self-check of c2f()."""
    print('0C = {} F'.format(c2f(0)))

In [121]:
import sys
sys.path

['/Users/robertwan/Github/python3_object_oriented',
 '/usr/local/Cellar/python/3.7.2/Frameworks/Python.framework/Versions/3.7/lib/python37.zip',
 '/usr/local/Cellar/python/3.7.2/Frameworks/Python.framework/Versions/3.7/lib/python3.7',
 '/usr/local/Cellar/python/3.7.2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/lib-dynload',
 '',
 '/usr/local/lib/python3.7/site-packages',
 '/usr/local/lib/python3.7/site-packages/IPython/extensions',
 '/Users/robertwan/.ipython']

package

# Pythonic

In [123]:
import timeit

In [124]:
timeit.__doc__

"Tool for measuring execution time of small code snippets.\n\nThis module avoids a number of common traps for measuring execution\ntimes.  See also Tim Peters' introduction to the Algorithms chapter in\nthe Python Cookbook, published by O'Reilly.\n\nLibrary usage: see the Timer class.\n\nCommand line usage:\n    python timeit.py [-n N] [-r N] [-s S] [-p] [-h] [--] [statement]\n\nOptions:\n  -n/--number N: how many times to execute 'statement' (default: see below)\n  -r/--repeat N: how many times to repeat the timer (default 5)\n  -s/--setup S: statement to be executed once initially (default 'pass').\n                Execution time of this setup statement is NOT timed.\n  -p/--process: use time.process_time() (default is time.perf_counter())\n  -v/--verbose: print raw timing results; repeat for more digits precision\n  -u/--unit: set the output time unit (nsec, usec, msec, or sec)\n  -h/--help: print this usage message and exit\n  --: separate options from statement, use when statement

In [125]:
print(timeit.__doc__)

Tool for measuring execution time of small code snippets.

This module avoids a number of common traps for measuring execution
times.  See also Tim Peters' introduction to the Algorithms chapter in
the Python Cookbook, published by O'Reilly.

Library usage: see the Timer class.

Command line usage:
    python timeit.py [-n N] [-r N] [-s S] [-p] [-h] [--] [statement]

Options:
  -n/--number N: how many times to execute 'statement' (default: see below)
  -r/--repeat N: how many times to repeat the timer (default 5)
  -s/--setup S: statement to be executed once initially (default 'pass').
                Execution time of this setup statement is NOT timed.
  -p/--process: use time.process_time() (default is time.perf_counter())
  -v/--verbose: print raw timing results; repeat for more digits precision
  -u/--unit: set the output time unit (nsec, usec, msec, or sec)
  -h/--help: print this usage message and exit
  --: separate options from statement, use when statement starts with -
  stat

In [126]:
dir(timeit)

['Timer',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__spec__',
 '_globals',
 'default_number',
 'default_repeat',
 'default_timer',
 'dummy_src_name',
 'gc',
 'itertools',
 'main',
 'reindent',
 'repeat',
 'sys',
 'template',
 'time',
 'timeit']

In [127]:
timeit.__all__

['Timer', 'timeit', 'repeat', 'default_timer']

In [128]:
# source code file position
timeit.__file__

'/usr/local/Cellar/python/3.7.2/Frameworks/Python.framework/Versions/3.7/lib/python3.7/timeit.py'

help(timeit)

# Python Crawler / Web Spider

1. Python: fetch Internet

URL: 

protocol://hostname[:port]/path[;parameters][?query][#fragment]

protocol: http, https, ftp, file, ed2k....

urllib.request.urlopen(url, *args...)

In [132]:
import urllib.request

In [133]:
response = urllib.request.urlopen('http://www.fishc.com')

In [134]:
html = response.read()
# binary data
html

b'<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="UTF-8">\n    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n    <meta name="keywords" content="\xe9\xb1\xbcC\xe5\xb7\xa5\xe4\xbd\x9c\xe5\xae\xa4|\xe5\x85\x8d\xe8\xb4\xb9\xe7\xbc\x96\xe7\xa8\x8b\xe8\xa7\x86\xe9\xa2\x91\xe6\x95\x99\xe5\xad\xa6|Python\xe6\x95\x99\xe5\xad\xa6|Web\xe5\xbc\x80\xe5\x8f\x91\xe6\x95\x99\xe5\xad\xa6|\xe5\x85\xa8\xe6\xa0\x88\xe5\xbc\x80\xe5\x8f\x91\xe6\x95\x99\xe5\xad\xa6|C\xe8\xaf\xad\xe8\xa8\x80\xe6\x95\x99\xe5\xad\xa6|\xe6\xb1\x87\xe7\xbc\x96\xe6\x95\x99\xe5\xad\xa6|Win32\xe5\xbc\x80\xe5\x8f\x91|\xe5\x8a\xa0\xe5\xaf\x86\xe4\xb8\x8e\xe8\xa7\xa3\xe5\xaf\x86|Linux\xe6\x95\x99\xe5\xad\xa6">\n    <meta name="description" content="\xe9\xb1\xbcC\xe5\xb7\xa5\xe4\xbd\x9c\xe5\xae\xa4\xe4\xb8\xba\xe5\xa4\xa7\xe5\xae\xb6\xe6\x8f\x90\xe4\xbe\x9b\xe6\x9c\x80\xe6\x9c\x89\xe8\xb6\xa3\xe7\x9a\x84\xe7\xbc\x96\xe7\xa8\x8b\xe8\xa7\x86\xe9\xa2\x91\xe6\x95\x99\xe5\xad\xa6\xe3\x80\x82">\n    <me

In [135]:
html = html.decode("utf-8")
html

'<!DOCTYPE html>\n<html lang="en">\n<head>\n    <meta charset="UTF-8">\n    <meta name="viewport" content="width=device-width, initial-scale=1.0">\n    <meta name="keywords" content="鱼C工作室|免费编程视频教学|Python教学|Web开发教学|全栈开发教学|C语言教学|汇编教学|Win32开发|加密与解密|Linux教学">\n    <meta name="description" content="鱼C工作室为大家提供最有趣的编程视频教学。">\n    <meta name="author" content="鱼C工作室">\n    <title>鱼C工作室-免费编程视频教学|Python教学|Web开发教学|全栈开发教学|C语言教学|汇编教学|Win32开发|加密与解密|Linux教学</title>\n    <link rel="shortcut icon" type="image/x-icon" href="img/favicon.ico">\n    <link rel="stylesheet" href="css/styles.css">\n    <!-- <link rel="stylesheet" href="css/timeline.css"> -->\n    <script src="js/jq.js"></script>\n    <script src="js/fishcEgg.js"></script>\n    <script>\n        $(document).ready(function() {\n            var windowHeight = document.documentElement.clientHeight || document.body.clientHeight;\n\n            createStoryJS({\n                type:       \'timeline\',\n                width:      \'auto\',\n       

# Example 1: Download a Cat: placekitten.com

In [136]:
import urllib.request

# Download the image and save the raw bytes to disk. The with-block closes
# the HTTP response even if read() fails, so the connection is not leaked.
with urllib.request.urlopen('http://placekitten.com/g/500/600') as response:
    cat_img = response.read()
with open('cat_500_600.jpg', 'wb') as f:
    f.write(cat_img)

In [137]:
import urllib.request

req = urllib.request.Request('http://placekitten.com/g/500/600')
response = urllib.request.urlopen(req)
response.geturl()

'http://placekitten.com/g/500/600'

In [139]:
print(response.info())

Date: Thu, 28 Mar 2019 12:08:22 GMT
Content-Type: image/jpeg
Content-Length: 27650
Connection: close
Set-Cookie: __cfduid=dcdb560907b8db1fe62869aa7cef85fa71553774902; expires=Fri, 27-Mar-20 12:08:22 GMT; path=/; domain=.placekitten.com; HttpOnly
Access-Control-Allow-Origin: *
Cache-Control: public, max-age=86400
Expires: Fri, 29 Mar 2019 12:08:22 GMT
CF-Cache-Status: HIT
Accept-Ranges: bytes
Vary: Accept-Encoding
Server: cloudflare
CF-RAY: 4be97d377dcda5d8-NRT




In [140]:
response.getcode()

200

# Example 2: Youdao Translation

In [181]:
import urllib.parse

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Form fields sent with the translation request.
# NOTE(review): salt / sign / ts / bv look like values captured from one
# browser session -- confirm that the service still accepts them.
data = {
    'i': 'I love being great',
    'from': 'AUTO',
    'to': 'AUTO',
    'smartresult': 'dict',
    'client': 'fanyideskweb',
    'salt': '15537751261296',
    'sign': 'dcafe8944840b3f17fa3c17c0b3ddbbe',
    'ts': '1553775126129',
    'bv': '533f9b76f2d3fa34628b3cd7d7dddf96',
    'doctype': 'json',
    'version': '2.1',
    'keyfrom': 'fanyi.web',
    'action': 'FY_BY_REALTlME',
    'typoResult': 'false',
}

# A request body must be bytes: url-encode the form, then encode it as UTF-8.
data = urllib.parse.urlencode(data).encode('utf-8')
# Attach the body to the Request object and send that object. Previously the
# Request was built without the body and then ignored, because urlopen() was
# given the bare URL instead.
req = urllib.request.Request(url, data)
response = urllib.request.urlopen(req)


In [182]:
html = response.read().decode()
print(html)

                          {"type":"EN2ZH_CN","errorCode":0,"elapsedTime":1,"translateResult":[[{"src":"I love being great","tgt":"我喜欢做大"}]]}



In [183]:
response.geturl()

'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

In [180]:
print(response.info())

Server: Tengine
Date: Thu, 28 Mar 2019 12:53:56 GMT
Content-Type: application/json;charset=utf-8
Transfer-Encoding: chunked
Connection: close
Vary: Accept-Encoding
Vary: Accept-Encoding
Content-Language: en-US
Set-Cookie: OUTFOX_SEARCH_USER_ID=1450854287@119.147.183.51; domain=.youdao.com; expires=Sat, 20-Mar-2049 12:53:55 GMT




In [185]:
import json
target = json.loads(html)
print(target)

{'type': 'EN2ZH_CN', 'errorCode': 0, 'elapsedTime': 1, 'translateResult': [[{'src': 'I love being great', 'tgt': '我喜欢做大'}]]}


# Example 2.1 Hiding

Headers: User-Agent

In [193]:
import urllib.parse

url = 'http://fanyi.youdao.com/translate?smartresult=dict&smartresult=rule'

# Alternative way to supply the header: build a dict and pass it as the third
# argument of Request. It is kept inside a string literal, so it is not executed.
'''
head = {}
head['User-Agent'] = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'
'''

# Form fields sent with the translation request.
data = {}
data['i'] = 'I love being great' 
data['from'] = 'AUTO'
data['to'] = 'AUTO'
data['smartresult'] = 'dict'
data['client'] = 'fanyideskweb'
data['salt'] = '15537751261296'
data['sign'] = 'dcafe8944840b3f17fa3c17c0b3ddbbe'
data['ts'] = '1553775126129'
data['bv'] = '533f9b76f2d3fa34628b3cd7d7dddf96'
data['doctype'] = 'json'
data['version'] = '2.1'
data['keyfrom'] = 'fanyi.web'
data['action'] = 'FY_BY_REALTlME'
data['typoResult'] = 'false'

# A request body must be bytes: url-encode the form, then encode it as UTF-8.
data = urllib.parse.urlencode(data).encode('utf-8')

req = urllib.request.Request(url, data)
# Alternative, using the dict from the string literal above:
# req = urllib.request.Request(url, data, head)
# Send a browser-like User-Agent header along with the request.
req.add_header('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36')
response = urllib.request.urlopen(req)
html = response.read().decode('utf-8')


In [194]:
req.headers

{'User-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.119 Safari/537.36'}

Add a delay (elapsed time) between requests when crawling

In [196]:
import time
time.sleep(5)

Proxy