In [37]:
# 原始的创建类的方法 
def generate_cls(a, b):
    class Fake(object):
        def method_a(self, n):
            return n
    Fake.a = a
    Fake.b = b
    return Fake
ins = generate_cls(1, 2)()
# ins.method_a(10)

这不算是动态创建的：
+ 类名（Fake）不方便改变
+ 要创建的类需要的属性和方法越多，就要对应的加码，不灵活

所以考虑用type创建

In [None]:
def method_a(self, n):
    return n
 
ins = type('Fake', (), {'a': 1, 'b': 2, 'method_a': method_a})()

Python2创建类的时候，可以添加一个__metaclass__属性

In [None]:
class Foo(object):
    __metaclass__ = something
    '''
    '''

Python会使用元类来创建Foo这个类。Python会在类定义中寻找__metaclass__。如果找到它，Python会用它来创建对象类Foo。如果没有找到它，Python将使用type来创建这个类。在Python3中语法改变了一下:class Simple1(object, metaclass=something):

一种说法，学懂元类，只需要知道两句话
+ 道生一，一生二，二生三，三生万物 （type-metaclass-class-instance）
+ 我是谁？我从哪来里？我要到哪里去？ (class, bases, attributes)

In [42]:
# 道生一：传入type

class  SayMetaClass(type):

    # 传入三大永恒命题：类名称、父类、属性

    def  __new__(cls,  name,  bases,  attrs):

        # 创造“天赋”

        attrs['say_'+name]  =  lambda  self,value,saying=name:  print(saying+','+value+'!')

        # 传承三大永恒命题：类名称、父类、属性

        return  type.__new__(cls,  name,  bases,  attrs)

# 一生二：创建类

class  Hello(object,  metaclass=SayMetaClass): # 通过元类创建的类，第一个参数是父类，第二个参数是metaclass

    pass

# 二生三：创建实列

hello  =  Hello()

# 三生万物：调用实例方法

hello.say_Hello('world!')

# 一生二：创建类

class  Sayolala(object,  metaclass=SayMetaClass):

    pass

# 二生三：创建实列

s  =  Sayolala()

# 三生万物：调用实例方法

s.say_Sayolala('japan!')


Hello,world!!
Sayolala,japan!!


以下选择两个领域
+ 一个是Django的核心思想，“Object Relational Mapping”，即对象-关系映射，简称ORM
+ 另一个领域是爬虫领域（黑客领域），一个自动搜索网络上的可用代理，然后换着IP去突破别的人反爬虫限制

In [46]:
class  Field(object):

    def  __init__(self,  name,  column_type):

        self.name  =  name

        self.column_type  =  column_type

    def  __str__(self):

        return  '<%s:%s>'  %  (self.__class__.__name__,  self.name)
    
class  StringField(Field):

    def  __init__(self,  name):

        super(StringField,  self).__init__(name,  'varchar(100)') # python2 style 

class  IntegerField(Field):

    def  __init__(self,  name):

        super(IntegerField,  self).__init__(name,  'bigint')
        

class  ModelMetaclass(type):

    def  __new__(cls,  name,  bases,  attrs):

        if  name=='Model':

            return  type.__new__(cls,  name,  bases,  attrs)

        print('Found model: %s'  %  name)

        mappings  =  dict()

        for  k,  v  in  attrs.items():

            if  isinstance(v,  Field):

                print('Found mapping: %s ==> %s'  %  (k,  v))

                mappings[k]  =  v

        for  k  in  mappings.keys():

            attrs.pop(k)

        attrs['__mappings__']  =  mappings  # 保存属性和列的映射关系

        attrs['__table__']  =  name  # 假设表名和类名一致

        return  type.__new__(cls,  name,  bases,  attrs)


class  Model(dict,  metaclass=ModelMetaclass):

    def  __init__(self,  **kwarg):

        super(Model,  self).__init__(**kwarg)

    def  __getattr__(self,  key):

        try:

            return  self[key]

        except  KeyError:

            raise  AttributeError("'Model' object has no attribute '%s'"  %  key)

    def  __setattr__(self,  key,  value):

        self[key]  =  value

    # 模拟建表操作

    def  save(self):

        fields  =  []

        args  =  []

        for  k,  v  in  self.__mappings__.items():

            fields.append(v.name)

            args.append(getattr(self,  k,  None))

        sql  =  'insert into %s (%s) values (%s)'  %  (self.__table__,  ','.join(fields),  ','.join([str(i)  for  i  in  args]))

        print('SQL: %s'  %  sql)

        print('ARGS: %s'  %  str(args))

In [47]:
class  User(Model):

    # 定义类的属性到列的映射：

    id  =  IntegerField('id')

    name  =  StringField('username')

    email  =  StringField('email')

    password  =  StringField('password')



Found model: User
Found mapping: id ==> <IntegerField:id>
Found mapping: name ==> <StringField:username>
Found mapping: email ==> <StringField:email>
Found mapping: password ==> <StringField:password>


In [48]:
u  =  User(id=12345,  name='Batman',  email='batman@nasa.org',  password='iamback')

u.save()

SQL: insert into User (id,username,email,password) values (12345,Batman,batman@nasa.org,iamback)
ARGS: [12345, 'Batman', 'batman@nasa.org', 'iamback']


In [49]:
import  requests

base_headers  =  {

    'User-Agent':  'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.71 Safari/537.36',

    'Accept-Encoding':  'gzip, deflate, sdch',

    'Accept-Language':  'zh-CN,zh;q=0.8'

}

def  get_page(url):

    headers  =  dict(base_headers)

    print('Getting',  url)

    try:

        r  =  requests.get(url,  headers=headers)

        print('Getting result',  url,  r.status_code)

        if  r.status_code  ==  200:

            return  r.text

    except  ConnectionError:

        print('Crawling Failed',  url)

        return  None
    


In [51]:
from  getpage import  get_page

from  pyquery import  PyQuery as  pq

# 道生一：创建抽取代理的metaclass

class  ProxyMetaclass(type):

    """

        元类，在FreeProxyGetter类中加入

        __CrawlFunc__和__CrawlFuncCount__

        两个参数，分别表示爬虫函数，和爬虫函数的数量。

    """

    def  __new__(cls,  name,  bases,  attrs):

        count  =  0

        attrs['__CrawlFunc__']  =  []

        attrs['__CrawlName__']  =  []

        for  k,  v  in  attrs.items():

            if  'crawl_'  in  k:

                attrs['__CrawlName__'].append(k)

                attrs['__CrawlFunc__'].append(v)

                count  +=  1

        for  k  in  attrs['__CrawlName__']:

            attrs.pop(k)

        attrs['__CrawlFuncCount__']  =  count

        return  type.__new__(cls,  name,  bases,  attrs)

# 一生二：创建代理获取类

class  ProxyGetter(object,  metaclass=ProxyMetaclass):

    def  get_raw_proxies(self,  site):

        proxies  =  []

        print('Site',  site)

        for  func in  self.__CrawlFunc__:

            if  func.__name__==site:

                this_page_proxies  =  func(self)

                for  proxy in  this_page_proxies:

                    print('Getting',  proxy,  'from',  site)

                    proxies.append(proxy)

        return  proxies

    def  crawl_daili66(self,  page_count=4):

        start_url  =  'http://www.66ip.cn/{}.html'

        urls  =  [start_url.format(page)  for  page in  range(1,  page_count  +  1)]

        for  url in  urls:

            print('Crawling',  url)

            html  =  get_page(url)

            if  html:

                doc  =  pq(html)

                trs  =  doc('.containerbox table tr:gt(0)').items()

                for  tr in  trs:

                    ip  =  tr.find('td:nth-child(1)').text()

                    port  =  tr.find('td:nth-child(2)').text()

                    yield  ':'.join([ip,  port])

    def  crawl_proxy360(self):

        start_url  =  'http://www.proxy360.cn/Region/China'

        print('Crawling',  start_url)

        html  =  get_page(start_url)

        if  html:

            doc  =  pq(html)

            lines  =  doc('div[name="list_proxy_ip"]').items()

            for  line in  lines:

                ip  =  line.find('.tbBottomLine:nth-child(1)').text()

                port  =  line.find('.tbBottomLine:nth-child(2)').text()

                yield  ':'.join([ip,  port])

    def  crawl_goubanjia(self):

        start_url  =  'http://www.goubanjia.com/free/gngn/index.shtml'

        html  =  get_page(start_url)

        if  html:

            doc  =  pq(html)

            tds  =  doc('td.ip').items()

            for  td in  tds:

                td.find('p').remove()

                yield  td.text().replace(' ',  '')

if  __name__  ==  '__main__':

    # 二生三：实例化ProxyGetter

    crawler  =  ProxyGetter()

    print(crawler.__CrawlName__)

    # 三生万物

    for  site_label in  range(crawler.__CrawlFuncCount__):

        site  =  crawler.__CrawlName__[site_label]

        myProxies  =  crawler.get_raw_proxies(site)

ModuleNotFoundError: No module named 'getpage'

In [30]:
total = 0

def add():
    global total
    for i in range(1000000):
        total += 1
def desc():
    global total
    for i in range(1000000):
        total -= 1

import threading
thread1 = threading.Thread(target=add)
thread2 = threading.Thread(target=desc)
thread1.start()
thread2.start()

thread1.join()
thread2.join()
print(total)

-245255


In [32]:
import time
class GetDetailHtml(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail html started")
        time.sleep(2)
        print("get detail html end")

class GetDetailUrl(threading.Thread):
    def __init__(self, name):
        super().__init__(name=name)

    def run(self):
        print("get detail url started")
        time.sleep(4)
        print("get detail url end")

if  __name__ == "__main__":
    thread1 = GetDetailHtml("get_detail_html")
    thread2 = GetDetailUrl("get_detail_url")
    start_time = time.time()
    thread1.start()
    thread2.start()

    # 等待线程执行完才执行主线程
    thread1.join()
    thread2.join()

    #当主线程退出的时候， 子线程kill掉
    print ("last time: {}".format(time.time()-start_time))

get detail html started
get detail url started
get detail html end
get detail url end
last time: 4.00644850730896


In [33]:
import numbers

class IntField:
    #数据描述符
    def __get__(self, instance, owner):
        return self.value
    def __set__(self, instance, value):
        if not isinstance(value, numbers.Integral):
            raise ValueError("int value need")
        if value < 0:
            raise ValueError("positive value need")
        self.value = value
    def __delete__(self, instance):
        pass

class User:
    age = IntField()


if __name__ == "__main__":
    user = User()
    # user.age = 30           # 进入数据描述符的__set__
    # setattr(user, 'age',18) # 进入数据描述符的__get__
    # print(user.age)         # 进入数据描述符的__get__
    user.__dict__["age"] = 18
    print(user.__dict__["age"])

    user.__dict__["age"] = 18
    print(user.age)

18


AttributeError: 'IntField' object has no attribute 'value'

In [34]:
class User:
    age = 1

if __name__ == "__main__":
    user = User()
    user.name = 30         # 保存在user对象的内存中
    print(user.name)       # 从user对象的内存中去取
    user.age = 30          # 保存在user对象的内存中, 不影响类的内存中的值
    print(user.age)       # 进入数据描述符的__get__
    user.__dict__["age"] = 18
    print(user.__dict__["age"])
    print (user.__dict__)

30
30
18
{'name': 30, 'age': 18}
