以下demo是参考[官方API文档](http://docs.pyfunctional.org/en/master/functional.html)写的，精简版见[简书]()

# IO

In [1]:
from functional import seq

## jsonl

In [2]:
%%bash
# -p keeps this cell re-runnable: a plain mkdir fails once ./res already exists
mkdir -p ./res
# Write two JSON Lines records; the seq.jsonl examples read this file
cat << EOF > ./res/json_lines.json
{"name":"Alice", "age":5}
{"name":"Bob", "age":6}
EOF

In [3]:
seq.jsonl('./res/json_lines.json').to_list()

[{'age': 5, 'name': 'Alice'}, {'age': 6, 'name': 'Bob'}]

In [125]:
seq.jsonl('./res/json_lines.json').list() # list() == to_list()

[{'age': 5, 'name': 'Alice'}, {'age': 6, 'name': 'Bob'}]

In [126]:
seq.jsonl('./res/json_lines.json').to_pandas()

Unnamed: 0,age,name
0,5,Alice
1,6,Bob


## open

In [5]:
%%bash
cat << EOF > ./res/csv_file.csv
name,age
Alice,5
Bob,6
EOF

In [6]:
seq.open('./res/csv_file.csv', delimiter='\n').to_list()

['name,age', 'Alice,5', 'Bob,6', '']

# stream

## range

In [7]:
seq.range(5)

[0, 1, 2, 3, 4]

In [130]:
assert seq.range(5) == seq(range(5)) 
assert seq.range(5) == seq([0,1,2,3,4])
assert seq.range(5) == seq(0,1,2,3,4)
assert seq.range(5) == [0,1,2,3,4]

## construct by args

In [9]:
seq(1,2,3) == seq([1,2,3]) # 传进来的是完整的list或者逐个元素，都可以

True

## ~~accumulate~~ / aggregate

accumulate是aggregate的一个特例；所以已经deprecated  
aggregate支持1～3个参数：只传1个参数时，该参数就是聚合函数fn；传2个参数时依次为(init_value, fn)；传满3个参数时依次代表

* arg1:  init_value=None， 聚合开始时所用的初始值
* arg2:  fn, (current, next) ==> result， 两两聚合所用的函数
* arg3:  agg_func=None， 在返回结果前执行的最后一个映射

In [10]:
import operator
seq.range(5).aggregate(operator.add) 

10

In [11]:
seq.range(5).aggregate(-2, operator.add)

8

In [12]:
seq.range(5).aggregate(-2, operator.add, str)

'8'

## all/any

In [13]:
seq(False, True).all()

False

In [14]:
seq(False, True).any()

True

## average

In [15]:
from fn import _ as X
f = X**2
seq.range(5).average(f) # f 是取平均之前的一轮映射，可以省略
# [0,1,2,3,4] ==> [0,1,4,9,16] ==> 6.0

6.0

## cartesian

In [131]:
s1 = range(2)
s2 = set('abc')
seq(s1).cartesian(s2).to_list() # 求s1与s2的笛卡尔积

[(0, 'a'), (0, 'b'), (0, 'c'), (1, 'a'), (1, 'b'), (1, 'c')]

## count / ~~count_by_key~~ / ~~count_by_value~~

In [17]:
f = X%2==0
seq.range(5).count(f) # 这个f必填

3

In [18]:
# count_by_key was treated as obsolete when this was written (TODO confirm against the installed
# PyFunctional version), so reduce_by_key is used as a rough stand-in.
# Note: reduce_by_key(operator.add) adds up the values that share a key (e.g. 'b' -> 2 + 3 + 4 = 9);
# it does not count how many times each key occurs.
seq([('a', 1), ('b', 2), ('b', 3), ('b', 4), ('c', 3), ('c', 0)]) .reduce_by_key(operator.add).to_list()

[('a', 1), ('b', 9), ('c', 3)]

In [19]:
# count_by_value不可用，用Counter近似
from collections import Counter
s = seq(['a', 'a', 'a', 'b', 'b', 'c', 'd'])
Counter(s)

Counter({'a': 3, 'b': 2, 'c': 1, 'd': 1})

## dict

In [132]:
import numpy as np

# 可以直接传固定值，也可以传default_value_func，就像 defaultdict(default_value_func) 一样

# d = seq([('a', 1), ('b', 2)]).dict('nan')
d = seq([('a', 1), ('b', 2)]).dict(np.random.rand)
d

defaultdict(<function RandomState.rand>, {'a': 1, 'b': 2})

In [133]:
d['a'],d['c'],d['d'],d['e'] # looking up a missing key calls the defaultdict's default factory (np.random.rand here) to produce its value

(1, 0.1045269208888322, 0.2721328917391167, 0.8890019300218747)

In [134]:
d['a'],d['c'],d['d'],d['e'] # values generated on first access were stored in the dict, so repeated lookups return the same numbers

(1, 0.1045269208888322, 0.2721328917391167, 0.8890019300218747)

## difference

In [23]:
seq(1,2,3).difference([2,3,4])

[1]

## distinct / distinct_by

In [24]:
seq([1, 1, 2, 3, 3, 3, 4]).distinct()

[1, 2, 3, 4]

In [25]:
f = X%3
seq([1, 1, 2, 3, 3, 3, 4]).distinct_by(f) # f 就是hashfunc， hash值相同的多个元素，只取第一个

[3, 1, 2]

## drop / drop_right / drop_while

In [26]:
seq([1, 2, 3, 4, 5]).drop(2) # 去掉开头的2个元素

[3, 4, 5]

In [27]:
seq([1, 2, 3, 4, 5]).drop_right(2) # 去掉结尾的2个元素

[1, 2, 3]

In [28]:
seq([1, 2, 3, 4, 5, 9, 2]).drop_while(X < 3) # 一直drop，直到遇到条件为False

[3, 4, 5, 9, 2]

## empty

In [29]:
seq([]).empty(), seq([1,2]).empty()

(True, False)

## exists / for_all

In [30]:
seq(1,2).exists(X>=2)

True

In [31]:
def is_even_log(n):
    """Print a log line for ``n`` and report whether it is even.

    The printed line makes it visible which elements a predicate-driven
    operation actually evaluated.

    :param n: the number to test
    :return: True when ``n`` is divisible by 2, otherwise False
    """
    log_line = 'log: {}'.format(n)
    print(log_line)
    remainder = n % 2
    return remainder == 0

seq.range(10).for_all(is_even_log) # for_all是当且仅当序列中的全部元素都能算出True时，才为True；从log看，有lazy_eval

log: 0
log: 1


False

## filter / filter_not / where

In [32]:
seq(1,2,3).filter(X%2==0)

[2]

In [33]:
seq(1,2,3).filter_not(X%2==0)

[1, 3]

In [72]:
seq(1,2,3).where(X%2==0)

[2]

## find / first

In [34]:
seq(1,2,3).find(X%2==0) # 找到序列中第1个满足条件的元素

2

In [35]:
seq(1,2,3).first() # 返回序列中的第1个元素

1

## flat_map / flatten

In [36]:
seq([[1, 2], [3, 4], [5, 6]]).flatten() # flatten a list of lists (arr_of_arr) into one flat sequence

[1, 2, 3, 4, 5, 6]

In [135]:
arr_of_arr = [[1, 2, 3], [3, 4], [5, 6]]
# flat_map applies the function to every sub-list, producing a new sub-list each time
# (here: the sub-list's minimum repeated 4 times), and then flattens those results
# into a single sequence -- i.e. map followed by flatten.
seq(arr_of_arr).flat_map(lambda a: [min(a)] * 4)

[1, 1, 1, 1, 3, 3, 3, 3, 5, 5, 5, 5]

## fold_left / fold_right

In [38]:
seq.range(3).fold_left('a', lambda c,n: '{}_{}'.format(c,n) ) #  从起始值开始， 逐个调用传入的fold函数

'a_0_1_2'

In [39]:
seq.range(3).fold_right('a', lambda n,c: '{}_{}'.format(c,n) ) #  从起始值开始， 逐个调用传入的fold函数； 注意lambda中两个参数的顺序互换了

'a_2_1_0'

## group_by / group_by_key / grouped

In [136]:
seq(["abc", "ab", "z", "f", "qw"]).group_by(len).list()

[(1, ['z', 'f']), (2, ['ab', 'qw']), (3, ['abc'])]

In [137]:
seq([('a', 1), ('b', 2), ('b', 3), ('b', 4), ('c', 3), ('c', 0)]).group_by_key().list()

[('a', [1]), ('b', [2, 3, 4]), ('c', [3, 0])]

In [42]:
seq([1, 2, 3, 4, 5, 6, 7, 8]).grouped(3).map(list).to_list() # 相邻元素大致分组

[[1, 2, 3], [4, 5, 6], [7, 8]]

## init /inits / tail /tails

In [148]:
#  命名很诡异，这不是初始化，而是除最后一个元素以外的子序列
assert seq.range(5).init() == seq.range(5).drop_right(1)
seq.range(5).init()

[0, 1, 2, 3]

In [149]:
seq.range(5).inits().list() # inits yields every prefix of the sequence, from the full sequence down to the empty one

[[0, 1, 2, 3, 4], [0, 1, 2, 3], [0, 1, 2], [0, 1], [0], []]

In [150]:
seq.range(5).tail() # 除开头元素以外的子序列

[1, 2, 3, 4]

In [152]:
seq.range(5).tails().list() # tails yields every suffix of the sequence, from the full sequence down to the empty one

[[0, 1, 2, 3, 4], [1, 2, 3, 4], [2, 3, 4], [3, 4], [4], []]

## join

In [154]:
seq([('a', 1), ('b', 2), ('c', 3)]).join([('a', 2), ('c', 5)], "inner") # inner是默认行为，可省略

0,1
a,"(1, 2)"
c,"(3, 5)"


In [156]:
seq([('a', 1), ('b', 2)]).join([('a', 3), ('c', 4)], "left")

0,1
a,"(1, 3)"
b,"(2, None)"


In [46]:
seq([('a', 1), ('b', 2)]).join([('a', 3), ('c', 4)], "right")

0,1
a,"(1, 3)"
c,"(None, 4)"


In [47]:
seq([('a', 1), ('b', 2)]).join([('a', 3), ('c', 4)], "outer")

0,1
a,"(1, 3)"
b,"(2, None)"
c,"(None, 4)"


## make_string
其实就是str.join（会先把每个元素转成str），但是join这个方法名已经被用来做关联(join)操作了

In [48]:
seq(['a','b',1,{'name':'jack'}]).make_string('@')

"a@b@1@{'name': 'jack'}"

## map

In [49]:
seq.range(5).map(X**2)

[0, 1, 4, 9, 16]

## max_by

In [50]:
from fn import F
seq(1,'abc',55,9999,718).max_by(F(str) >> len)

9999

## order_by

In [51]:
seq(1,'abc',55,9999,718).order_by(F(str)>>len)

[1, 55, 'abc', 718, 9999]

## partition

In [52]:
seq.range(-5,5).partition(X>0).list() # 返回 (truthy, falsy)

[[1, 2, 3, 4], [-5, -4, -3, -2, -1, 0]]

## reduce_by_key

In [158]:
seq([('a', 1), ('b', 2), ('b', 3), ('b', 4), ('c', 3), ('c', 0)]).reduce_by_key(X+X)

0,1
a,1
b,9
c,3


## show

用来显示序列的前几个元素，用tabulate包输出美观的表格格式  
见：

* http://docs.pyfunctional.org/en/master/functional.html#functional.pipeline.Sequence.show
* https://bitbucket.org/astanin/python-tabulate

In [55]:
table = [["Sun",696000,1989100000],["Earth",6371,5973.6],
         ["Moon",1737,73.5],["Mars",3390,641.85]]
seq(table).show(10, headers=["Planet","R (km)", "mass (x 10^29 kg)"])

Planet      R (km)    mass (x 10^29 kg)
--------  --------  -------------------
Sun         696000           1.9891e+09
Earth         6371        5973.6
Moon          1737          73.5
Mars          3390         641.85


## slice

In [58]:
assert seq.range(10).slice(2,5) == seq.range(10)[2:5]
seq.range(10).slice(2,5)

[2, 3, 4]

## sliding 滑动窗口

第一个参数是size，第二个是step(默认=1)

In [63]:
# Simulate 100 ms of audio: each frame (the window size) spans 25 ms and the window advances in 10 ms steps.
# The last windows are shorter because they run past the end of the sequence.
seq.range(100).sliding(25, 10).list()

[[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24],
 [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34],
 [20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44],
 [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54],
 [40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64],
 [50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74],
 [60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84],
 [70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94],
 [80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99],
 [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]]

## sorted

In [67]:
seq('a1','c9','b3','b2').sorted(key=X[1], reverse=True)

['c9', 'b3', 'b2', 'a1']

## starmap(smap)

In [68]:
seq([(2, 3), (-2, 1), (0, 10)]).smap(X+X)

[5, -1, 10]

## symmetric_difference
[对称差/环和](https://zh.wikipedia.org/wiki/%E5%AF%B9%E7%A7%B0%E5%B7%AE)

In [69]:
seq([1, 2, 3, 3]).symmetric_difference([2, 4, 5])

[1, 3, 4, 5]

## to_pandas

使用 `pd.DataFrame.from_records()`

In [71]:
seq.jsonl('./res/json_lines.json').to_pandas()

Unnamed: 0,age,name
0,5,Alice
1,6,Bob


## zip / zip_with_index

In [74]:
seq.range(3).zip('abc').list()

[(0, 'a'), (1, 'b'), (2, 'c')]

In [77]:
seq(list('abc')).zip_with_index(start=9).list()

[('a', 9), ('b', 10), ('c', 11)]

# case demo

## basic stream

In [80]:
from functional import seq
from collections import namedtuple
from fn import _ as X

Transaction = namedtuple('Transaction', ['reason', 'amount'])
transactions = [
    Transaction('github', 7),
    Transaction('food', 10),
    Transaction('coffee', 5),
    Transaction('digitalocean', 5),
    Transaction('food', 5),
    Transaction('riotgames', 25),
    Transaction('food', 10),
    Transaction('amazon', 200),
    Transaction('paycheck', -1000)
]

# Sum the amounts of all food-related transactions
(seq(transactions)
 .filter(X.reason == 'food')
 .map(X.amount)
 .sum()
)

25

## aggregate & join

In [89]:
%%bash
cat << EOF > ./res/chat_logs.jsonl
{"message":"hello anyone there?","date":"10/09","user":"bob"}
{"message":"need some help with a program","date":"10/09","user":"bob"}
{"message":"sure thing. What do you need help with?","date":"10/09","user":"dave"}
EOF

In [124]:
import re
import operator
from fn.func import curried
from fn import iters

messages = seq.jsonl('./res/chat_logs.jsonl')

# 转小写，去标点，按空格切开
def extract_words(message):
    """Lower-case a message, strip punctuation, and split it on single spaces.

    Deliberately written as a composed pipeline to show the functional style;
    it is equivalent to the shorter plain expression
    ``re.sub('[^0-9a-z ]+', '', message.lower()).split(' ')``.

    :param message: the raw message text
    :return: the list of space-separated pieces of the cleaned message
    """
    to_lower = X.call('lower')
    # drop every character that is not a digit, a lower-case letter or a space
    strip_punctuation = lambda text: re.sub('[^0-9a-z ]+', '', text)
    split_on_space = X.call('split', ' ')
    pipeline = F(to_lower) >> strip_punctuation >> split_on_space
    return pipeline(message)

(messages
#  plain alternative: .map(lambda log: extract_words(log['message']))
 .map(F() >> X['message'] >> extract_words) # functional style: take the 'message' field, then split it into words
 .flatten()
#  plain alternative: .map(lambda word: (word, 1))
 .zip(iters.repeat(1)) # shorthand: pair every word with the count 1
 .reduce_by_key(operator.add) # add up the 1s that belong to the same word
 .order_by(X[1]) # order the (word, count) pairs by their count
 .to_pandas(columns=['word','count'])
)

Unnamed: 0,word,count
0,thing,1
1,there,1
2,sure,1
3,do,1
4,hello,1
5,you,1
6,a,1
7,some,1
8,program,1
9,anyone,1
