# IO

In [1]:
from functional import seq

## jsonl

In [2]:
%%bash
# Create the fixture directory. -p makes this cell safe to re-run: a plain
# `mkdir` reports "File exists" when ./res is already there.
mkdir -p ./res
# Write a two-record JSON Lines fixture (one JSON object per line).
cat << EOF > ./res/json_lines.json
{"name":"Alice", "age":5}
{"name":"Bob", "age":6}
EOF

mkdir: ./res: File exists


In [4]:
# Read the JSON Lines file and materialize it as a list with one dict per line.
seq.jsonl('./res/json_lines.json').to_list()

[{'age': 5, 'name': 'Alice'}, {'age': 6, 'name': 'Bob'}]

## open

In [6]:
%%bash
# Write a small CSV fixture (header row plus two records).
# The redirect targets ./res, so that directory must already exist.
cat << EOF > ./res/csv_file.csv
name,age
Alice,5
Bob,6
EOF

In [10]:
# Read the file as text chunks split on '\n'. The final '' in the output
# presumably comes from the newline that terminates the file.
seq.open('./res/csv_file.csv', delimiter='\n').to_list()

['name,age', 'Alice,5', 'Bob,6', '']

# stream

## range

In [15]:
# seq.range takes the same kind of argument as the built-in range and yields 0..4 here.
seq.range(5)

[0, 1, 2, 3, 4]

In [14]:
# seq.range(5) compares equal to seq() wrapped around the built-in range(5).
print( seq.range(5) == seq(range(5)) )

True


## construct by args

In [36]:
seq(1,2,3) == seq([1,2,3]) # Passing one complete list or the individual elements both work

True

## ~~accumulate~~ / aggregate

accumulate是aggregate的一个特例；所以已经deprecated  
aggregate支持1～3个参数（只传1个参数时，该参数就是聚合函数fn；传2个参数时依次为init_value和fn），完整的3个参数分别代表

* arg1:  init_value=None， 聚合开始时所用的初始值
* arg2:  fn, (current, next) ==> result， 两两聚合所用的函数
* arg3:  agg_func=None， 在返回结果前执行的最后一个映射

In [27]:
import operator
seq.range(5).aggregate(operator.add)  # one argument: just the pairwise function, 0+1+2+3+4

10

In [28]:
# Two arguments: the initial value -2 comes first, then the pairwise function (-2 + 10 = 8).
seq.range(5).aggregate(-2, operator.add)

8

In [29]:
# Three arguments: the third function (str) is applied to the aggregated result before it is returned.
seq.range(5).aggregate(-2, operator.add, str)

'8'

## all/any

In [37]:
# all(): False here because one element is False.
seq(False, True).all()

False

In [38]:
# any(): True here because at least one element is True.
seq(False, True).any()

True

## average

In [42]:
from fn import _ as X
# X is fn's placeholder; X**2 stands for the function x -> x**2.
f = X**2
seq.range(5).average(f) # f is a mapping applied before averaging; it can be omitted
# [0,1,2,3,4] ==> [0,1,4,9,16] ==> 6.0

6.0

## cartesian

In [47]:
s1 = range(3)
# s2 is a set, so the order of the letters in the result is not fixed.
s2 = set('abcde')
seq(s1).cartesian(s2).to_list() # Cartesian product of s1 and s2

[(0, 'b'),
 (0, 'c'),
 (0, 'e'),
 (0, 'a'),
 (0, 'd'),
 (1, 'b'),
 (1, 'c'),
 (1, 'e'),
 (1, 'a'),
 (1, 'd'),
 (2, 'b'),
 (2, 'c'),
 (2, 'e'),
 (2, 'a'),
 (2, 'd')]

## count / ~~count_by_key~~ / ~~count_by_value~~

In [53]:
f = X%2==0
seq.range(5).count(f) # the predicate f is required; 0, 2 and 4 satisfy it, hence 3

3

In [73]:
# count_by_key is deprecated; reduce_by_key is used here as an approximation.
# NOTE(review): with operator.add this sums the values per key (b -> 2+3+4 = 9)
# rather than counting how many pairs share a key.
seq([('a', 1), ('b', 2), ('b', 3), ('b', 4), ('c', 3), ('c', 0)]) .reduce_by_key(operator.add).to_list()

[('b', 9), ('c', 3), ('a', 1)]

In [75]:
# count_by_value is not available; collections.Counter is used as an approximation.
from collections import Counter
s = seq(['a', 'a', 'a', 'b', 'b', 'c', 'd'])
Counter(s)

Counter({'a': 3, 'b': 2, 'c': 1, 'd': 1})

## dict

In [120]:
import numpy as np

# Either a fixed value or a default_value_func can be passed, just like defaultdict(default_value_func).

# Fixed-value variant, kept for reference:
# d = seq([('a', 1), ('b', 2)]).dict('nan')
d = seq([('a', 1), ('b', 2)]).dict(np.random.rand)
d

defaultdict(<function RandomState.rand>, {'a': 1, 'b': 2})

In [121]:
d['a'], d['c'],d['d'],d['e'] # looking up a missing key runs the defaultdict's default-value function to produce its value

(1, 0.8561821151542345, 0.24597282379588725, 0.5991462259078539)

In [122]:
d['a'], d['c'],d['d'],d['e'] # repeated lookups are cached: the same values come back as in the previous cell

(1, 0.8561821151542345, 0.24597282379588725, 0.5991462259078539)

## difference

In [125]:
# Keeps the elements of the sequence that do not appear in the argument.
seq(1,2,3).difference([2,3,4])

[1]

## distinct / distinct_by

In [128]:
# Removes duplicate elements.
seq([1, 1, 2, 3, 3, 3, 4]).distinct()

[1, 2, 3, 4]

In [131]:
f = X%3
seq([1, 1, 2, 3, 3, 3, 4]).distinct_by(f) # f acts as the hash/key function; of several elements with the same key only the first is kept

[3, 1, 2]

## drop / drop_right / drop_while

In [133]:
seq([1, 2, 3, 4, 5]).drop(2) # remove the first 2 elements

[3, 4, 5]

In [134]:
seq([1, 2, 3, 4, 5]).drop_right(2) # remove the last 2 elements

[1, 2, 3]

In [136]:
seq([1, 2, 3, 4, 5, 9, 2]).drop_while(X < 3) # keep dropping until the condition first becomes False; later elements (the final 2) are kept

[3, 4, 5, 9, 2]

## empty

In [141]:
# empty() reports whether the sequence has no elements.
seq([]).empty(), seq([1,2]).empty()

(True, False)

## exists / for_all

In [149]:
# exists(): True here because the element 2 satisfies the predicate.
seq(1,2).exists(X>=2)

True

In [174]:
def is_even_log(n):
    """Print a ``log: <n>`` trace line, then return whether ``n`` is even.

    The trace line makes it visible which elements a consuming sequence
    operation actually evaluates.
    """
    trace = 'log: {}'.format(n)
    print(trace)
    remainder = n % 2
    return remainder == 0

seq.range(10).for_all(is_even_log) # for_all is True if and only if every element evaluates to True; the log shows lazy evaluation (it stops at the first failing element)

log: 0
log: 1


False

## filter / filter_not

In [150]:
# filter keeps the elements for which the predicate is true.
seq(1,2,3).filter(X%2==0)

[2]

In [151]:
# filter_not keeps the elements for which the predicate is false.
seq(1,2,3).filter_not(X%2==0)

[1, 3]

## find / first

In [153]:
seq(1,2,3).find(X%2==0) # find the first element in the sequence that satisfies the condition

2

In [154]:
seq(1,2,3).first() # return the first element of the sequence

1

## flat_map / flatten

In [158]:
seq([[1, 2], [3, 4], [5, 6]]).flatten() # expand an array of arrays into a single flat sequence

[1, 2, 3, 4, 5, 6]

In [163]:
arr_of_arr = [[1, 2, 3], [3, 4], [5, 6]]
# NOTE(review): fn is assigned here but the call below passes a lambda instead.
fn = X
seq(arr_of_arr).flat_map(lambda a: [min(a), ]*9 )
# Each sub-array of arr_of_arr is mapped by the given function to a new sub-sequence;
# the sequence made of those sub-sequences is then flattened.

[1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5, 5]

## fold_left / fold_right

In [169]:
seq.range(3).fold_left('a', lambda c,n: '{}_{}'.format(c,n) ) # starting from the initial value, call the fold function on each element in turn (left to right)

'a_0_1_2'

In [172]:
seq.range(3).fold_right('a', lambda n,c: '{}_{}'.format(c,n) ) # starting from the initial value, call the fold function on each element from the right-hand end; note the lambda's two parameters are in swapped order (next, current)

'a_2_1_0'

## group_by / group_by_key / grouped

In [175]:
# Group the strings by the result of len; each output row pairs a length with the list of strings of that length.
seq(["abc", "ab", "z", "f", "qw"]).group_by(len)

0,1
1,"['z', 'f']"
2,"['ab', 'qw']"
3,['abc']


In [176]:
# Treat each tuple as (key, value) and collect the values that share a key into one list.
seq([('a', 1), ('b', 2), ('b', 3), ('b', 4), ('c', 3), ('c', 0)]).group_by_key()

0,1
b,"[2, 3, 4]"
c,"[3, 0]"
a,[1]


In [186]:
seq([1, 2, 3, 4, 5, 6, 7, 8]).grouped(3).map(list).to_list() # split adjacent elements into groups of 3; the last group holds the remainder

[[1, 2, 3], [4, 5, 6], [7, 8]]