### pandas

#### 1、Series

In [36]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame

In [None]:
# (1) Build a Series from a plain list, then show the Series, its index,
#     its underlying values and its first two entries.
obj = pd.Series([4, 7, -5, 3])
print(obj, obj.index, obj.values, obj.head(2), sep='\n')

In [None]:
# (2) Give the Series an explicit label index.
obj2 = pd.Series(
    [4, 7, -5, 3],
    index=['d', 'b', 'a', 'c'],
)
print(obj2)

In [None]:
# (3) Build a Series from a dict.
sdata = dict(Ohio=45000, Texas=71000, Oregon=16000, Utah=5000)
obj3 = pd.Series(sdata)
print(obj3)

In [None]:
# (4) Build a Series from a dict while also passing an explicit index;
#     index labels that have no matching dict key end up as NaN.
states = ['California', 'Ohio', 'Oregon', 'Texas']
obj4 = pd.Series(data=sdata, index=states)
print(obj4)

In [None]:
# (5) Adding two Series: entries that share an index label are added together.
print(obj3.add(obj4))

In [None]:
# (6) Name the index and the Series itself; both names show up when printed.
obj4.index.name = 'state'
obj4.name = 'population'
print(obj4)

In [None]:
# (7) Replace the whole index by assigning a new set of labels.
obj.index = pd.Index(['Bob', 'Steve', 'Jeff', 'Ryan'])
print(obj)

#### 2、DataFrame

In [None]:
# (1) Build a DataFrame from a dict of lists; each key becomes a column name.
data = {
    'state': ['Ohio', 'Ohio', 'Ohio', 'Nevada', 'Nevada'],
    'year': [2000, 2001, 2002, 2001, 2002],
    'pop': [1.5, 1.7, 3.6, 2.4, 2.9],
}
print(pd.DataFrame(data))
# Passing `columns` fixes the column order.
print(pd.DataFrame(data, columns=['year', 'state', 'pop']))

In [None]:
# (2) Set an explicit row index. A requested column that does not exist in
#     the data ('debt') is created and filled with NaN.
frame2 = pd.DataFrame(
    data,
    index=['one', 'two', 'three', 'four', 'five'],
    columns=['year', 'state', 'pop', 'debt'],
)
# Other things to try on frame2:
#   print(frame2)
#   print(frame2['state'])
#   print(frame2.year)
#   print(frame2.loc['three'])
#   frame2['debt'] = 16.5   # overwrite the whole column with one scalar
frame2['debt'] = np.arange(5)  # overwrite the column from a NumPy array
print(frame2)

In [None]:
# (3) Assign a Series to a column: the Series names the row labels to update
#     and their values; rows it does not mention become NaN.
print(frame2)
val = pd.Series({'two': -1.2, 'four': -1.5, 'five': -1.7})
frame2['debt'] = val
print(frame2)

In [None]:
# (4) Assigning to a column name that does not exist yet creates the column.
#     'eastern' is True exactly where state equals 'Ohio'.
frame2['eastern'] = frame2['state'].eq('Ohio')
print(frame2, frame2.columns, sep='\n')

In [None]:
# (5) Build a DataFrame from nested dicts and print it together with its transpose.
pop = {
    'Nevada': {2001: 2.4, 2002: 2.9},
    'Ohio': {2000: 1.5, 2001: 1.7, 2002: 3.6},
}
frame3 = pd.DataFrame(pop)
print(frame3, frame3.T, sep='\n')

In [None]:
# (6) Choose the row labels explicitly, and build a DataFrame out of sliced columns.
print(pd.DataFrame(pop, index=[2001, 2002, 2003]))
print(frame3['Ohio'][:-1], frame3['Nevada'][:2], sep='\n')
pdata = dict(Ohio=frame3['Ohio'][:-1], Nevada=frame3['Nevada'][:2])
print(pd.DataFrame(pdata))

In [None]:
# (7) Name the column axis and the row axis, then look at the raw value arrays.
frame3.columns.name = 'state'
frame3.index.name = 'year'
print(frame3, frame3.values, frame2.values, sep='\n')

#### 3、pandas数据操作
#### 3-1 重新索引

In [3]:
# (1) reindex: produce a new Series that follows the requested labels and order.
print('①重新指定索引及顺序')
obj = pd.Series([4.5, 7.2, -5.3, 3.6], index=list('dbac'))
print(obj)
obj2 = obj.reindex(index=['a', 'b', 'd', 'c', 'e'])
print(obj2)
# fill_value supplies the value used for labels that did not exist before.
print(obj.reindex(index=['a', 'b', 'd', 'c', 'e'], fill_value=0))

d    4.5
b    7.2
a   -5.3
c    3.6
dtype: float64
a   -5.3
b    7.2
d    4.5
c    3.6
e    NaN
dtype: float64
a   -5.3
b    7.2
d    4.5
c    3.6
e    0.0
dtype: float64


In [4]:
# (2) reindex while choosing how the newly introduced labels get their values.
print('②重新指定索引并指定元素填充方法')
obj3 = pd.Series({0: 'blue', 1: 'purple', 4: 'yellow'})
print(obj3)
# method='ffill' carries the previous value forward into the new labels.
print(obj3.reindex(index=range(6), method='ffill'))

②重新指定索引并指定元素填充方法
0      blue
1    purple
4    yellow
dtype: object
0      blue
1    purple
2    purple
3    purple
4    yellow
5    yellow
dtype: object


In [6]:
# (3) reindex the rows of a DataFrame; the new row 'b' is filled with 0.
print('③对DataFrame重新指定索引')
frame = pd.DataFrame(
    np.arange(9).reshape((3, 3)),
    columns=['Ohio', 'Texas', 'California'],
    index=list('acd'),
)
print(frame)
frame2 = frame.reindex(index=['a', 'b', 'c', 'd'], fill_value=0)
print(frame2)

③对DataFrame重新指定索引
   Ohio  Texas  California
a     0      1           2
c     3      4           5
d     6      7           8
   Ohio  Texas  California
a     0      1           2
b     0      0           0
c     3      4           5
d     6      7           8


In [7]:
# (4) The columns can be reindexed as well, by targeting the column axis.
print('④重新指定columns')
states = ['Texas', 'Utah', 'California']
print(frame.reindex(states, axis='columns'))

④重新指定columns
   Texas  Utah  California
a      1   NaN           2
c      4   NaN           5
d      7   NaN           8


In [8]:
# (5) Reindex rows and columns of a DataFrame while forward-filling new rows.
print('⑤对DataFrame重新指定索引并指定填元素充方法')
# The forward fill is applied to the row labels only and the columns are
# reindexed in a separate step: fill methods need sorted labels, and the
# column labels used here are not sorted.
print(frame.reindex(index=['a', 'b', 'c', 'd'], method='ffill')
           .reindex(columns=states))
# `.ix` no longer exists in pandas, and label-based `.loc` rejects labels that
# are missing ('b', 'Utah'). reindex is the supported way to select by labels
# that may be absent; the missing entries come back as NaN.
print(frame.reindex(index=['a', 'b', 'd', 'c'], columns=states))


⑤对DataFrame重新指定索引并指定填元素充方法
   Texas  Utah  California
a      1   NaN           2
b      1   NaN           2
c      4   NaN           5
d      7   NaN           8
   Texas  Utah  California
a    1.0   NaN         2.0
b    NaN   NaN         NaN
d    7.0   NaN         8.0
c    4.0   NaN         5.0


#### 3-2 丢弃指定轴上的项

In [9]:
# (1) drop returns a new Series without the given label(s); obj itself is unchanged.
print('①Series根据索引删除元素')
obj = pd.Series(np.arange(5.), index=list('abcde'))
new_obj = obj.drop(labels='c')
print(new_obj, obj.drop(labels=['d', 'c']), sep='\n')

①Series根据索引删除元素
a    0.0
b    1.0
d    3.0
e    4.0
dtype: float64
a    0.0
b    1.0
e    4.0
dtype: float64


In [10]:
# (2) DataFrame.drop can remove rows (by index label) or columns.
print('②DataFrame删除元素，可指定索引或列。')
data = pd.DataFrame(
    np.arange(16).reshape(4, 4),
    columns=['one', 'two', 'three', 'four'],
    index=['Ohio', 'Colorado', 'Utah', 'New York'],
)
print(data)
print(data.drop(index=['Colorado', 'Ohio']))   # remove two rows
print(data.drop(columns='two'))                # remove one column
print(data.drop(columns=['two', 'four']))      # remove several columns

②DataFrame删除元素，可指定索引或列。
          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
          one  two  three  four
Utah        8    9     10    11
New York   12   13     14    15
          one  three  four
Ohio        0      2     3
Colorado    4      6     7
Utah        8     10    11
New York   12     14    15
          one  three
Ohio        0      2
Colorado    4      6
Utah        8     10
New York   12     14


#### 3-3 索引、选取和过滤

In [11]:
# (1) Indexing a Series by label, by position, and with a boolean mask.
print('①Series的索引，默认数字索引可以工作。')
obj = Series(np.arange(4.), index = ['a', 'b', 'c', 'd'])
print(obj)
print(obj['b'])           # label lookup
# Positional lookups go through .iloc: using plain [] with integers on a
# string-labelled Series relies on a positional fallback that pandas has
# deprecated.
print(obj.iloc[3])        # single position
print(obj.iloc[[1, 3]])   # list of positions
print(obj[obj < 2])       # boolean mask keeps the entries where it is True

①Series的索引，默认数字索引可以工作。
a    0.0
b    1.0
c    2.0
d    3.0
dtype: float64
1.0
3.0
b    1.0
d    3.0
dtype: float64
a    0.0
b    1.0
dtype: float64


In [12]:
# (2) Slicing a Series by label.
print('②Series的数组切片')
# Unlike ordinary Python slices, a label slice includes both endpoints.
print(obj.loc['b':'c'])
obj.loc['b':'c'] = 5   # assigning through the slice updates obj itself
print(obj)

②Series的数组切片
b    1.0
c    2.0
dtype: float64
a    0.0
b    5.0
c    5.0
d    3.0
dtype: float64


In [13]:
# (3) Indexing a DataFrame. `.ix` has been removed from pandas, so label-based
#     access uses .loc and position-based access uses .iloc.
print('③DataFrame的索引')
data = DataFrame(np.arange(16).reshape((4, 4)),
                  index = ['Ohio', 'Colorado', 'Utah', 'New York'],
                  columns = ['one', 'two', 'three', 'four'])
print(data)
print(data['two'])  # a single column
print(data[['three', 'one']])  # several columns, in the requested order
print(data[:2])  # a plain slice selects rows
print(data.loc['Colorado', ['two', 'three']])  # row label + column labels
# Row labels combined with column positions: translate the positions into
# labels first so that a single .loc call can be used.
print(data.loc[['Colorado', 'Utah'], data.columns[[3, 0, 1]]])
print(data.iloc[2])  # row at position 2 (counting from 0)
print(data.loc[:'Utah', 'two'])  # rows from the start through 'Utah', column 'two'

③DataFrame的索引
          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
Ohio         1
Colorado     5
Utah         9
New York    13
Name: two, dtype: int32
          three  one
Ohio          2    0
Colorado      6    4
Utah         10    8
New York     14   12
          one  two  three  four
Ohio        0    1      2     3
Colorado    4    5      6     7
two      5
three    6
Name: Colorado, dtype: int32
          four  one  two
Colorado     7    4    5
Utah        11    8    9
one       8
two       9
three    10
four     11
Name: Utah, dtype: int32
Ohio        1
Colorado    5
Utah        9
Name: two, dtype: int32


In [14]:
# (4) Selecting and assigning with boolean conditions.
print('④根据条件选择')
print(data[data['three'] > 5])   # keep the rows whose 'three' value exceeds 5
print(data.lt(5))                # element-wise comparison gives a True/False frame
data[data.lt(5)] = 0             # every element below 5 is overwritten with 0
print(data)

④根据条件选择
          one  two  three  four
Colorado    4    5      6     7
Utah        8    9     10    11
New York   12   13     14    15
            one    two  three   four
Ohio       True   True   True   True
Colorado   True  False  False  False
Utah      False  False  False  False
New York  False  False  False  False
          one  two  three  four
Ohio        0    0      0     0
Colorado    0    5      6     7
Utah        8    9     10    11
New York   12   13     14    15


##### 小结 DataFrame的索引选项

|类型         | 说明      | 
|:------------------------ |:-------------|
|obj[val]       |选取DataFrame的单个列或一组列。在一些特殊情况下会比较便利：布尔型数组（过滤行）、切片（行切片）、布尔型DataFrame（根据条件设置值）。  |
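|obj.ix[val]     |选取DataFrame的单个行或一组行（注意：.ix 已在 pandas 1.0 中移除，新代码请改用 obj.loc[val]（按标签）或 obj.iloc[val]（按位置））       | 
|obj.ix[:, val]   |选取单个列或列子集（新代码请改用 obj.loc[:, val] 或 obj.iloc[:, val]）       |
|obj.ix[val1, val2] |同时选取行和列（新代码请改用 obj.loc[val1, val2] 或 obj.iloc[val1, val2]）   |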
|reindex方法     |将一个或多个轴匹配到新索引   |
|xs方法        |根据标签选取单行或单列，并返回一个Series  |
|icol、irow方法   |根据整数位置选取单行或单列，并返回一个Series（已从新版pandas中移除，请改用 iloc）  |
|get_value、set_value方法 |根据行标签和列标签选取单个值（已从新版pandas中移除，请改用 at / iat）  |

#### 3-4 算术运算和数据对齐


* 对不同的索引对象进行算术运算 
* 自动数据对齐在不重叠的索引处引入了NA值，缺失值会在算术运算过程中传播
* 对于DataFrame，对齐操作会同时发生在行和列上
* fill_value参数
* DataFrame和Series之间的运算 

In [15]:
# (1) Adding two Series whose indexes only partly overlap.
print('①Series的加法')
s1 = pd.Series([7.3, -2.5, 3.4, 1.5], index=list('acde'))
s2 = pd.Series([-2.1, 3.6, -1.5, 4, 3.1], index=list('acefg'))
print(s1, s2, s1 + s2, sep='\n')

①Series的加法
a    7.3
c   -2.5
d    3.4
e    1.5
dtype: float64
a   -2.1
c    3.6
e   -1.5
f    4.0
g    3.1
dtype: float64
a    5.2
c    1.1
d    NaN
e    0.0
f    NaN
g    NaN
dtype: float64


In [16]:
# (2) Adding two DataFrames: a cell only gets a value where both the row label
#     and the column label exist in both operands.
print('②DataFrame加法，索引和列都必须匹配')
df1 = pd.DataFrame(
    np.arange(9.).reshape((3, 3)),
    index=['Ohio', 'Texas', 'Colorado'],
    columns=['b', 'c', 'd'],
)
df2 = pd.DataFrame(
    np.arange(12).reshape((4, 3)),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
print(df1, df2, df1 + df2, sep='\n')

②DataFrame加法，索引和列都必须匹配
            b    c    d
Ohio      0.0  1.0  2.0
Texas     3.0  4.0  5.0
Colorado  6.0  7.0  8.0
        b   d   e
Utah    0   1   2
Ohio    3   4   5
Texas   6   7   8
Oregon  9  10  11
            b   c     d   e
Colorado  NaN NaN   NaN NaN
Ohio      3.0 NaN   6.0 NaN
Oregon    NaN NaN   NaN NaN
Texas     9.0 NaN  12.0 NaN
Utah      NaN NaN   NaN NaN


In [17]:
# (3) Supplying a fill value for the positions that exist on one side only.
print('③数据填充')
df1 = pd.DataFrame(np.arange(12.).reshape((3, 4)), columns=['a', 'b', 'c', 'd'])
df2 = pd.DataFrame(np.arange(20.).reshape((4, 5)), columns=['a', 'b', 'c', 'd', 'e'])
print(df1, df2, sep='\n')
# add(): a cell missing from one operand is treated as 0 before adding.
print(df1.add(df2, fill_value=0))
# reindex(): the column that df1 lacks is created and filled with 0.
print(df1.reindex(columns=df2.columns, fill_value=0))

③数据填充
     a    b     c     d
0  0.0  1.0   2.0   3.0
1  4.0  5.0   6.0   7.0
2  8.0  9.0  10.0  11.0
      a     b     c     d     e
0   0.0   1.0   2.0   3.0   4.0
1   5.0   6.0   7.0   8.0   9.0
2  10.0  11.0  12.0  13.0  14.0
3  15.0  16.0  17.0  18.0  19.0
      a     b     c     d     e
0   0.0   2.0   4.0   6.0   4.0
1   9.0  11.0  13.0  15.0   9.0
2  18.0  20.0  22.0  24.0  14.0
3  15.0  16.0  17.0  18.0  19.0
     a    b     c     d  e
0  0.0  1.0   2.0   3.0  0
1  4.0  5.0   6.0   7.0  0
2  8.0  9.0  10.0  11.0  0


In [18]:
# (4) Arithmetic between a DataFrame and a Series, next to the NumPy analogue.
print('④DataFrame与Series之间的操作')
# NumPy: subtracting the first row from the 2-D array subtracts it from every row.
arr = np.arange(12.).reshape((3, 4))
print(arr)
print(arr[0])
print(arr - arr[0])
frame = DataFrame(np.arange(12).reshape((4, 3)),
                  columns = list('bde'),
                  index = ['Utah', 'Ohio', 'Texas', 'Oregon'])
# `.ix` has been removed from pandas; .iloc[0] selects the first row by position.
series = frame.iloc[0]
print(frame)
print(series)
print(frame - series)  # the row is subtracted from every row of the frame
# series2 is labelled 'b', 'e', 'f' while the frame has columns 'b', 'd', 'e':
# the result carries the union of both label sets.
series2 = Series(range(3), index = list('bef'))
print(frame + series2)
series3 = frame['d']
print(frame.sub(series3, axis = 0))  # axis=0: subtract the column from every column, matching on row labels

④DataFrame与Series之间的操作
[[  0.   1.   2.   3.]
 [  4.   5.   6.   7.]
 [  8.   9.  10.  11.]]
[ 0.  1.  2.  3.]
[[ 0.  0.  0.  0.]
 [ 4.  4.  4.  4.]
 [ 8.  8.  8.  8.]]
        b   d   e
Utah    0   1   2
Ohio    3   4   5
Texas   6   7   8
Oregon  9  10  11
b    0
d    1
e    2
Name: Utah, dtype: int32
        b  d  e
Utah    0  0  0
Ohio    3  3  3
Texas   6  6  6
Oregon  9  9  9
          b   d     e   f
Utah    0.0 NaN   3.0 NaN
Ohio    3.0 NaN   6.0 NaN
Texas   6.0 NaN   9.0 NaN
Oregon  9.0 NaN  12.0 NaN
        b  d  e
Utah   -1  0  1
Ohio   -1  0  1
Texas  -1  0  1
Oregon -1  0  1


#### 3-5 函数应用和映射

* numpy的ufuncs（元素级数组方法） 
* DataFrame的apply方法 
* 对象的applymap方法（因为Series有一个应用于元素级的map方法） 

In [19]:
# 1. NumPy ufuncs can be applied to a DataFrame directly.
#    The values are random, so they differ from run to run.
frame = pd.DataFrame(
    np.random.randn(4, 3),
    index=['Utah', 'Ohio', 'Texas', 'Oregon'],
    columns=['b', 'd', 'e'],
)
print(frame, np.abs(frame), sep='\n')

               b         d         e
Utah    0.051423 -0.190328  0.173164
Ohio    0.561178  0.504314  0.098294
Texas  -1.166611  0.771533 -2.385577
Oregon  1.428450 -0.188436 -1.032306
               b         d         e
Utah    0.051423  0.190328  0.173164
Ohio    0.561178  0.504314  0.098294
Texas   1.166611  0.771533  2.385577
Oregon  1.428450  0.188436  1.032306


In [21]:
# 2. DataFrame.apply with an anonymous function and with a named function.
# The function is called once per column (the default direction).
print(frame.apply(lambda col: col.max() - col.min()))


def f(x):
    """Return the minimum and maximum of ``x`` as a Series labelled 'min' and 'max'."""
    extremes = [x.min(), x.max()]
    return pd.Series(extremes, index=['min', 'max'])


print(frame.apply(f))

b    2.595061
d    0.961861
e    2.558741
dtype: float64
            b         d         e
min -1.166611 -0.190328 -2.385577
max  1.428450  0.771533  0.173164


In [22]:
# 3. Element-wise functions: applymap on a DataFrame, map on a Series.
def _format(x):
    """Render ``x`` as text with two digits after the decimal point."""
    return '%.2f' % x


print(frame.applymap(_format))
print(frame['e'].map(_format))

            b      d      e
Utah     0.05  -0.19   0.17
Ohio     0.56   0.50   0.10
Texas   -1.17   0.77  -2.39
Oregon   1.43  -0.19  -1.03
Utah       0.17
Ohio       0.10
Texas     -2.39
Oregon    -1.03
Name: e, dtype: object


#### 3-6 排序和排名


* 对行或列索引进行排序 
* 对于DataFrame，根据任意一个轴上的索引进行排序 
* 可以指定升序降序 
* 按值排序 
* 对于DataFrame，可以指定按值排序的列 
* rank函数 

In [24]:
# 1. Sorting by index labels; for a DataFrame the axis can be chosen.
obj = pd.Series(range(4), index=list('dabc'))
print(obj.sort_index())
frame = pd.DataFrame(
    np.arange(8).reshape((2, 4)),
    columns=['d', 'a', 'b', 'c'],
    index=['three', 'one'],
)
print(
    frame.sort_index(),                           # rows (default axis)
    frame.sort_index(axis=0),                     # rows, stated explicitly
    frame.sort_index(axis=1, ascending=False),    # columns, descending
    sep='\n',
)

a    1
b    2
c    3
d    0
dtype: int32
       d  a  b  c
one    4  5  6  7
three  0  1  2  3
       d  a  b  c
one    4  5  6  7
three  0  1  2  3
       d  c  b  a
three  0  3  2  1
one    4  7  6  5


In [25]:
# 2. Sorting a Series by its values (sort_values replaces the retired `order` method).
obj = pd.Series(data=[4, 7, -3, 2])
print(obj.sort_values())

2   -3
3    2
0    4
1    7
dtype: int64


In [28]:
# 3. Sorting a DataFrame by the values of one or more columns
#    (sort_values replaces the retired sort_index(by=...)).
frame = pd.DataFrame(
    {'b': [4, 7, -3, 2], 'a': [0, 1, 0, 1]},
    columns=['b', 'a'],
)
print(frame)
print(frame.sort_values('b'))          # one key
print(frame.sort_values(['a', 'b']))   # 'a' first, then 'b' within equal 'a'

   b  a
0  4  0
1  7  1
2 -3  0
3  2  1
   b  a
2 -3  0
3  2  1
0  4  0
1  7  1
   b  a
2 -3  0
0  4  0
3  2  1
1  7  1


In [41]:
# 4. rank: by default tied values share the mean of their 1-based positions.
obj = pd.Series([7, -5, 7, 4, 2, 0, 4])
# Sorted values with their positions: -5(1), 0(2), 2(3), 4(4), 4(5), 7(6), 7(7)
print(obj.rank())
# method='first': ties are ranked in order of appearance instead of averaged.
print(obj.rank(method='first'))
# Descending order, ties take the largest rank of their group; -5 therefore ranks 7.
print(obj.rank(method='max', ascending=False))
frame = pd.DataFrame({
    'b': [4.3, 7, -3, 2],
    'a': [0, 1, 0, 1],
    'c': [-2, 5, 8, -2.5],
})
# axis=1 ranks the values within each row.
print(frame, frame.rank(axis=1), sep='\n')

0    6.5
1    1.0
2    6.5
3    4.5
4    3.0
5    2.0
6    4.5
dtype: float64
0    6.0
1    1.0
2    7.0
3    4.0
4    3.0
5    2.0
6    5.0
dtype: float64
0    2.0
1    7.0
2    2.0
3    4.0
4    5.0
5    6.0
6    4.0
dtype: float64
   a    b    c
0  0  4.3 -2.0
1  1  7.0  5.0
2  0 -3.0  8.0
3  1  2.0 -2.5
     a    b    c
0  2.0  3.0  1.0
1  1.0  3.0  2.0
2  2.0  1.0  3.0
3  2.0  3.0  1.0


#### 3-7 带有重复值的索引
* 对于重复索引，返回Series，对应单个值的索引则返回标量

In [50]:
# (1) Index labels may repeat. Selecting a repeated label returns every
#     matching entry; a label that occurs once returns a single value.
obj = Series(range(5,10), index = ['a', 'a', 'b', 'b', 'c'])
print(obj)
print(obj.index.is_unique)  # False when any label occurs more than once
# `.ix` has been removed from pandas: select the repeated label with .loc,
# then pick one of the matches by position with .iloc.
print(obj.loc['b'].iloc[0], obj.loc['b'].iloc[1])
df = DataFrame(np.random.randn(4, 3), index = ['a', 'a', 'b', 'b'])
print(df)
print(df.loc['b'].iloc[0])  # first of the two rows labelled 'b'
print(df.loc['b'].iloc[1])  # second of the two rows labelled 'b'

a    5
a    6
b    7
b    8
c    9
dtype: int32
False
7 8
          0         1         2
a -0.356556 -0.331428  0.843001
a -0.515676 -0.055060 -0.781903
b -1.504472  0.486719 -0.884501
b -0.287172 -0.425806 -1.683911
0   -1.504472
1    0.486719
2   -0.884501
Name: b, dtype: float64
0   -0.287172
1   -0.425806
2   -1.683911
Name: b, dtype: float64


### 4、汇总和计算描述统计


#### ①常用方法选项 
|类型         | 说明      | 
|:------------------------ |:-------------|
|axis       |指定轴，DataFrame的行用0，列用1 |
|skipna      |排除缺失值，默认值为True       | 
|level      |如果轴是层次化索引的（即MultiIndex），则根据level选取分组 | 

#### ②常用描述和汇总统计函数  I 
|类型         | 说明      | 
|:------------------------ |:-------------|
|count       |非NA值的数量 |
|describe      |针对Series或各DataFrame列计算汇总统计 | 
|min, max      |计算最小值和最大值  | 
|argmin, argmax  |计算能够获取到最小值和最大值的索引位置（整数）   |
|idxmin, idxmax      |计算能够获取到最小值和最大值的索引值   |
|sum      |值的总和   |
|mean      |值的平均数   |
|median      |值的算术中位数   |
|mad      |根据平均值计算平均绝对离差  |

#### ③常用描述和汇总统计函数  II 
|类型         | 说明      | 
|:------------------------ |:-------------|
|var       |样本值的方差  |
|std      |样本值的标准差        | 
|skew      |样本值的偏度（三阶矩）  | 
|kurt      |样本值的峰度（四阶矩）   | 
|cumsum      |样本值的累计和   | 
|cummin, cummax      |样本值的累计最小值和累计最大值   | 
|cumprod      |样本值的累计积   | 
|diff      |计算一阶差分   | 
|pct_change      |计算百分数变化  | 

* 数值型和非数值型的区别 
* NA值会被自动排除，除非通过skipna选项禁用该行为（skipna=False） 

In [52]:
# (1) Sums over a DataFrame that contains missing values.
print('①求和')
df = pd.DataFrame(
    [[1.4, np.nan], [7.1, -4.5], [np.nan, np.nan], [0.75, -1.3]],
    columns=['one', 'two'],
    index=list('abcd'),
)
print(df)
print(df.sum(axis='index'))     # one total per column
print(df.sum(axis='columns'))   # one total per row

①求和
    one  two
a  1.40  NaN
b  7.10 -4.5
c   NaN  NaN
d  0.75 -1.3
one    9.25
two   -5.80
dtype: float64
a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64


In [53]:
# (2) Row means, without and with skipping of missing values.
print('②平均数')
# skipna controls whether missing values are left out; it defaults to True.
print(df.mean(axis='columns', skipna=False))
print(df.mean(axis='columns'))

②平均数
a      NaN
b    1.300
c      NaN
d   -0.275
dtype: float64
a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64


In [54]:
# (3) Other summaries: label of each column's maximum, cumulative sums, and describe().
print('③其它')
print(df.idxmax(), df.cumsum(), df.describe(), sep='\n')
# describe() on a Series of strings.
obj = pd.Series(['a', 'a', 'b', 'c'] * 4)
print(obj.describe())

③其它
one    b
two    d
dtype: object
    one  two
a  1.40  NaN
b  8.50 -4.5
c   NaN  NaN
d  9.25 -5.8
            one       two
count  3.000000  2.000000
mean   3.083333 -2.900000
std    3.493685  2.262742
min    0.750000 -4.500000
25%    1.075000 -3.700000
50%    1.400000 -2.900000
75%    4.250000 -2.100000
max    7.100000 -1.300000
count     16
unique     3
top        a
freq       8
dtype: object


#### 4-1 相关系数与协方差
* 相关系数：相关系数是用以反映变量之间相关关系密切程度的统计指标
* 协方差：从直观上来看，协方差表示的是两个变量总体误差的期望。如果两个变量的变化趋势一致，也就是说如果其中一个大于自身的期望值时另外一个也大于自身的期望值，那么两个变量之间的协方差就是正值；如果两个变量的变化趋势相反，即其中一个变量大于自身的期望值时另外一个却小于自身的期望值，那么两个变量之间的协方差就是负值。 