# 5.3 Data manipulation

In [12]:
# port.py

import csv

def read_portfolio(filename, *, errors='warn'):
    '''
    Read a CSV file with name, date, shares, price data into a list.

    The first row is treated as a header and discarded.  Every other
    non-blank row becomes a dict with the keys 'name', 'date',
    'shares' (int) and 'price' (float).

    errors selects what happens to a row that cannot be converted
    (non-numeric shares/price, or fewer than four columns):
        'warn'   - print the row and the reason, then skip it (default)
        'silent' - skip the row without any output
        'raise'  - re-raise the underlying ValueError / IndexError

    Returns the list of records; an empty file yields an empty list.
    Raises ValueError if errors is not one of the three modes above.
    '''
    if errors not in {'warn', 'silent', 'raise'}:
        raise ValueError("errors must be one of 'warn', 'silent', 'raise'")

    portfolio = []      # List of records
    # newline='' leaves line-ending handling to the csv module.
    with open(filename, 'r', newline='') as f:
        rows = csv.reader(f)
        next(rows, None)    # Discard the header; the default avoids StopIteration on an empty file
        for rowno, row in enumerate(rows, start=1):
            if not row:
                continue    # csv.reader yields [] for a blank line; nothing to parse
            try:
                # A dict is easier to read than a bare tuple(row) record.
                # Build it without mutating row so warnings show the raw text.
                record = {
                    'name': row[0],
                    'date': row[1],
                    'shares': int(row[2]),
                    'price': float(row[3]),
                }
            except (ValueError, IndexError) as err:
                if errors == 'raise':
                    raise    # Re-raises the exception being handled
                if errors == 'warn':
                    print('Row:', rowno, 'Bad row:', row)
                    print('Row:', rowno, 'Reason:', err)
                continue    # Skip to the next row
            portfolio.append(record)
    return portfolio

# Load the sample holdings; errors defaults to 'warn', so unparsable rows are
# printed and skipped rather than aborting the read.
portfolio = read_portfolio('Data/portfolio.csv')

In [13]:
# Echo the parsed records: one dict per data row.
portfolio

[{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2},
 {'name': 'IBM', 'date': '2007-05-13', 'shares': 50, 'price': 91.1},
 {'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44},
 {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23},
 {'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37},
 {'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1},
 {'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}]

In [14]:
# Print the holdings one per line.
for record in portfolio:
    print(record)

{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}
{'name': 'IBM', 'date': '2007-05-13', 'shares': 50, 'price': 91.1}
{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}
{'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}
{'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}
{'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}
{'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}


### 计算总数

In [16]:
# Sum shares * price over every holding with an explicit loop.
total = 0.0
for record in portfolio:
    shares, price = record['shares'], record['price']
    total = total + shares * price
total

44671.15

### 得到每一行数据的名字

In [17]:
# Collect the 'name' field of every holding, in portfolio order.
names = []
for record in portfolio:
    name = record['name']
    names.append(name)
names

['AA', 'IBM', 'CAT', 'MSFT', 'GE', 'MSFT', 'IBM']

### 得到持股数大于100的股票

In [18]:
# Keep only the holdings whose share count exceeds 100.
more100 = []
for record in portfolio:
    if not record['shares'] > 100:
        continue
    more100.append(record)
more100

[{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44},
 {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}]

### 一行代码实现相同的功能

In [19]:
# Same total as the explicit loop, in one expression.  A generator expression
# feeds sum() directly instead of building a list that is thrown away.
total = sum(holding['shares'] * holding['price'] for holding in portfolio)
total

44671.15

In [20]:
# List comprehension form of the name-collecting loop.
names = [record['name'] for record in portfolio]
names

['AA', 'IBM', 'CAT', 'MSFT', 'GE', 'MSFT', 'IBM']

In [21]:
# Filtering comprehension: whole records with more than 100 shares.
more100 = [record for record in portfolio if record['shares'] > 100]
more100

[{'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44},
 {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}]

In [22]:
# Filter and project at once: only the names of holdings above 100 shares.
more100 = [
    record['name']
    for record in portfolio
    if record['shares'] > 100
]
more100

['CAT', 'MSFT']

### 数据去重

In [23]:
# Two equivalent ways to de-duplicate the names.
names = [holding['name'] for holding in portfolio]
print(names)
# 1) Build the list first, then convert it to a set.
unique_names = set(names)
print(unique_names)
# 2) Do it in one line with a set comprehension.
unique_names = {holding['name'] for holding in portfolio}
unique_names

['AA', 'IBM', 'CAT', 'MSFT', 'GE', 'MSFT', 'IBM']
{'IBM', 'MSFT', 'CAT', 'GE', 'AA'}


{'AA', 'CAT', 'GE', 'IBM', 'MSFT'}

### 将集合数据变成字符串

In [24]:
# Join the names into one comma-separated string.  The order is whatever the
# set yields when iterated; Python does not guarantee a particular order.
namestr = ','.join(unique_names)
namestr

'IBM,MSFT,CAT,GE,AA'

### 爬虫拿到对应的数据

In [25]:
# Original attempt at fetching live quotes, kept for reference:
#namestr = ','.join(unique_names)
#namestr
#import urllib.request
#u = urllib.request.urlopen('http://finance.yahoo.com/d/quotes.csv?s={}&f=l1'.format(namestr))
#data = u.read()
#data
# The request above kept failing for reasons the author could not determine,
# so the bytes it was expected to return are hard-coded instead.
# NOTE(review): the Yahoo quotes.csv endpoint appears to have been retired - confirm.
data = b'72.51\n9.27\n153.74\n30.23\n53.00\n'

In [28]:
# split() with no argument splits on whitespace, so each newline-terminated
# price in data becomes one bytes token.
pricedata = data.split()
print(pricedata)
unique_names

[b'72.51', b'9.27', b'153.74', b'30.23', b'53.00']


{'AA', 'CAT', 'GE', 'IBM', 'MSFT'}

### zip function的应用

In [29]:
# Question this cell raises: how do we make sure each name lines up with its
# own price?  A set has no defined order, so pair the prices with the names in
# the order they were written into the query string rather than re-iterating
# the set.
# NOTE(review): assumes the quote service answers in request order - confirm.
for name, price in zip(namestr.split(','), pricedata):
    print(name, '=', price)

IBM = b'72.51'
MSFT = b'9.27'
CAT = b'153.74'
GE = b'30.23'
AA = b'53.00'


In [30]:
# Build a name -> raw price mapping.  Names are taken from the query string so
# the pairing follows the requested order instead of set iteration order.
prices = dict(zip(namestr.split(','), pricedata))
prices

{'IBM': b'72.51',
 'MSFT': b'9.27',
 'CAT': b'153.74',
 'GE': b'30.23',
 'AA': b'53.00'}

In [31]:
# Name -> float price.  float() accepts the bytes tokens directly.
# The loop variable is `price` so it no longer reuses the name of the dict
# being built, and names come from the query string so the pairing follows the
# requested order instead of set iteration order.
prices = {
    name: float(price)
    for name, price in zip(namestr.split(','), pricedata)
}
prices

{'IBM': 72.51, 'MSFT': 9.27, 'CAT': 153.74, 'GE': 30.23, 'AA': 53.0}

In [38]:
# Re-read the holdings, show them, then value each one at the quote stored
# under its name in prices.
current_value = 0.0
portfolio = read_portfolio('Data/portfolio.csv')
print(portfolio)
for record in portfolio:
    shares = record['shares']
    quote = prices[record['name']]
    current_value = current_value + shares * quote
current_value

[{'name': 'AA', 'date': '2007-06-11', 'shares': 100, 'price': 32.2}, {'name': 'IBM', 'date': '2007-05-13', 'shares': 50, 'price': 91.1}, {'name': 'CAT', 'date': '2006-09-23', 'shares': 150, 'price': 83.44}, {'name': 'MSFT', 'date': '2007-05-17', 'shares': 200, 'price': 51.23}, {'name': 'GE', 'date': '2006-02-01', 'shares': 95, 'price': 40.37}, {'name': 'MSFT', 'date': '2006-10-31', 'shares': 50, 'price': 65.1}, {'name': 'IBM', 'date': '2006-07-09', 'shares': 100, 'price': 70.44}]


44426.85

In [34]:
# One-expression version of the valuation loop.  A generator expression feeds
# sum() directly instead of building a list that is thrown away.
current_value = sum(
    holding['shares'] * prices[holding['name']] for holding in portfolio
)
current_value

44426.85