#  Guided Project - Analyzing Stock Prices 

## Stock Price Data

In [1]:
from concurrent.futures import ThreadPoolExecutor
import csv
import os

stock_price = dict()

def reading_file(name):
    filename='/'.join(['prices',name])
    with open(filename,'r',encoding='utf-8') as f:
        next(f)
        data = list(csv.reader(f))
        return (name,data)

list_file = os.listdir('prices')        
        
with ThreadPoolExecutor(max_workers=5) as pool:
    list_data = list(pool.map(reading_file,list_file))
    
for item in list_data:
    name=item[0][:-4]
    stock_price[name] = item[1]
        

In [2]:
columns = ['date', 'close', 'open', 'high', 'low', 'volume']
companies = list(stock_price.keys())

In [3]:
companies[:10]

['afsi',
 'cidm',
 'dvcr',
 'cfbk',
 'expd',
 'cern',
 'aaxn',
 'cetv',
 'cthr',
 'chfn']

In [4]:
stock_price['aapl']

[['2007-01-03',
  '83.800002',
  '86.289999',
  '86.579999',
  '81.899999',
  '309579900'],
 ['2007-01-04',
  '85.659998',
  '84.050001',
  '85.949998',
  '83.820003',
  '211815100'],
 ['2007-01-05', '85.049997', '85.77', '86.199997', '84.400002', '208685400'],
 ['2007-01-08', '85.47', '85.959998', '86.529998', '85.280003', '199276700'],
 ['2007-01-09', '92.570003', '86.450003', '92.979999', '85.15', '837324600'],
 ['2007-01-10',
  '96.999997',
  '94.749999',
  '97.800002',
  '93.450003',
  '738220000'],
 ['2007-01-11', '95.800003', '95.94', '96.779999', '95.10', '360063200'],
 ['2007-01-12',
  '94.620003',
  '94.590002',
  '95.059999',
  '93.229998',
  '328172600'],
 ['2007-01-16', '97.099999', '95.68', '97.250003', '95.450002', '311019100'],
 ['2007-01-17',
  '94.949997',
  '97.560003',
  '97.599998',
  '94.820001',
  '411565000'],
 ['2007-01-18',
  '89.070003',
  '92.099998',
  '92.109999',
  '89.050002',
  '591151400'],
 ['2007-01-19',
  '88.500003',
  '88.629999',
  '89.650002',
 

## Computing Aggregates

In [14]:
dict_stock_price = dict()

for key,value in stock_price.items():
    dict_company = dict()
    
    
    dict_company[columns[0]] = [row[0] for row in value] 
    
    for col in columns[1:]:
        index = columns.index(col)
        dict_company[col] = [float(row[index]) for row in value] 
    
    dict_stock_price[key] = dict_company

dict_stock_price['aapl']    


{'close': [83.800002,
  85.659998,
  85.049997,
  85.47,
  92.570003,
  96.999997,
  95.800003,
  94.620003,
  97.099999,
  94.949997,
  89.070003,
  88.500003,
  86.789997,
  85.699999,
  86.700002,
  86.249998,
  85.379998,
  85.939998,
  85.550002,
  85.73,
  84.739997,
  84.749997,
  83.939999,
  84.149997,
  86.150002,
  86.180003,
  83.270003,
  84.88,
  84.700003,
  85.300003,
  85.210001,
  84.829999,
  85.899997,
  89.199999,
  89.509999,
  89.070003,
  88.510003,
  83.929998,
  84.610001,
  87.060003,
  85.409999,
  86.32,
  88.190002,
  87.719998,
  87.999998,
  87.969997,
  89.870001,
  88.400001,
  89.999997,
  89.570001,
  89.590001,
  91.130003,
  91.479998,
  93.869999,
  93.960001,
  93.519998,
  95.849998,
  95.460002,
  93.239998,
  93.750003,
  92.909997,
  93.650001,
  94.5,
  94.270001,
  94.679997,
  93.650001,
  94.250001,
  92.590003,
  92.190001,
  90.240003,
  91.430003,
  90.349998,
  90.4,
  90.270003,
  90.969999,
  93.509997,
  93.239998,
  95.349999,
  9

In [15]:
import statistics

columns

['date', 'close', 'open', 'high', 'low', 'volume']

In [21]:
dict_stats_company = dict()

for key,value in dict_stock_price.items():   
    stats_company = dict()
    
    for col in columns[1:]:
        stats=dict()
        
        stats['mean']= statistics.mean(value[col])        
        stats['stdev']= statistics.stdev(value[col])
        stats['median']= statistics.median(value[col])
        stats['median_grouped']= statistics.median_grouped(value[col])
        
        stats_company[col]=stats
    
    dict_stats_company[key]=stats_company
    

In [22]:
columns[1:]

['close', 'open', 'high', 'low', 'volume']

In [23]:
dict_stats_company

{'aal': {'close': {'mean': 22.07495366679537,
   'median': 14.91,
   'median_grouped': 14.42,
   'stdev': 16.366905147747204},
  'high': {'mean': 22.5200927034749,
   'median': 15.31,
   'median_grouped': 15.31,
   'stdev': 16.569768488623815},
  'low': {'mean': 21.650617742857143,
   'median': 14.585,
   'median_grouped': 14.12,
   'stdev': 16.163063007025777},
  'open': {'mean': 22.102324340540537,
   'median': 14.995000000000001,
   'median_grouped': 14.5,
   'stdev': 16.37618360332894},
  'volume': {'mean': 8469080.501930501,
   'median': 7240650.0,
   'median_grouped': 7245999.5,
   'stdev': 6189362.983753678}},
 'aame': {'close': {'mean': 2.7796795366795366,
   'median': 2.92,
   'median_grouped': 2.8745454545454545,
   'stdev': 1.2105007333737694},
  'high': {'mean': 2.8246293436293435,
   'median': 2.94,
   'median_grouped': 2.8945454545454545,
   'stdev': 1.2220319107108575},
  'low': {'mean': 2.7127142857142856,
   'median': 2.85,
   'median_grouped': 2.85,
   'stdev': 1.1934

In [28]:
stats_sort = sorted(dict_stats_company.keys(),
                    key=(lambda x:dict_stats_company[x]['close']['mean']),
                    reverse=True)

In [29]:
stats_sort

['amzn',
 'aapl',
 'cme',
 'atri',
 'fcnca',
 'bidu',
 'eqix',
 'biib',
 'esgr',
 'bbh',
 'djco',
 'dhil',
 'csgp',
 'anat',
 'alxn',
 'cost',
 'cacc',
 'amgn',
 'bwld',
 'ffiv',
 'celg',
 'algt',
 'coke',
 'cswc',
 'cbrl',
 'chdn',
 'fisv',
 'esrx',
 'cern',
 'alog',
 'acgl',
 'anss',
 'chrw',
 'adp',
 'asml',
 'eslt',
 'casy',
 'ctxs',
 'ctsh',
 'dltr',
 'bokf',
 'dwaq',
 'cpla',
 'expe',
 'colm',
 'cohr',
 'cvco',
 'adbe',
 'chkp',
 'cmpr',
 'bmrn',
 'ctas',
 'bbby',
 'banf',
 'atni',
 'abco',
 'arlp',
 'airm',
 'expo',
 'ctrp',
 'cpsi',
 'fast',
 'aaww',
 'core',
 'bbry',
 'expd',
 'akam',
 'cbsh',
 'cinf',
 'adsk',
 'adi',
 'ffin',
 'cass',
 'ande',
 'bcpc',
 'agii',
 'esnd',
 'ccmp',
 'discb',
 'chco',
 'bmrc',
 'dish',
 'disca',
 'alny',
 'adre',
 'cvlt',
 'fele',
 'dxpe',
 'calm',
 'ahgp',
 'dox',
 'bobe',
 'crvl',
 'endp',
 'ea',
 'aste',
 'csx',
 'cprt',
 'algn',
 'cree',
 'cmcsa',
 'cac',
 'ebay',
 'eeft',
 'faro',
 'dorm',
 'alco',
 'crzo',
 'abax',
 'amwd',
 'cake',
 'blkb

In [30]:
for index,company in enumerate(stats_sort):
    print('{} {} : {}'.format(index,
                              company,
                              round(dict_stats_company[company]['close']['mean'],2)))   
    

0 amzn : 275.13
1 aapl : 257.18
2 cme : 230.29
3 atri : 228.39
4 fcnca : 200.25
5 bidu : 193.53
6 eqix : 165.38
7 biib : 164.54
8 esgr : 114.27
9 bbh : 113.28
10 djco : 110.25
11 dhil : 104.55
12 csgp : 103.1
13 anat : 97.94
14 alxn : 97.11
15 cost : 96.17
16 cacc : 95.5
17 amgn : 92.23
18 bwld : 89.39
19 ffiv : 86.29
20 celg : 85.09
21 algt : 83.7
22 coke : 80.57
23 cswc : 77.76
24 cbrl : 76.64
25 chdn : 72.22
26 fisv : 67.53
27 esrx : 67.43
28 cern : 65.04
29 alog : 64.74
30 acgl : 63.33
31 anss : 62.33
32 chrw : 61.99
33 adp : 61.03
34 asml : 59.04
35 eslt : 58.58
36 casy : 58.5
37 ctxs : 58.02
38 ctsh : 57.91
39 dltr : 57.42
40 bokf : 56.17
41 dwaq : 55.78
42 cpla : 54.8
43 expe : 53.78
44 colm : 53.73
45 cohr : 53.71
46 cvco : 53.37
47 adbe : 51.2
48 chkp : 51.13
49 cmpr : 51.06
50 bmrn : 50.52
51 ctas : 50.48
52 bbby : 50.18
53 banf : 49.64
54 atni : 47.68
55 abco : 47.65
56 arlp : 46.94
57 airm : 46.92
58 expo : 46.1
59 ctrp : 45.16
60 cpsi : 44.44
61 fast : 44.41
62 aaww : 44.3

## Finding the most traded stock each day

In [None]:
dict_day_stock = list()

for key,value in dict_stock_price.items():
    for date,volume in zip(value['date'],value['volume']):
        tuple(key,date,volu)
                   
    dict_company[columns[0]] = [row[0] for row in value] 
    
    for col in columns[1:]:
        index = columns.index(col)
        dict_company[col] = [float(row[index]) for row in value] 
    
    dict_stock_price[key] = dict_company

dict_stock_price['aapl']    
