# Quick pandas Tricks

In [6]:
import pandas as pd
# Cap rendered frames at 10 rows so long tables are truncated in the output.
pd.set_option("display.max_rows", 10)
import pandas_datareader as pdr

In [7]:
# Ticker symbols whose price history is downloaded below.
stocks = [
    'GOOG',
    'AMZN',
    'FB',
    'NFLX',
]

### Dict comprehension 

In [12]:
# Map each ticker to the frame returned by pandas-datareader for it,
# requesting history from 2018-07-01 onward.
all_data = dict(
    (ticker, pdr.get_data_yahoo(ticker, start='20180701'))
    for ticker in stocks
)
all_data

{'GOOG':                    High          Low         Open        Close   Volume  \
 Date                                                                      
 2018-07-02  1128.000000  1093.800049  1099.000000  1127.459961  1217300   
 2018-07-03  1135.819946  1100.020020  1135.819946  1102.890015   679000   
 2018-07-05  1127.500000  1108.479980  1110.530029  1124.270020  1066700   
 2018-07-06  1140.930054  1120.737061  1123.579956  1140.170044   996100   
 2018-07-09  1154.670044  1143.420044  1148.479980  1154.050049   909000   
 ...                 ...          ...          ...          ...      ...   
 2019-07-16  1158.579956  1145.000000  1146.000000  1153.579956  1238800   
 2019-07-17  1158.359985  1145.770020  1150.969971  1146.349976  1170000   
 2019-07-18  1147.604980  1132.729980  1141.739990  1146.329956  1291300   
 2019-07-19  1151.140015  1129.619995  1148.189941  1130.099976  1646300   
 2019-07-22  1136.515015  1124.239990  1133.449951  1129.877441   527354   
 
  

### Pull the Close column for each symbol into a single DataFrame for analysis

In [13]:
# One column per ticker, each holding that ticker's 'Close' series;
# the frames' shared Date index becomes the row index.
closing_by_ticker = {ticker: prices['Close'] for ticker, prices in all_data.items()}
close = pd.DataFrame(closing_by_ticker)
del closing_by_ticker  # keep the notebook namespace the same as before
close

Unnamed: 0_level_0,GOOG,AMZN,FB,NFLX
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2018-07-02,1127.459961,1713.780029,197.360001,398.179993
2018-07-03,1102.890015,1693.959961,192.729996,390.519989
2018-07-05,1124.270020,1699.729980,198.449997,398.390015
2018-07-06,1140.170044,1710.630005,203.229996,408.250000
2018-07-09,1154.050049,1739.020020,204.740005,418.970001
...,...,...,...,...
2019-07-16,1153.579956,2009.900024,203.839996,365.989990
2019-07-17,1146.349976,1992.030029,201.800003,362.440002
2019-07-18,1146.329956,1977.900024,200.779999,325.209991
2019-07-19,1130.099976,1964.520020,198.360001,315.100006


### Suppose you are concerned with duplicate data in a dataset

In [15]:
# Boolean Series aligned with `close`: True marks a row whose values repeat
# an earlier row (the first occurrence itself is marked False).
dups = close.duplicated(keep='first')

### drop_duplicates applies the boolean Series from duplicated() and returns only the rows where it is False

In [16]:
import numpy.random as npr

# Seeded generator: without a fixed seed the synthetic data (and every count
# derived from it below) changes each time the notebook is re-run.
rng = npr.default_rng(0)

# 100 row labels, each a year in 2009..2018 (the upper bound is exclusive).
index = rng.integers(2009, 2019, 100)
# One "Sales" value per row drawn from 2..19; with 100 rows and only 18
# possible values, repeated values are guaranteed.
sales = pd.DataFrame({"Sales": rng.integers(2, 20, 100)}, index=index)

In [17]:
# Non-null entries per column, i.e. the row count before de-duplication.
sales.count(axis=0)

Sales    100
dtype: int64

In [19]:
# Keep the first occurrence of each distinct row, then count what is left.
(
    sales
    .drop_duplicates()
    .count()
)

Sales    18
dtype: int64

### Replacing data

In [20]:
# Rebind `sales` to the order records read from the CSV file (this replaces
# the randomly generated frame used above), then preview the first rows.
sales = pd.read_csv(filepath_or_buffer='data.csv')
sales.head(n=5)

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Pencil,95,1.99,189.05
1,1/23/18,Central,Kivell,Binder,50,19.99,999.5
2,2/9/18,Central,Jardine,Pencil,36,4.99,179.64
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
4,3/15/18,West,Sorvino,Pencil,56,2.99,167.44


In [21]:
# replace() returns a new frame with the substitution applied; the result is
# only displayed here, so `sales` itself still contains 'Pencil'.
sales.replace(to_replace='Pencil', value='Stylus')

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Stylus,95,1.99,189.05
1,1/23/18,Central,Kivell,Binder,50,19.99,999.50
2,2/9/18,Central,Jardine,Stylus,36,4.99,179.64
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
4,3/15/18,West,Sorvino,Stylus,56,2.99,167.44
...,...,...,...,...,...,...,...
38,10/14/19,West,Thompson,Binder,57,19.99,1139.43
39,10/31/19,Central,Andrews,Stylus,14,1.29,18.06
40,11/17/19,Central,Jardine,Binder,11,4.99,54.89
41,12/4/19,Central,Jardine,Binder,94,19.99,1879.06


In [22]:
# Preview the first five rows again to confirm the original frame is unchanged.
sales.head(n=5)

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Pencil,95,1.99,189.05
1,1/23/18,Central,Kivell,Binder,50,19.99,999.5
2,2/9/18,Central,Jardine,Pencil,36,4.99,179.64
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
4,3/15/18,West,Sorvino,Pencil,56,2.99,167.44


In [23]:
# Dict form: each key is swapped for its value. Every substitution is matched
# against the original data, so 1.99 becomes 2.99 and is not bumped on to 3.99.
# No column is named, so a matching value in any column would be replaced.
sales.replace(to_replace={1.99: 2.99, 2.99: 3.99})

Unnamed: 0,OrderDate,Region,Rep,Item,Units,Unit Cost,Total
0,1/6/18,East,Jones,Pencil,95,2.99,189.05
1,1/23/18,Central,Kivell,Binder,50,19.99,999.50
2,2/9/18,Central,Jardine,Pencil,36,4.99,179.64
3,2/26/18,Central,Gill,Pen,27,19.99,539.73
4,3/15/18,West,Sorvino,Pencil,56,3.99,167.44
...,...,...,...,...,...,...,...
38,10/14/19,West,Thompson,Binder,57,19.99,1139.43
39,10/31/19,Central,Andrews,Pencil,14,1.29,18.06
40,11/17/19,Central,Jardine,Binder,11,4.99,54.89
41,12/4/19,Central,Jardine,Binder,94,19.99,1879.06
