In [2]:
import numpy as np
import pandas as pd

***
* <code>pd.Series(data, index)</code>  
  
(The <code>index</code> is a list of index labels.)

In [3]:
# Mixing ints and strings in one Series makes pandas fall back to dtype `object`.
groceries = pd.Series(
    data=[30, 6, 'Yes', 'No'],
    index=['eggs', 'apples', 'milk', 'bread'],
)
groceries

eggs       30
apples      6
milk      Yes
bread      No
dtype: object

In [4]:
# Keep an independent copy of the original series: later cells modify
# `groceries` (label assignment, in-place drop) and then restore it from here.
og_groceries = groceries.copy()

***
* ```.shape```
* ```.ndim```
* ```.size```

In [5]:
# shape, ndim and size are plain attributes (read without call parentheses).
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')

Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements


***
* ```.values```
* ```.index```

In [6]:
# .values exposes the stored data as an array; .index exposes the labels as an Index object.
print('The data in Groceries is:', groceries.values)
print('The index of Groceries is:', groceries.index)

The data in Groceries is: [30 6 'Yes' 'No']
The index of Groceries is: Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')


***
* ```in``` membership operator (tests the index labels, not the stored values)

In [7]:
# `in` on a Series checks membership among the index labels, not among the values.
x = 'bananas' in groceries
y = 'bread' in groceries

print('Is bananas an index label in Groceries:', x)
print('Is bread an index label in Groceries:', y)

Is bananas an index label in Groceries: False
Is bread an index label in Groceries: True


***
### Accessing and Deleting elements in a Pandas Series  

* ```.loc``` : stands for location
* ```.iloc``` : stands for integer location  
  
*Elements can also be accessed using:*
* index labels: ```pd_series['label']``` or ```pd_series[['label1', 'label2']]```
* integer positions: ```pd_series.iloc[int]``` or ```pd_series.iloc[[int1, int2]]``` (plain ```pd_series[int]``` on a label-indexed Series is deprecated since pandas 2.1)

In [8]:
# Label-based access: plain [] with a label (or a list of labels), or the explicit .loc indexer.
print("groceries['eggs']:", groceries['eggs'], "\n")
print("groceries[['milk', 'bread']]:\n", groceries[['milk', 'bread']], "\n")
print("groceries.loc[['eggs', 'apples']]:\n", groceries.loc[['eggs', 'apples']], "\n")
# Position-based access goes through .iloc. Passing integer positions to plain []
# on a Series with string labels is deprecated (FutureWarning since pandas 2.1).
print("groceries.iloc[0]:", groceries.iloc[0], "\n")
print("groceries.iloc[[0, 1]]:\n", groceries.iloc[[0, 1]], "\n")
print("groceries.iloc[[2, 3]]:\n", groceries.iloc[[2, 3]])

groceries['eggs']: 30 

groceries[['milk', 'bread']]:
 milk     Yes
bread     No
dtype: object 

groceries.loc[['eggs', 'apples']]):
 eggs      30
apples     6
dtype: object 

groceries[0]: 30 

groceries[[0, 1]]:
 eggs      30
apples     6
dtype: object 

groceries.iloc[[2, 3]]:
 milk     Yes
bread     No
dtype: object


***
* Mutability:
* ```pd_series['label'] = new_value```

In [9]:
print('Original groceries:\n', groceries, "\n")
# Series are mutable: assigning through a label overwrites that element in place.
groceries['eggs'] = 2
print('Modified groceries:\n', groceries)
# Restore the saved copy so the following cells start from the original data.
groceries = og_groceries.copy()

Original groceries:
 eggs       30
apples      6
milk      Yes
bread      No
dtype: object 

Modified groceries:
 eggs        2
apples      6
milk      Yes
bread      No
dtype: object


***
* ```.drop(label, inplace=False)```
>*By default, this method deletes elements **without modifying** the original series.  
Set* ```inplace=True``` *to modify the original series.*

In [10]:
print('Original groceries series:\n', groceries, '\n')
# Without inplace=True, drop returns a new series and leaves `groceries` untouched.
print("['apples'] removed (out of place):\n", groceries.drop('apples'), "\n")
print("Original groceries still intact:\n", groceries, "\n")
# With inplace=True the element is removed from `groceries` itself.
groceries.drop('apples', inplace=True)
print("Original after ['apples'] dropped (in place):\n", groceries)
# Restore the saved copy so the following cells start from the original data.
groceries = og_groceries.copy()

Original groceries series:
 eggs       30
apples      6
milk      Yes
bread      No
dtype: object 

['apples'] removed (out of place):
 eggs      30
milk     Yes
bread     No
dtype: object 

Original groceries still intact:
 eggs       30
apples      6
milk      Yes
bread      No
dtype: object 

Original after ['apples'] dropped (in place):
 eggs      30
milk     Yes
bread     No
dtype: object


***
### Element-Wise Arithmetic Operations and Broadcasting  
*Broadcasted arithmetic:*
* ```pd_series |operator| operand``` : "out of place" operation
* ```+``` , ```-```, ```*```, ```/```, ```**```
* example: ```pd_series + 5```

In [11]:
# All-integer data, so this Series gets a numeric dtype (unlike `groceries`).
fruits = pd.Series(
    data=[10, 6, 3],
    index=['apples', 'oranges', 'bananas'],
)
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [12]:
# multiplication example (out of place)
fruits * 2

apples     20
oranges    12
bananas     6
dtype: int64

In [13]:
# `*` on a mixed-type (object) series is applied element by element:
# the ints are doubled while the strings are repeated ('Yes' -> 'YesYes').
print(groceries)
groceries * 2

eggs       30
apples      6
milk      Yes
bread      No
dtype: object


eggs          60
apples        12
milk      YesYes
bread       NoNo
dtype: object

*Data-types must be valid for the operator, otherwise Python will raise an error:*  
```groceries / 2``` raises ```TypeError: unsupported operand type(s) for /: 'str' and 'int'```

In [14]:
# Note that fruits and groceries are unchanged
print(fruits)
print()
print(groceries)

apples     10
oranges     6
bananas     3
dtype: int64

eggs       30
apples      6
milk      Yes
bread      No
dtype: object


***
*Arithmetic "out of place" operations on individual elements:*  
* ```pd_series['label'] |operator| operand```
* ```pd_series.iloc[int] |operator| operand```

In [15]:
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [16]:
print('fruits["apples"] + 5 =', fruits['apples'] + 5)

fruits["apples"] + 5 = 15


In [17]:
print('fruits.iloc[0] - 2 =', fruits.iloc[0] - 2)

fruits.iloc[0] - 2 = 8


In [18]:
# Note that fruits is unchanged
fruits

apples     10
oranges     6
bananas     3
dtype: int64

***
*Numpy mathematical functions can be used (all out of place operations):*
* ```np.exp(pd_series)```
* ```np.sqrt(pd_series)```
* ```np.power(pd_series, power)```

In [19]:
# numpy (`np`) is imported together with pandas in the notebook's first cell.
fruits

apples     10
oranges     6
bananas     3
dtype: int64

In [20]:
np.exp(fruits)

apples     22026.465795
oranges      403.428793
bananas       20.085537
dtype: float64

In [21]:
np.sqrt(fruits)

apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64

In [22]:
np.power(fruits, 2)

apples     100
oranges     36
bananas      9
dtype: int64

***
### DataFrames  
* ```pd.DataFrame(data, index=[Optional], columns=[Optional])```

In [23]:
# Dictionary of Series: each key will become a DataFrame column. The two
# Series carry different (partly overlapping) sets of index labels.
items = {
    'Bob': pd.Series(data=[245, 25, 55], index=['bike', 'pants', 'watch']),
    'Alice': pd.Series(
        data=[40, 110, 500, 45],
        index=['book', 'glasses', 'bike', 'pants'],
    ),
}
type(items)

dict

In [24]:
# The row index is the union of both Series' labels; a label missing from one
# Series shows up as NaN ('Not a Number') in that column.
shopping_cart = pd.DataFrame(items)
shopping_cart

Unnamed: 0,Alice,Bob
bike,500.0,245.0
book,40.0,
glasses,110.0,
pants,45.0,25.0
watch,,55.0


*DataFrame without defined index labels:*
* ```pd.DataFrame(data)```

In [25]:
# No index labels are given, so each Series is labelled 0, 1, 2, ...;
# the shorter Series ('Bob') is padded with NaN in the resulting frame.
data = {'Bob' : pd.Series([245, 25, 55]),
        'Alice' : pd.Series([40, 110, 500, 45])}
df = pd.DataFrame(data)
df

Unnamed: 0,Alice,Bob
0,40,245.0
1,110,25.0
2,500,55.0
3,45,


*DataFrame attributes:*

In [26]:
print(shopping_cart.shape)  # (rows, columns)
print(shopping_cart.ndim)  # number of axes
print(shopping_cart.size)  # total number of cells, NaN cells included

(5, 2)
2
10


In [27]:
print(shopping_cart.values)
print('\n',shopping_cart.index)
print('\n',shopping_cart.columns)

[[ 500.  245.]
 [  40.   nan]
 [ 110.   nan]
 [  45.   25.]
 [  nan   55.]]

 Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

 Index(['Alice', 'Bob'], dtype='object')


*Selective entry into DataFrame:*

In [28]:
# columns= restricts the frame to the listed dictionary keys.
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])
bob_shopping_cart

Unnamed: 0,Bob
bike,245
pants,25
watch,55


In [29]:
# index= keeps only the listed row labels, in the order given.
sel_shopping_cart = pd.DataFrame(items, index = ['pants', 'book'])
sel_shopping_cart

Unnamed: 0,Alice,Bob
pants,45,25.0
book,40,


In [30]:
# index= and columns= can be combined to pick specific rows of specific columns.
alice_sel_shopping_cart = pd.DataFrame(items, index = ['glasses', 'bike'], columns = ['Alice'])
alice_sel_shopping_cart

Unnamed: 0,Alice
glasses,110
bike,500


*Creation from dictionary of lists (arrays):*

In [31]:
# Dictionary of equal-length lists: each key becomes a column, and the
# explicit index supplies one label per list position.
data = {
    'Integers': [1, 2, 3],
    'Floats': [4.5, 8.2, 9.6],
}
df = pd.DataFrame(data, index=['label 1', 'label 2', 'label 3'])
df

Unnamed: 0,Floats,Integers
label 1,4.5,1
label 2,8.2,2
label 3,9.6,3


*Creation from list of dictionaries:*

In [32]:
# List of dictionaries: each dict is one row, its keys become columns, and a
# key absent from a dict yields NaN in that row.
items2 = [
    {'bikes': 20, 'pants': 30, 'watches': 35},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5},
]
store_items = pd.DataFrame(items2, index=['store 1', 'store 2'])
store_items

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35
store 2,15,50.0,5,10


***
### Accessing Elements in DataFrames
>*Note: Order of access is* ```dataframe[column][row]```

In [33]:
store_items

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35
store 2,15,50.0,5,10


In [34]:
store_items[['bikes']]

Unnamed: 0,bikes
store 1,20
store 2,15


In [35]:
store_items[['bikes', 'pants']]

Unnamed: 0,bikes,pants
store 1,20,30
store 2,15,5


In [36]:
store_items.loc[['store 1']]

Unnamed: 0,bikes,glasses,pants,watches
store 1,20,,30,35


In [37]:
# dataframe[column][row]: the first [] selects the 'bikes' column (a Series),
# the second [] picks the 'store 2' label out of that Series.
# For assignments prefer store_items.loc['store 2', 'bikes'], which avoids chained indexing.
store_items['bikes']['store 2']

15

***
*Adding new column:*
* ```dataframe['new_column'] = [values]```

In [38]:
store_items['shirts'] = [15, 2]
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts
store 1,20,,30,35,15
store 2,15,50.0,5,10,2


*Adding new columns with sums between other columns:*
* ```dframe['new_column'] = dframe['column1'] + dframe['column2']```

In [39]:
store_items['suits'] = store_items['pants'] + store_items['shirts']
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts,suits
store 1,20,,30,35,15,45
store 2,15,50.0,5,10,2,7


*Adding new columns with data from specified rows & columns:*
* ```dframe['new_column'] = dframe['column'][row_start:row_end]```

In [40]:
# [1:] takes the 'watches' values from position 1 onward; the assignment aligns
# on row labels, so rows outside the slice ('store 1') receive NaN.
store_items['new watches'] = store_items['watches'][1:]
store_items

Unnamed: 0,bikes,glasses,pants,watches,shirts,suits,new watches
store 1,20,,30,35,15,45,
store 2,15,50.0,5,10,2,7,10.0


*Adding new rows:*
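* ```dataframe = dataframe.append(new_row_data)``` — deprecated in pandas 1.4 and removed in 2.0; on current versions use ```dataframe = pd.concat([dataframe, new_row_data])```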
>*Note: This method causes columns to become ordered alphabetically.*

In [41]:
# Describe the new row as a list holding one dict (keys become columns).
new_items = [{'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4}]
# Wrap it in a single-row DataFrame labelled 'store 3'.
new_store = pd.DataFrame(new_items, index=['store 3'])
print(new_store)
# Stack the new row under the existing ones. DataFrame.append was deprecated in
# pandas 1.4 and removed in 2.0, so pd.concat is used instead. sort=True orders
# the combined columns alphabetically; columns absent from one of the frames
# are filled with NaN.
store_items = pd.concat([store_items, new_store], sort=True)
store_items

         bikes  glasses  pants  watches
store 3     20        4     30       35


Unnamed: 0,bikes,glasses,new watches,pants,shirts,suits,watches
store 1,20,,,30,15.0,45.0,35
store 2,15,50.0,10.0,5,2.0,7.0,10
store 3,20,4.0,,30,,,35


*Insert new columns to specified location:*
* ```.insert(loc, label, data)``` | (```loc``` is an array position)

In [42]:
# Insert a 'shoes' column at column position 4 (0-based), modifying
# store_items in place; the list supplies one value per row.
store_items.insert(4, 'shoes', [8,5,0])
store_items

Unnamed: 0,bikes,glasses,new watches,pants,shoes,shirts,suits,watches
store 1,20,,,30,8,15.0,45.0,35
store 2,15,50.0,10.0,5,5,2.0,7.0,10
store 3,20,4.0,,30,0,,,35


***
* ```.pop(column)``` - deletes columns
* ```.drop()``` - deletes both rows and columns with axis= keyword

In [43]:
# pop removes the column from store_items in place and returns it;
# the returned Series is discarded here.
store_items.pop('new watches')
store_items

Unnamed: 0,bikes,glasses,pants,shoes,shirts,suits,watches
store 1,20,,30,8,15.0,45.0,35
store 2,15,50.0,5,5,2.0,7.0,10
store 3,20,4.0,30,0,,,35


In [44]:
store_items = store_items.drop(['watches', 'shoes'], axis=1)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 1,20,,30,15.0,45.0
store 2,15,50.0,5,2.0,7.0
store 3,20,4.0,30,,


In [45]:
store_items = store_items.drop(['store 2', 'store 1'], axis=0)
store_items

Unnamed: 0,bikes,glasses,pants,shirts,suits
store 3,20,4.0,30,,


***
* ```.rename(columns={'old label': 'new label'})```
* ```.rename(index={'old label': 'new label'})```

In [46]:
store_items = store_items.rename(columns = {'bikes': 'hats'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
store 3,20,4.0,30,,


In [47]:
store_items = store_items.rename(index = {'store 3': 'last store'})
store_items

Unnamed: 0,hats,glasses,pants,shirts,suits
last store,20,4.0,30,,


*Can change the index to be one of the columns of the DataFrame:*

In [48]:
store_items = store_items.set_index('pants')
store_items

Unnamed: 0_level_0,hats,glasses,shirts,suits
pants,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30,20,4.0,,


***
### Dealing with NaN values


In [49]:
# Rebuild store_items from scratch with three stores; the dicts do not all
# share the same keys, so the resulting frame contains NaN values.
items2 = [
    {'bikes': 20, 'pants': 30, 'watches': 35, 'shirts': 15, 'shoes': 8, 'suits': 45},
    {'watches': 10, 'glasses': 50, 'bikes': 15, 'pants': 5, 'shirts': 2, 'shoes': 5, 'suits': 7},
    {'bikes': 20, 'pants': 30, 'watches': 35, 'glasses': 4, 'shoes': 10},
]

store_items = pd.DataFrame(items2, index=['store 1', 'store 2', 'store 3'])
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


***
* ```.isnull()```

In [50]:
store_items.isnull()

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,False,True,False,False,False,False,False
store 2,False,False,False,False,False,False,False
store 3,False,False,False,True,False,True,False


*Combining* ```.isnull()``` *with* ```.sum()``` *to count number of NaN values:*

In [51]:
store_items.isnull().sum()

bikes      0
glasses    1
pants      0
shirts     1
shoes      0
suits      1
watches    0
dtype: int64

In [52]:
store_items.isnull().sum().sum()

3

***
* ```.count()``` *will find the number of **non-NaN** values*

In [53]:
store_items.count()

bikes      3
glasses    2
pants      3
shirts     2
shoes      3
suits      2
watches    3
dtype: int64

***
* ```.dropna(axis, inplace=False)``` *drops every row* ```(axis=0)``` *or every column* ```(axis=1)``` *that contains at least one NaN value*

In [54]:
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


In [55]:
store_items.dropna(axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 2,15,50.0,5,2.0,5,7.0,10


In [56]:
store_items.dropna(axis=1)

Unnamed: 0,bikes,pants,shoes,watches
store 1,20,30,8,35
store 2,15,5,5,10
store 3,20,30,10,35


***
* ```.fillna(value=Optional, method=Optional, axis, inplace=False)```

In [57]:
# replace NaN values with 0
store_items.fillna(0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,0.0,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,0.0,10,0.0,35


* *Forward filling with* ```.fillna(method='ffill', axis)``` *(or the equivalent* ```.ffill(axis)```*)*:

In [58]:
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


In [59]:
# Forward fill down each column (axis=0): a NaN takes the value from the row above;
# a NaN in the first row has nothing above it and stays NaN.
# .ffill() replaces .fillna(method='ffill'), whose `method` argument is deprecated since pandas 2.1.
store_items.ffill(axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,2.0,10,7.0,35


In [60]:
# Forward fill along each row (axis=1): a NaN takes the value from the column to its left.
# .ffill() replaces .fillna(method='ffill'), whose `method` argument is deprecated since pandas 2.1.
store_items.ffill(axis=1)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20.0,20.0,30.0,15.0,8.0,45.0,35.0
store 2,15.0,50.0,5.0,2.0,5.0,7.0,10.0
store 3,20.0,4.0,30.0,30.0,10.0,10.0,35.0


* *Backward filling with* ```.fillna(method='backfill', axis)``` *(or the equivalent* ```.bfill(axis)```*)*

In [61]:
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


In [62]:
# Backward fill along each row (axis=1): a NaN takes the value from the column to its right.
# .bfill() replaces .fillna(method='backfill'), whose `method` argument is deprecated since pandas 2.1.
store_items.bfill(axis=1)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20.0,30.0,30.0,15.0,8.0,45.0,35.0
store 2,15.0,50.0,5.0,2.0,5.0,7.0,10.0
store 3,20.0,4.0,30.0,10.0,10.0,35.0,35.0


In [63]:
# Backward fill up each column (axis=0): a NaN takes the value from the row below;
# a NaN in the last row has nothing below it and stays NaN.
# .bfill() replaces .fillna(method='backfill'), whose `method` argument is deprecated since pandas 2.1.
store_items.bfill(axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,50.0,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


***
*Replacing NaN values with interpolation:*
* ```.interpolate(method, axis)```

In [64]:
store_items

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,,10,,35


*Linear interpolation:*

In [65]:
store_items.interpolate(method='linear', axis=0)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20,,30,15.0,8,45.0,35
store 2,15,50.0,5,2.0,5,7.0,10
store 3,20,4.0,30,2.0,10,7.0,35


In [66]:
store_items.interpolate(method='linear', axis=1)

Unnamed: 0,bikes,glasses,pants,shirts,shoes,suits,watches
store 1,20.0,25.0,30.0,15.0,8.0,45.0,35.0
store 2,15.0,50.0,5.0,2.0,5.0,7.0,10.0
store 3,20.0,4.0,30.0,20.0,10.0,22.5,35.0


***
*Playing around:*

In [79]:
# NOTE(review): scratch experiment left disabled. As written it looks broken:
# store_items.max() yields a Series, and Series.any() presumably rejects axis=1.
# TODO: confirm the intent (rows holding a column maximum?) and fix or delete this cell.
#store_items[(store_items.max()).any(axis=1)].index