In [1]:
import numpy as np
import pandas as pd

### Create a new pandas Series

In [8]:
# Build a Series from five integers; with no index given, pandas assigns
# the default integer index.
s = pd.Series(data=[1, 2, 3, 4, 5])
print(s)

0    1
1    2
2    3
3    4
4    5
dtype: int64


Print the index of the pandas Series

In [9]:
# Show the index object that labels the elements of s.
print(s.index)

RangeIndex(start=0, stop=5, step=1)


In [10]:
# s was built without an explicit index, so the key 0 is one of its
# integer labels.
print(s[0])

1


In [11]:
# Three random draws labelled 'a', 'b', 'c'. No seed is set in this cell,
# so the values differ from run to run.
s = pd.Series(data=np.random.randn(3), index=['a', 'b', 'c'])
print(s)

a    0.219099
b    0.411419
c    1.582036
dtype: float64


In [18]:
# Show the index of s again; it now holds the string labels passed at creation.
print(s.index)

Index(['a', 'b', 'c'], dtype='object')


In [13]:
# s is labelled with strings here, so the integer 0 is not one of its labels.
# Use .iloc for positional access: relying on s[0] to fall back to a
# position is deprecated in recent pandas and emits a FutureWarning.
print(s.iloc[0])

0.2190994945383005


In [20]:
# Aggregate the values of s: arithmetic mean first, then the total,
# each on its own line.
print(s.mean(), s.sum(), sep="\n")

0.7375180325330879
2.2125540975992637


### Convert a pandas Series to a Python dictionary

In [22]:
# Series.to_dict() maps each index label to its value and returns native
# Python scalars, so the printed dict does not carry NumPy scalar wrappers.
# The name `s` is rebound to the dict because the next cell passes `s`
# back to pd.Series.
s = s.to_dict()
print(s)

{'a': 0.2190994945383005, 'b': 0.4114188923150486, 'c': 1.5820357107459146}


### Convert a dictionary back to a pandas Series

In [23]:
# Rebuild a Series from the object currently bound to `s`
# (presumably the dict produced by the preceding cell).
s = pd.Series(s)
print(s)

a    0.219099
b    0.411419
c    1.582036
dtype: float64


## Differences between a NumPy array and a pandas Series

### A NumPy array is accessed only by integer position, whereas a pandas Series can also be accessed through index labels that you define

In [24]:
# A plain NumPy array of five integers.
arr = np.array([1, 2, 3, 4, 5])
# Elements are reached by integer position only.
print(arr[0])

1


In [26]:
# In a pandas Series, elements can also be accessed by the label you
# defined for them (similar to a dictionary key).
print(s['a'])

0.2190994945383005


### A pandas Series can hold values of different data types, unlike a NumPy array with a fixed numeric dtype

In [27]:
# Demonstration: an integer NumPy array cannot store a string.
# The expected ValueError is caught and displayed so that the notebook
# still runs top to bottom ("Restart & Run All") past this cell.
try:
    arr[0] = "yello"
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: invalid literal for int() with base 10: 'yello'

In [30]:
# Writing a string into a float64 Series forces a silent upcast, which
# recent pandas deprecates (FutureWarning) for setitem-style assignments.
# Cast to object dtype explicitly first; the Series can then hold values of
# mixed types.
s = s.astype(object)
s['a'] = "Yello series"
s['c'] = 3

In [31]:
# Display s to inspect its values and the resulting dtype.
print(s)

a    Yello series
b        0.411419
c               3
dtype: object


In [64]:
# Grocery list as a Series: each key is an index label, each value the
# stored entry (a list, an integer and two strings, hence object dtype).
groceries = pd.Series({
    'eggs': [30, 20],
    'apples': 6,
    'milk': 'Yes',
    'bread': 'No',
})

# Leave the Series as the cell's last expression so it is displayed.
groceries


eggs      [30, 20]
apples           6
milk           Yes
bread           No
dtype: object

In [34]:
# Basic structural attributes of the Series:
# shape (tuple of lengths), ndim (number of dimensions), size (element count).
print('Groceries has shape:', groceries.shape)
print('Groceries has dimension:', groceries.ndim)
print('Groceries has a total of', groceries.size, 'elements')


Groceries has shape: (4,)
Groceries has dimension: 1
Groceries has a total of 4 elements


In [35]:
# Print the two components of the Series separately:
# .values is the underlying array of data, .index holds the labels.
print('The data in Groceries is:', groceries.values)
print('The index of Groceries is:', groceries.index)


The data in Groceries is: [list([30, 20]) 6 'Yes' 'No']
The index of Groceries is: Index(['eggs', 'apples', 'milk', 'bread'], dtype='object')


In [36]:
# Look up a single element by its index label.
print(groceries['eggs'])

[30, 20]


In [44]:
# `in` on a Series tests the index labels, so the label checks are written
# against the index explicitly.
print('Car' in groceries.index)
print('eggs' in groceries.index)

# Testing the stored data needs the underlying array instead.
print(6 in groceries.to_numpy())

# The 'eggs' entry is itself a list, so this is ordinary list membership.
print(20 in groceries.loc['eggs'])

False
True
True
True


In [45]:
# Passing a list of labels selects several elements at once and returns
# a Series.
print('Do we need milk and bread:\n', groceries.loc[['milk', 'bread']])
print()

Do we need milk and bread:
 milk     Yes
bread     No
dtype: object



In [52]:
# groceries is labelled with strings, so 0 is a position, not a label.
# .iloc makes the positional access explicit; groceries[0] relies on a
# positional fallback that recent pandas deprecates (FutureWarning).
print(groceries.iloc[0])
print()


[30, 20]



In [48]:
# Select the first two elements by position. .iloc is used because integer
# keys on a string-labelled Series are positions, not labels, and the
# implicit positional fallback of groceries[[0, 1]] is deprecated in
# recent pandas.
print('How many eggs and apples do we need to buy:\n',  groceries.iloc[[0, 1]])
print()


How many eggs and apples do we need to buy:
 eggs      [30, 20]
apples           6
dtype: object



In [53]:
# .loc selects strictly by index label; a list of labels returns a Series
# containing those elements in the order requested.
print('How many eggs and apples do we need to buy:\n', groceries.loc[['eggs', 'apples']]) 
print()


How many eggs and apples do we need to buy:
 eggs      [30, 20]
apples           6
dtype: object



In [54]:
# .iloc selects strictly by integer position; positions 2 and 3 are the
# third and fourth elements of the Series.
print('Do we need milk and bread:\n', groceries.iloc[[2, 3]]) 


Do we need milk and bread:
 milk     Yes
bread     No
dtype: object


In [56]:
# Show the full Series before any element is removed.
print(groceries)

eggs      [30, 20]
apples           6
milk           Yes
bread           No
dtype: object


In [58]:
# drop() returns a new Series without the given label; the result is only
# printed here and is not assigned back to groceries.
print('We remove apples (out of place):\n', groceries.drop('apples'))


We remove apples (out of place):
 eggs     [30, 20]
milk          Yes
bread          No
dtype: object


In [59]:
# Print groceries again to check whether the drop() call changed it.
print(groceries)

eggs      [30, 20]
apples           6
milk           Yes
bread           No
dtype: object


In [63]:
# Rebind the name to the Series returned by drop() so the removal persists.
# NOTE(review): this cell is not idempotent - once 'apples' is gone,
# running it again raises KeyError.
groceries = groceries.drop('apples')
print(groceries)

eggs     [30, 20]
milk          Yes
bread          No
dtype: object


In [65]:
# Remove 'apples' from the existing Series object instead of creating a copy.
# errors='ignore' keeps the cell runnable when the label is already absent,
# which is the case when the notebook is executed top to bottom (the
# previous cell has already dropped 'apples'); without it drop() raises
# KeyError.
groceries.drop('apples', inplace = True, errors = 'ignore')

In [66]:
# Show groceries after the in-place drop.
print(groceries)

eggs     [30, 20]
milk          Yes
bread          No
dtype: object


## Arithmetic operations with pandas

In [85]:
# Grocery list restricted to fruits: label -> quantity.
fruits = pd.Series({'apples': 10, 'oranges': 6, 'bananas': 3})

# Leave the Series as the cell's last expression so it is displayed.
fruits


apples     10
oranges     6
bananas     3
dtype: int64

In [86]:
# Element-wise arithmetic: each operation is applied to every value and
# returns a new Series; fruits itself is not modified.
print(fruits.add(2))
print("========")
print(np.sqrt(fruits))
print("========")
print(fruits.pow(2))

apples     12
oranges     8
bananas     5
dtype: int64
apples     3.162278
oranges    2.449490
bananas    1.732051
dtype: float64
apples     100
oranges     36
bananas      9
dtype: int64


In [87]:
# Increase only the 'apples' entry by 2, writing the result back into the
# Series (each run of this cell adds another 2).
fruits['apples'] += 2
print(fruits)

apples     12
oranges     6
bananas     3
dtype: int64


In [93]:
# Select two elements by label; the result is itself a Series.
print(fruits.loc[['apples', 'oranges']])

apples     12
oranges     6
dtype: int64


In [94]:
# Arithmetic can be applied to a selection only: the two selected values
# are divided by 2 and the result is printed without being stored.
print('We half the amount of apples and oranges:\n', fruits.loc[['apples', 'oranges']] / 2)


We half the amount of apples and oranges:
 apples     6.0
oranges    3.0
dtype: float64


In [96]:
# fruits holds integers; writing a string into it forces a silent upcast,
# which recent pandas deprecates (FutureWarning) for setitem-style
# assignments. Cast to object dtype explicitly before mixing value types.
fruits = fruits.astype(object)
fruits['apples'] = "Green Apple"
print(fruits)

apples     Green Apple
oranges              6
bananas              3
dtype: object


In [100]:
# The operator is applied to the stored element itself; when that element
# is a str, * 2 repeats the string.
print(fruits['apples']*2)

Green AppleGreen Apple


In [101]:
# Demonstration: division is not defined for the string stored under
# 'apples'. The expected TypeError is caught and displayed so the notebook
# still runs top to bottom past this cell.
try:
    print(fruits['apples']/2)
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: unsupported operand type(s) for /: 'str' and 'int'

In [102]:
# Demonstration: a str and an int cannot be added. The expected TypeError
# is caught and displayed so the notebook still runs top to bottom past
# this cell.
try:
    print(fruits['apples']+2)
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: can only concatenate str (not "int") to str

## Pandas Dataframes

In [110]:
# One Series per shopper, keyed by the shopper's name. Within each Series
# the index labels are the purchased items and the values their prices.
items = {
    'Bob': pd.Series({'bike': 245, 'pants': 25, 'watch': 55}),
    'Alice': pd.Series({'book': 40, 'glasses': 110, 'bike': 500, 'pants': 45}),
}

items

{'Bob': bike     245
 pants     25
 watch     55
 dtype: int64,
 'Alice': book        40
 glasses    110
 bike       500
 pants       45
 dtype: int64}

In [111]:
# Build a DataFrame from the dict of Series: each dict key becomes a column
# and the Series' index labels are aligned into the row index, with NaN
# where a shopper has no entry for a label.
shopping_carts = pd.DataFrame(items)
shopping_carts

Unnamed: 0,Bob,Alice
bike,245.0,500.0
book,,40.0
glasses,,110.0
pants,25.0,45.0
watch,55.0,


In [112]:
# Print structural information about shopping_carts:
# shape (rows, columns), number of dimensions, total element count,
# the raw data array, the row labels and the column labels.
print('shopping_carts has shape:', shopping_carts.shape)
print('shopping_carts has dimension:', shopping_carts.ndim)
print('shopping_carts has a total of:', shopping_carts.size, 'elements')
print()
print('The data in shopping_carts is:\n', shopping_carts.values)
print()
print('The row index in shopping_carts is:', shopping_carts.index)
print()
print('The column index in shopping_carts is:', shopping_carts.columns)


shopping_carts has shape: (5, 2)
shopping_carts has dimension: 2
shopping_carts has a total of: 10 elements

The data in shopping_carts is:
 [[245. 500.]
 [ nan  40.]
 [ nan 110.]
 [ 25.  45.]
 [ 55.  nan]]

The row index in shopping_carts is: Index(['bike', 'book', 'glasses', 'pants', 'watch'], dtype='object')

The column index in shopping_carts is: Index(['Bob', 'Alice'], dtype='object')


In [120]:
# Three ways of reading from the DataFrame:
# 1. df[column] returns that column as a Series.
print(shopping_carts['Bob'])
print("===========")
# 2. df.loc[row_label] returns that row as a Series.
print(shopping_carts.loc['bike'])
print("===========")
# 3. df.loc[row_label, column_label] returns a single cell. A single .loc
#    call is used instead of chaining two lookups (.loc['bike']['Bob']),
#    which would build an intermediate row Series first.
print(shopping_carts.loc['bike', 'Bob'])

bike       245.0
book         NaN
glasses      NaN
pants       25.0
watch       55.0
Name: Bob, dtype: float64
Bob      245.0
Alice    500.0
Name: bike, dtype: float64
245.0


In [121]:
# Same kind of data, but the Series are created without explicit labels,
# so each gets the default integer index.
data = {
    'Bob': pd.Series(data=[245, 25, 55]),
    'Alice': pd.Series(data=[40, 110, 500, 45]),
}

# Combine the Series into one DataFrame, one column per dict key.
df = pd.DataFrame(data=data)

# Leave the DataFrame as the cell's last expression so it is displayed.
df


Unnamed: 0,Bob,Alice
0,245.0,40
1,25.0,110
2,55.0,500
3,,45


In [122]:
# Create a DataFrame that only has Bob's data: the columns argument
# restricts the frame built from `items` to the listed key.
bob_shopping_cart = pd.DataFrame(items, columns=['Bob'])

# Display bob_shopping_cart
bob_shopping_cart


Unnamed: 0,Bob
bike,245
pants,25
watch,55


### Dealing with NaN

In [124]:
# Count every missing value in the frame: the NaN mask is summed per
# column, and the per-column counts are then added together.
x = shopping_carts.isna().sum().sum()
x

3

In [125]:
# Boolean mask with the same shape as shopping_carts: True where a value is missing.
shopping_carts.isnull()


Unnamed: 0,Bob,Alice
bike,False,False
book,True,False
glasses,True,False
pants,False,False
watch,False,True


In [130]:
# Drop every row that contains at least one NaN value (axis = 0 means rows).
# The result is only printed; shopping_carts is not reassigned here.
print(shopping_carts.dropna(axis = 0))


         Bob  Alice
bike   245.0  500.0
pants   25.0   45.0


In [131]:
# Drop every column that contains at least one NaN value (axis = 1 means columns).
# Both columns have missing entries in this data, so no column survives.
print(shopping_carts.dropna(axis = 1))


Empty DataFrame
Columns: []
Index: [bike, book, glasses, pants, watch]
