## Pandas

In [20]:
import pandas as pd
import numpy as np

## Series

- One dimensional sequence
- Internally is composed of 2 array (1 for data, 1 for index)


In [21]:
series = pd.Series([5,6,2,1])
print(series)

0    5
1    6
2    2
3    1
dtype: int64


In [22]:
series = pd.Series([5,6,6,8], index=['first','second','third','fourth'])
print(series)
print('Values ',series.values)
print('Index ',series.index)
print('Slice/Filter by index ',series[['first','third']])

first     5
second    6
third     6
fourth    8
dtype: int64
Values  [5 6 6 8]
Index  Index(['first', 'second', 'third', 'fourth'], dtype='object')
Slice/Filter by index  first    5
third    6
dtype: int64


In [23]:
series['first'] = 10
print(series)
print('Series > 6', series[series > 6])

first     10
second     6
third      6
fourth     8
dtype: int64
Series > 6 first     10
fourth     8
dtype: int64


**Counts of values**

In [40]:
series.value_counts()

6     2
10    1
8     1
dtype: int64

**Adding series to other series**
- Item with the same index values get added to each other.
- Other values get set to NaN

In [25]:
series2 = pd.Series([1,2,3,4], index=['first','second','third','fifth'])
print(series + series2)

fifth      NaN
first     11.0
fourth     NaN
second     8.0
third      9.0
dtype: float64


## Dataframe

**Basic Frame Operations**

In [33]:
data = {'thing' : ['plant','tv','chair','table','pot'],
        'colour' : ['yellow','red','blue','white','white'],
        'weight' : [2.1,2.4,1.6,1.0,1.75],
         'quantity' : [1,7,3,4,2]}

frame = pd.DataFrame(data)
print(frame)
print('Index -',frame.index)
print('Columns -',frame.columns)
print('colour -', frame.colour)

   colour  quantity  thing  weight
0  yellow         1  plant    2.10
1     red         7     tv    2.40
2    blue         3  chair    1.60
3   white         4  table    1.00
4   white         2    pot    1.75
Index - RangeIndex(start=0, stop=5, step=1)
Columns - Index(['colour', 'quantity', 'thing', 'weight'], dtype='object')
colour - 0    yellow
1       red
2      blue
3     white
4     white
Name: colour, dtype: object


In [35]:
print('head - first 5 rows -', frame.head())
print('tail - last 5 rows -', frame.tail())

head - first 5 rows -    colour  quantity  thing  weight
0  yellow         1  plant    2.10
1     red         7     tv    2.40
2    blue         3  chair    1.60
3   white         4  table    1.00
4   white         2    pot    1.75
tail - last 5 rows -    colour  quantity  thing  weight
0  yellow         1  plant    2.10
1     red         7     tv    2.40
2    blue         3  chair    1.60
3   white         4  table    1.00
4   white         2    pot    1.75


**Getting the values of a column/row**

In [38]:
print(frame.colour)
print(frame[1:4].colour)
print(frame['colour'][1:4])
# Select multiple columns from the dataframe
print(frame[['colour','quantity']])

0    yellow
1       red
2      blue
3     white
4     white
Name: colour, dtype: object
1      red
2     blue
3    white
Name: colour, dtype: object
1      red
2     blue
3    white
Name: colour, dtype: object
   colour  quantity
0  yellow         1
1     red         7
2    blue         3
3   white         4
4   white         2


**Counts of values**

In [43]:
counts = frame['colour'].value_counts()
print(counts[:2])

white    2
blue     1
Name: colour, dtype: int64


**Editing the values in the data frame**

In [28]:
frame['quantity'][1] =2
print(frame)

   colour  quantity  thing  weight
0  yellow         1  plant    2.10
1     red         2     tv    2.40
2    blue         3  chair    1.60
3   white         4  table    1.00
4   white         2    pot    1.75


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


**Adding columns to the frame**

In [30]:
frame['size']=['S','S','L','M','XL']
print(frame)

   colour  quantity  thing  weight size
0  yellow         1  plant    2.10    S
1     red         2     tv    2.40    S
2    blue         3  chair    1.60    L
3   white         4  table    1.00    M
4   white         2    pot    1.75   XL


**Deleting columns**

In [31]:
del frame['size']
print(frame)

   colour  quantity  thing  weight
0  yellow         1  plant    2.10
1     red         2     tv    2.40
2    blue         3  chair    1.60
3   white         4  table    1.00
4   white         2    pot    1.75
