## Installing and Running pandas

In [1]:
import pandas as pd
import numpy as np
import mne as s # EEG signals library

In [2]:
s.__version__

'1.0.2'

## Introduction to pandas Series

In [3]:
import pandas as pd

In [4]:
products = ['A', 'B', 'C', 'D']
products

['A', 'B', 'C', 'D']

In [5]:
type(products)

list

In [6]:
product_categories = pd.Series(products)

In [7]:
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [8]:
type(product_categories)

pandas.core.series.Series

In [9]:
type(pd.Series(products))

pandas.core.series.Series

In [10]:
daily_rates_dollars = pd.Series([40, 45, 50, 60])
daily_rates_dollars

0    40
1    45
2    50
3    60
dtype: int64

In [11]:
print(daily_rates_dollars)

0    40
1    45
2    50
3    60
dtype: int64


In [12]:
import numpy as np

In [13]:
array_a = np.array([10, 20, 30, 40, 50])
array_a

array([10, 20, 30, 40, 50])

In [14]:
type(array_a)

numpy.ndarray

In [15]:
series_a = pd.Series(array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int32

In [16]:
print(series_a)

0    10
1    20
2    30
3    40
4    50
dtype: int32


In [17]:
type(series_a)

pandas.core.series.Series

## Working with Attributes in Python

In [18]:
import pandas as pd

In [19]:
series_a = pd.Series([10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [20]:
series_a.dtype

dtype('int64')

In [21]:
series_a.size

5

In [22]:
product_categories = pd.Series(['A', 'B', 'C', 'D'])
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [23]:
product_categories.dtype

dtype('O')

In [24]:
product_categories.size

4

In [25]:
type(product_categories.size)

int

In [26]:
product_categories.name

In [27]:
print(product_categories.name)

None


In [28]:
product_categories.name = "Product Categories"
product_categories

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

In [29]:
product_categories.name

'Product Categories'

In [30]:
print(product_categories.name)

Product Categories


## Using an Index in pandas

In [31]:
import pandas as pd

In [32]:
prices_per_category = {'Product A': 22250, 'Product B' : 16600, 'Product C':15600}
prices_per_category

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [33]:
type(prices_per_category)

dict

In [34]:
prices_per_category = pd.Series(prices_per_category)
prices_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [35]:
type(prices_per_category)

pandas.core.series.Series

In [36]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [37]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## Label-based vs Position-based Indexing

In [38]:
import pandas as pd

In [39]:
series_a = pd.Series([10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [40]:
series_a.index

RangeIndex(start=0, stop=5, step=1)

In [41]:
type(series_a.index)

pandas.core.indexes.range.RangeIndex

In [42]:
list(series_a.index)

[0, 1, 2, 3, 4]

In [43]:
# Price per product: the product names are the index labels, the prices are the values.
prices_per_category = pd.Series(
    data=[22250, 16600, 12500],
    index=['Product A', 'Product B', 'Product C'],
)
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [44]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [45]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## More on Working with Indices in Python

In [46]:
import pandas as pd

In [47]:
series_a = pd.Series([10, 20, 30, 40, 50])
prices_per_category = pd.Series({'Product A': 22250, 'Product B' : 16600, 'Product C':12500})

In [48]:
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [49]:
series_a[0]

10

In [50]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [51]:
prices_per_category['Product A']

22250

In [52]:
# The index holds string labels, so an integer here can only mean a position.
# .iloc states that explicitly; plain prices_per_category[2] depends on an
# integer-position fallback that recent pandas versions deprecate.
# prices_per_category.iloc[0]
# prices_per_category.iloc[1]
prices_per_category.iloc[2]

12500

In [54]:
# The index labels are the *strings* "4".."8", not integers.
series_c = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=[str(label) for label in range(4, 9)],
)
series_c

4    10
5    20
6    30
7    40
8    50
dtype: int64

In [55]:
# Position-based access: the index labels are strings, so 1 is a position, not a
# label. .iloc makes that explicit instead of relying on the deprecated
# integer-position fallback of series_c[1].
series_c.iloc[1]

20

In [106]:
series_c["6"]

30

In [57]:
# Position-based access to the first element; no label 0 exists in this
# string index, so use .iloc rather than the deprecated series_c[0] fallback.
series_c.iloc[0]

10

In [58]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

## Using Methods in Python - Part I

In [59]:
import pandas as pd

In [60]:
# Deposit amounts keyed by date strings; the order of the pairs below is the
# order of the resulting index.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [61]:

start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [62]:
start_date_deposits.sum()

29500

In [63]:
start_date_deposits.min()

1000

In [64]:
start_date_deposits.max()

4000

In [65]:
start_date_deposits.idxmax()

'9/3/2015'

In [66]:
start_date_deposits.idxmin()

'12/8/2012'

## Using Methods in Python - Part II

In [107]:
import pandas as pd

In [108]:
# Deposit amounts keyed by date strings; the order of the pairs below is the
# order of the resulting index.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [109]:
start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [112]:
start_date_deposits.head(6)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
dtype: int64

In [113]:
start_date_deposits.tail(6)

9/3/2015     4000
7/25/2016    2000
5/1/2014     2000
3/29/2013    2000
10/3/2014    2000
9/18/2015    2500
dtype: int64

## Parameters vs Arguments

In [72]:
import pandas as pd

In [73]:
# Deposit amounts keyed by date strings; the order of the pairs below is the
# order of the resulting index.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [74]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [75]:
start_date_deposits.head(3)

7/4/2014     2000
1/2/2015     2000
12/8/2012    1000
dtype: int64

In [76]:
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [77]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [116]:
start_date_deposits.head(n=10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [79]:
# Rebuild the deposits Series (same date/amount pairs as before); the order of
# the pairs below is the order of the resulting index.
start_date_deposits = pd.Series(dict([
    ('7/4/2014', 2000),
    ('1/2/2015', 2000),
    ('12/8/2012', 1000),
    ('2/20/2015', 2000),
    ('10/28/2013', 2000),
    ('4/19/2015', 2000),
    ('7/4/2016', 2000),
    ('4/24/2014', 2000),
    ('9/3/2015', 4000),
    ('7/25/2016', 2000),
    ('5/1/2014', 2000),
    ('3/29/2013', 2000),
    ('10/3/2014', 2000),
    ('9/18/2015', 2500),
]))

In [80]:
start_date_deposits.sum()

29500

In [81]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [82]:
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [83]:
start_date_deposits.sum()

29500

## Introduction to pandas DataFrames

In [84]:
import pandas as pd

## Creating DataFrames from Scratch

<u>**#1: Construct a DataFrame from a dictionary of lists**</u>

In [85]:
# Each dictionary key becomes a column; each list holds that column's values.
data = dict(
    ProductName=['Product A', 'Product B', 'Product C'],
    ProductPrice=[22250, 16600, 3333],
)
df = pd.DataFrame(data=data)
df  # easy way to create a dataframe

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,3333


In [86]:
# Same construction with a fourth product; both lists must have the same length.
data = dict(
    ProductName=['Product A', 'Product B', 'Product C', 'Product D'],
    ProductPrice=[22250, 16600, 12500, 234324],
)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500
3,Product D,234324


<u>**#2: Construct a DataFrame from a dictionary of lists + specify an index**</u>

In [87]:
# Dictionary of lists plus explicit (arbitrary string) row labels.
data = dict(
    ProductName=['Product A', 'Product B', 'Product C'],
    ProductPrice=[22250, 16600, 12500],
)
df = pd.DataFrame(data=data, index=['B', 'DD', 'G'])
df

Unnamed: 0,ProductName,ProductPrice
B,Product A,22250
DD,Product B,16600
G,Product C,12500


In [88]:
# Dictionary of lists plus explicit integer row labels starting at 1.
data = dict(
    ProductName=['Product A', 'Product B', 'Product C'],
    ProductPrice=[22250, 16600, 12500],
)
df = pd.DataFrame(data=data, index=[1, 2, 3])
df

Unnamed: 0,ProductName,ProductPrice
1,Product A,22250
2,Product B,16600
3,Product C,12500


In [89]:
# Row labels kept in their own variable and handed to the constructor as the index.
product_IDs = list('ABC')
data = dict(
    ProductName=['Product A', 'Product B', 'Product C'],
    ProductPrice=[22250, 16600, 12500],
)
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#3: Construct a DataFrame from a list of dictionaries**</u>

In [117]:
# One dictionary per row; the keys become the column names.
data = [
    {'ProductName': name, 'ProductPrice': price}
    for name, price in [
        ('Product A', 22250),
        ('Product B', 16600),
        ('Product C', 12500),
        ('Product D', 12550),
    ]
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500
3,Product D,12550


In [118]:
# The last row's price is itself a list, so it is stored as a single list object in that cell.
data = [
    {'ProductName': name, 'ProductPrice': price}
    for name, price in [
        ('Product A', 22250),
        ('Product B', 16600),
        ('Product C', [12500, 100000]),
    ]
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,"[12500, 100000]"


In [92]:
# The last record has no 'ProductPrice' key; its price shows up as NaN in the frame.
data = [
    dict(ProductName='Product A', ProductPrice=22250),
    dict(ProductName='Product B', ProductPrice=16600),
    dict(ProductName='Product C', ProductPrice=12500),
    dict(ProductName='Product D'),
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250.0
1,Product B,16600.0
2,Product C,12500.0
3,Product D,


<u>**#4: Construct a DataFrame from a dictionary of pandas Series**</u>

In [93]:
ser_products = pd.Series(['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series([22250, 16600, 12500])

In [94]:
data = {'ProductName':ser_products, 'ProductPrice':ser_prices}
df = pd.DataFrame(data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [95]:
# Both Series share the labels A, B, C, which become the row index of the frame.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=list('ABC'),
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=list('ABC'),
)

data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [96]:
# The price labels are listed in reverse (C, B, A). Rows are matched by label,
# not by position, so 'A' ends up paired with 12500.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=list('ABC'),
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=list('CBA'),
)

data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,12500
B,Product B,16600
C,Product C,22250


<u>**#5: Construct a DataFrame from a list of lists**</u>

In [97]:
# Each inner list is one row; no column names are given, so the columns are labelled 0 and 1.
data = [
    [name, price]
    for name, price in zip(
        ['Product A', 'Product B', 'Product C'],
        [22250, 16600, 12500],
    )
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [98]:
# The last row has a third element, so the frame gets a third column that is
# empty (NaN) for the shorter rows.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500, 5000],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,Product A,22250,
1,Product B,16600,
2,Product C,12500,5000.0


In [99]:
# Three [name, price] rows with default integer column labels (0 and 1) and row labels.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [100]:
df.columns = ['ProductName', 'ProductPrice']
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [101]:
df.index = ['A', 'B', 'C']
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


<u>**#6: Construct a DataFrame the BEST way**</u>

In [120]:
# Rows and column names are supplied in a single constructor call.
# An index could be passed the same way, e.g. index = ['A', 'B', 'C'].
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    columns=['ProductName', 'ProductPrice'],
)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [121]:
df.shape

(3, 2)

In [122]:
# Rows, column names and row labels (1, 2, 3) are all supplied in one constructor call.
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    columns=['ProductName', 'ProductPrice'],
    index=[1, 2, 3],
)
df

Unnamed: 0,ProductName,ProductPrice
1,Product A,22250
2,Product B,16600
3,Product C,12500
