# Import Libraries

In [1]:
import pandas as pd
import numpy as np

# Series

## Introduction to pandas Series

#### List To Series

In [2]:
# Plain Python list of single-letter product codes; it is the raw input
# that gets converted to a pandas Series further down.
products = list('ABCD')
products

['A', 'B', 'C', 'D']

In [3]:
type(products)

list

In [4]:
# Wrap the list in a Series; pandas supplies a default 0..n-1 index.
product_categories = pd.Series(data=products)

In [5]:
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [6]:
type(product_categories)

pandas.core.series.Series

In [7]:
type(pd.Series(products))

pandas.core.series.Series

In [8]:
# A Series can also be built straight from a list literal of numbers.
daily_rates_dollars = pd.Series(data=[40, 45, 50, 60])
daily_rates_dollars

0    40
1    45
2    50
3    60
dtype: int64

In [9]:
print(daily_rates_dollars)

0    40
1    45
2    50
3    60
dtype: int64


#### Array To Series

In [10]:
# One-dimensional NumPy array holding the multiples of ten from 10 to 50.
array_a = np.array([10 * step for step in range(1, 6)])
array_a

array([10, 20, 30, 40, 50])

In [11]:
type(array_a)

numpy.ndarray

In [12]:
# A NumPy array is accepted by the Series constructor just like a list.
series_a = pd.Series(data=array_a)
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [13]:
print(series_a)

0    10
1    20
2    30
3    40
4    50
dtype: int64


In [14]:
type(series_a)

pandas.core.series.Series

## Working with Attributes in Python

In [15]:
# Fresh numeric Series used to explore attributes such as .dtype and .size.
series_a = pd.Series(data=[10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [16]:
series_a.dtype

dtype('int64')

In [17]:
series_a.size

5

In [18]:
# Series of text labels, rebuilt here so the attribute examples are self-contained.
product_categories = pd.Series(data=list('ABCD'))
product_categories

0    A
1    B
2    C
3    D
dtype: object

In [19]:
product_categories.dtype

dtype('O')

In [20]:
product_categories.size

4

In [21]:
type(product_categories.size)

int

In [23]:
print(product_categories.name)

None


In [24]:
# Series.name is a writable attribute; once set, it appears in the Series repr.
product_categories.name = "Product Categories"
product_categories

0    A
1    B
2    C
3    D
Name: Product Categories, dtype: object

In [25]:
print(product_categories.name)

Product Categories


## Using an Index in pandas

In [26]:
# Plain dict mapping each product label to its price.
prices_per_category = {
    'Product A': 22250,
    'Product B': 16600,
    'Product C': 15600,
}
prices_per_category

{'Product A': 22250, 'Product B': 16600, 'Product C': 15600}

In [27]:
type(prices_per_category)

dict

In [28]:
# Rebind the name: the dict becomes a Series whose index labels are the dict keys.
prices_per_category = pd.Series(data=prices_per_category)
prices_per_category

Product A    22250
Product B    16600
Product C    15600
dtype: int64

In [29]:
type(prices_per_category)

pandas.core.series.Series

In [30]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [31]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## Label-based vs Position-based Indexing

In [32]:
# No index is passed, so pandas assigns the default positional labels.
series_a = pd.Series(data=[10, 20, 30, 40, 50])
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [33]:
series_a.index

RangeIndex(start=0, stop=5, step=1)

In [34]:
type(series_a.index)

pandas.core.indexes.range.RangeIndex

In [35]:
list(series_a.index)

[0, 1, 2, 3, 4]

In [36]:
# Building the Series from a dict makes the keys the (string) index labels.
prices_per_category = pd.Series(
    data={
        'Product A': 22250,
        'Product B': 16600,
        'Product C': 12500,
    }
)
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [37]:
prices_per_category.index

Index(['Product A', 'Product B', 'Product C'], dtype='object')

In [38]:
type(prices_per_category.index)

pandas.core.indexes.base.Index

## More on Working with Indices in Python

In [39]:
# Two Series for the indexing examples: one with the default integer index,
# one labelled with product names.
series_a = pd.Series(data=[10, 20, 30, 40, 50])
prices_per_category = pd.Series(
    data={
        'Product A': 22250,
        'Product B': 16600,
        'Product C': 12500,
    }
)

In [40]:
series_a

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [41]:
series_a[0]

10

In [42]:
prices_per_category

Product A    22250
Product B    16600
Product C    12500
dtype: int64

In [43]:
prices_per_category['Product A']

22250

In [44]:
# Position-based access: use .iloc explicitly. A bare integer inside [] on a
# string-labelled Series only works through a positional fallback that is
# deprecated (FutureWarning since pandas 2.1) and will be treated as a label.
prices_per_category.iloc[0]

22250

In [45]:
# Same values as before, but with explicit integer labels that start at 1
# instead of the default 0.
series_b = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=[1, 2, 3, 4, 5],
)
series_b

1    10
2    20
3    30
4    40
5    50
dtype: int64

In [47]:
series_b.index

Int64Index([1, 2, 3, 4, 5], dtype='int64')

In [48]:
# series_b has the integer labels 1..5, so [] interprets 0 as a label (not a
# position). No such label exists, which raises KeyError; catch it so the
# demonstration does not stop the notebook.
try:
    series_b[0]
except KeyError as err:
    print(f"KeyError: {err}")

In [49]:
series_b[1]

10

In [50]:
# The index labels here are the *strings* "1".."5", not integers.
series_c = pd.Series(
    data=[10, 20, 30, 40, 50],
    index=["1", "2", "3", "4", "5"],
)
series_c

1    10
2    20
3    30
4    40
5    50
dtype: int64

In [51]:
# The labels are strings, so the integer 1 can only mean "second position".
# Say so explicitly with .iloc; the implicit positional fallback of [] is
# deprecated (FutureWarning since pandas 2.1).
series_c.iloc[1]

20

In [52]:
series_c["1"]

10

In [53]:
# First element by position. .iloc is used instead of series_c[0] because an
# integer key on a string-labelled Series relies on a deprecated positional
# fallback (FutureWarning since pandas 2.1).
series_c.iloc[0]

10

## Using Methods in Python

In [55]:
# Deposit amounts keyed by start date. The date strings become the index
# labels, kept in the order they are written here.
start_date_deposits = pd.Series(
    data={
        '7/4/2014': 2000,
        '1/2/2015': 2000,
        '12/8/2012': 1000,
        '2/20/2015': 2000,
        '10/28/2013': 2000,
        '4/19/2015': 2000,
        '7/4/2016': 2000,
        '4/24/2014': 2000,
        '9/3/2015': 4000,
        '7/25/2016': 2000,
        '5/1/2014': 2000,
        '3/29/2013': 2000,
        '10/3/2014': 2000,
        '9/18/2015': 2500,
    }
)

In [56]:
start_date_deposits

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

In [58]:
start_date_deposits.sum()

29500

In [59]:
start_date_deposits.min()

1000

In [60]:
start_date_deposits.max()

4000

In [61]:
start_date_deposits.idxmax()

'9/3/2015'

In [62]:
start_date_deposits.idxmin()

'12/8/2012'

In [63]:
start_date_deposits.head()

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
dtype: int64

In [64]:
start_date_deposits.tail()

7/25/2016    2000
5/1/2014     2000
3/29/2013    2000
10/3/2014    2000
9/18/2015    2500
dtype: int64

In [65]:
start_date_deposits.head(10)

7/4/2014      2000
1/2/2015      2000
12/8/2012     1000
2/20/2015     2000
10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
dtype: int64

In [66]:
start_date_deposits.tail(10)

10/28/2013    2000
4/19/2015     2000
7/4/2016      2000
4/24/2014     2000
9/3/2015      4000
7/25/2016     2000
5/1/2014      2000
3/29/2013     2000
10/3/2014     2000
9/18/2015     2500
dtype: int64

# DataFrames

## <u>**#1: Construct a DataFrame from a dictionary of lists**</u>

In [67]:
# Dictionary of lists: each key becomes a column, each list the column's values.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


## <u>**#2: Construct a DataFrame from a dictionary of lists + specify an index**</u>

In [69]:
# Same dictionary of lists, but the row labels are supplied explicitly
# through the `index` argument.
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=['A', 'B', 'C'])
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [70]:
# Keep the row labels in their own variable and pass it as the index.
product_IDs = ['A', 'B', 'C']
data = {
    'ProductName': ['Product A', 'Product B', 'Product C'],
    'ProductPrice': [22250, 16600, 12500],
}
df = pd.DataFrame(data=data, index=product_IDs)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


## <u>**#3: Construct a DataFrame from a list of dictionaries**</u>

In [71]:
# List of dictionaries: every dict is one row, and its keys name the columns.
data = [
    {'ProductName': name, 'ProductPrice': price}
    for name, price in (
        ('Product A', 22250),
        ('Product B', 16600),
        ('Product C', 12500),
    )
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [72]:
# A dict value may itself be a list; pandas stores the whole list inside a
# single cell of the resulting DataFrame rather than spreading it out.
data = [
    {'ProductName': name, 'ProductPrice': price}
    for name, price in (
        ('Product A', 22250),
        ('Product B', 16600),
        ('Product C', [12500, 100000]),
    )
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,"[12500, 100000]"


## <u>**#4: Construct a DataFrame from a dictionary of pandas Series**</u>

In [74]:
# Two Series with the default integer index: one of names, one of prices.
ser_products = pd.Series(data=['Product A', 'Product B', 'Product C'])
ser_prices = pd.Series(data=[22250, 16600, 12500])

In [75]:
# Dictionary of Series: each Series becomes one column of the DataFrame.
data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [76]:
# Both Series carry the same labels in the same order (A, B, C), so each
# name is paired with the price written at the same position.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['A', 'B', 'C'],
)

data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [77]:
# The price Series is labelled in reverse order (C, B, A). The DataFrame
# matches rows by label rather than by position, so row A receives the value
# labelled 'A' (12500) and row C receives 22250.
ser_products = pd.Series(
    data=['Product A', 'Product B', 'Product C'],
    index=['A', 'B', 'C'],
)
ser_prices = pd.Series(
    data=[22250, 16600, 12500],
    index=['C', 'B', 'A'],
)

data = dict(ProductName=ser_products, ProductPrice=ser_prices)
df = pd.DataFrame(data=data)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,12500
B,Product B,16600
C,Product C,22250


## <u>**#5: Construct a DataFrame from a list of lists**</u>

In [78]:
# List of lists: every inner list is one row. No column names are given,
# so the columns get the default integer labels 0 and 1.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [79]:
# The last row has one extra element. pandas widens the frame to three
# columns and fills the missing cells of the shorter rows with NaN.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500, 5000],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1,2
0,Product A,22250,
1,Product B,16600,
2,Product C,12500,5000.0


> The shorter rows are padded with NaN values. To avoid this, make sure every row has the same number of elements, and specify the number and names of the columns explicitly.

In [80]:
# Start again from rows of equal length; the columns are still labelled 0 and 1.
data = [
    ['Product A', 22250],
    ['Product B', 16600],
    ['Product C', 12500],
]
df = pd.DataFrame(data=data)
df

Unnamed: 0,0,1
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [81]:
# Replace the default integer column labels (0, 1) with descriptive names by
# assigning to the `columns` attribute; this modifies `df` in place.
df.columns = ['ProductName', 'ProductPrice']
df

Unnamed: 0,ProductName,ProductPrice
0,Product A,22250
1,Product B,16600
2,Product C,12500


In [82]:
# Assigning to the `index` attribute swaps the default row labels (0, 1, 2)
# for custom ones; this also modifies `df` in place.
df.index = ['A', 'B', 'C']
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


## <u>**#6: Construct a DataFrame in a Professional Way**</u>

In [83]:
# Everything in one constructor call: the row data, the column names and
# the row labels.
df = pd.DataFrame(
    data=[
        ['Product A', 22250],
        ['Product B', 16600],
        ['Product C', 12500],
    ],
    index=['A', 'B', 'C'],
    columns=['ProductName', 'ProductPrice'],
)
df

Unnamed: 0,ProductName,ProductPrice
A,Product A,22250
B,Product B,16600
C,Product C,12500


In [84]:
df.shape

(3, 2)