# Numpy Arrays for 1D Data

## Initializing and Converting to Numpy Array

In [0]:
import numpy as np

**Create array of zeros**

In [0]:
array1 = np.zeros(3)
print(array1)

[0. 0. 0.]


In [0]:
array2 = np.zeros(5)
print(array2)

[0. 0. 0. 0. 0.]


**Create array of ones**

In [0]:
array1 = np.ones(4)
print(array1)

[1. 1. 1. 1.]


**Create a Numpy Array**

In [0]:
array1 = np.array([1,2,3])
print(array1)

[1 2 3]


In [0]:
array2 = np.array([2,4,6])
print(array2)

[2 4 6]


**Convert Python list into an array**

In [0]:
python_list = [1,2,3,4,5]
numpy_array = np.array(python_list)
print(numpy_array)

[1 2 3 4 5]


**Arange**

In [0]:
array1 = np.arange(10)
print(array1)

[0 1 2 3 4 5 6 7 8 9]


In [0]:
array2 = np.arange(2,8)
print(array2)

[2 3 4 5 6 7]


In [0]:
array3 = np.arange(2,12,2)
print(array3)

[ 2  4  6  8 10]


**Linspace**

In [0]:
array4 = np.linspace(50,100,7)
print(array4)

[ 50.          58.33333333  66.66666667  75.          83.33333333
  91.66666667 100.        ]


In [0]:
array5 = np.linspace(1,20,3)
print(array5)

[ 1.  10.5 20. ]


## Accessing and Modifying Numpy Arrays

**Accessing through indexing**

In [0]:
print(array1)

[0 1 2 3 4 5 6 7 8 9]


In [0]:
print(array1[0])

0


In [0]:
print(array1[2])

2


In [0]:
for i in range(len(array1)):
  print(array1[i])

0
1
2
3
4
5
6
7
8
9


In [0]:
for i in np.nditer(array1):
  print(i)

0
1
2
3
4
5
6
7
8
9


**Selection through slicing**

In [0]:
print(array1[2:5])

[2 3 4]


In [0]:
print(array2[3:5])

[5 6]


In [0]:
print(array3[1:3])

[4 6]


**Append and concatenation**

In [0]:
np.append(array1, [1])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1])

In [0]:
np.append(array1, [1,2,3])

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 1, 2, 3])

In [0]:
np.append(array1,array2)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 2, 3, 4, 5, 6, 7])

**Split**

In [0]:
np.split(array1,2)

[array([0, 1, 2, 3, 4]), array([5, 6, 7, 8, 9])]

In [0]:
np.split(array1,[3,5,6])

[array([0, 1, 2]), array([3, 4]), array([5]), array([6, 7, 8, 9])]

**Get Info**

In [0]:
array1.size

10

In [0]:
array1.shape

(10,)

## Numpy Array Math Operations

**Scalar and vector addition**

In [0]:
print(array1)

[0 1 2 3 4 5 6 7 8 9]


In [0]:
print(array1+5)

[ 5  6  7  8  9 10 11 12 13 14]


In [0]:
x1 = np.array([1,2,3])
x2 = np.array([10,20,30])
np.add(x1,x2)

array([11, 22, 33])

**Scalar and vector subtraction**

In [0]:
print(array1)
print(array1-10)

[0 1 2 3 4 5 6 7 8 9]
[-10  -9  -8  -7  -6  -5  -4  -3  -2  -1]


In [0]:
np.subtract(x1,x2)

array([ -9, -18, -27])

In [0]:
x1 - x2

array([ -9, -18, -27])

**Scalar and vector multiplication**

In [0]:
print(array2)
print(array2*3)

[2 3 4 5 6 7]
[ 6  9 12 15 18 21]


In [0]:
print(x1)
print(x2)

[1 2 3]
[10 20 30]


In [0]:
np.multiply(x1,x2)

array([10, 40, 90])

In [0]:
x1*x2

array([10, 40, 90])

**Scalar and vector division**

In [0]:
print(array2)
print(array2/20)

[2 3 4 5 6 7]
[0.1  0.15 0.2  0.25 0.3  0.35]


In [0]:
np.divide(x1,x2)

array([0.1, 0.1, 0.1])

In [0]:
x1/x2

array([0.1, 0.1, 0.1])

**Exponentials and roots**

In [0]:
print(array1)
print(array1**2)

[0 1 2 3 4 5 6 7 8 9]
[ 0  1  4  9 16 25 36 49 64 81]


In [0]:
np.power(x1,x2)

array([              1,         1048576, 205891132094649])

In [0]:
print(array1)
print(array1**(1/2))

[0 1 2 3 4 5 6 7 8 9]
[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]


In [0]:
np.power(x1,1.0/x2)

array([1.        , 1.03526492, 1.0372992 ])

In [0]:
print(np.dot([1,2,3],[2,4,6]))

28


## Numpy Array Statistics Operations

**Min/Max/Range**

In [0]:
array1_min = array1.min()
print(array1_min)

0


In [0]:
array1_max = array1.max()
print(array1_max)

9


In [0]:
array1_range = array1_max - array1_min
print(array1_range)

9


In [0]:
array2_min = array2.min()
print(array2_min)

2


In [0]:
array2_max = array2.max()
print(array2_max)

7


In [0]:
array2_range = array2_max - array2_min
print(array2_range)

5


**Sum and total**

In [0]:
array1.sum()


45

In [0]:
array2.sum()

27

In [0]:
array3.sum()

30

**Count and frequency**

In [0]:
np.unique([2,0,1,3,2,1,5,1,6,5,8], return_counts=True)


(array([0, 1, 2, 3, 5, 6, 8]), array([1, 3, 2, 1, 2, 1, 1]))

**Mean and median**

In [0]:
print(np.mean(array1))

4.5


In [0]:
print(np.median(array1))

4.5


In [0]:
print(np.median([1,3,3,4,5,5,5,6,7,7,8,9,9,10,11,12,12]))

7.0


**Variance and standard deviation**

In [0]:
np.var(array1)

8.25

In [0]:
np.var(array2)

2.9166666666666665

In [0]:
np.std(array1)

2.8722813232690143

In [0]:
np.std(array2)

1.707825127659933

# Pandas Series for 1D Data

## Initializing and Converting to Panda Series

In [0]:
import pandas as pd


**Create Series without index (list)**

In [0]:
s1 = pd.Series([1,2,3,4,5])
print(s1)


0    1
1    2
2    3
3    4
4    5
dtype: int64


**Create Series with index (dictionary)**

In [0]:
s2 = pd.Series({'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5})
print(s2)


a    1
b    2
c    3
d    4
e    5
dtype: int64


**Convert Numpy Array to Series**

In [0]:
s3 = pd.Series(array1)
print(s3)

0    0
1    1
2    2
3    3
4    4
5    5
6    6
7    7
8    8
9    9
dtype: int64


## Accessing and Selection

In [0]:
s = pd.Series({'A': 2, 'B': 4, 'C': 5, 'E': 8})

In [0]:
print(s[2])

5


**Selection by label**

In [0]:
s.loc['E']

8

**Selection by position**

In [0]:
s.iloc[1]

4

**Selection by slicing**

In [0]:
s[1:3]

B    4
C    5
dtype: int64

**Iterations**

In [0]:
for item in s:
  print(s)

A    2
B    4
C    5
E    8
dtype: int64
A    2
B    4
C    5
E    8
dtype: int64
A    2
B    4
C    5
E    8
dtype: int64
A    2
B    4
C    5
E    8
dtype: int64


**Access by condition (AND OR NOT)**

In [0]:
print(s[s == 4])

B    4
dtype: int64


In [0]:
print(s[s > 4])

C    5
E    8
dtype: int64


In [0]:
print(s[s <= 3])

A    2
dtype: int64


In [0]:
print(s[(s >= 2) & (s < 5)])

A    2
B    4
dtype: int64


In [0]:
print(s[(s <= 3) | (s > 4)])

A    2
C    5
E    8
dtype: int64


In [0]:
print(s[~(s == 4)])

A    2
C    5
E    8
dtype: int64


**Limits and sorting**

In [0]:
s1.head(1)

0    1
dtype: int64

In [0]:
s2.tail(2)

d    4
e    5
dtype: int64

In [0]:
s = pd.Series([3,2,6,4,1,8,3,5,2,9,3,2])

In [0]:
s.sort_values()

4     1
1     2
8     2
11    2
0     3
6     3
10    3
3     4
7     5
2     6
5     8
9     9
dtype: int64

In [0]:
s.sort_values().head(2)

4    1
1    2
dtype: int64

## Modifying Panda Series

**Append**

In [0]:
s1 = pd.Series([1,2,3])
s2 = pd.Series([4,5,6])

In [0]:
print(s1.append(s2))

0    1
1    2
2    3
0    4
1    5
2    6
dtype: int64


In [0]:
print(pd.concat([s1,s2]))

0    1
1    2
2    3
0    4
1    5
2    6
dtype: int64


**Deleting from Series**

In [0]:
print(s1)
print(s1.drop([2]))

0    1
1    2
2    3
dtype: int64
0    1
1    2
dtype: int64


**Updating to Series**

In [0]:
s1 = pd.Series([12,3,4,2,9])
s2 = pd.Series([8,6], index=[2,4])
s1.update(s2)
print(s1)

0    12
1     3
2     8
3     2
4     6
dtype: int64


## Panda Series Math Operations

**Scalar and vector addition**

In [0]:
s1 = pd.Series([1,2,3,4,5])
s2 = pd.Series([5,10,15,20,25])

In [0]:
print(s1+10)

0    11
1    12
2    13
3    14
4    15
dtype: int64


In [0]:
print(s1 + s2)

0     6
1    12
2    18
3    24
4    30
dtype: int64


**Scalar and vector subtraction**

In [0]:
print(s1-10)

0   -9
1   -8
2   -7
3   -6
4   -5
dtype: int64


In [0]:
print(s1 - s2)

0    -4
1    -8
2   -12
3   -16
4   -20
dtype: int64


**Scalar and vector multiplication**

In [0]:
s1 * s2

0      5
1     20
2     45
3     80
4    125
dtype: int64

**Scalar and vector division**

In [0]:
print(s1/4)

0    0.25
1    0.50
2    0.75
3    1.00
4    1.25
dtype: float64


In [0]:
print(s1/s2)

0    0.2
1    0.2
2    0.2
3    0.2
4    0.2
dtype: float64


**Exponentials and roots**

In [0]:
print(s1**2)

0     1
1     4
2     9
3    16
4    25
dtype: int64


In [0]:
print(s1**(1/2))

0    1.000000
1    1.414214
2    1.732051
3    2.000000
4    2.236068
dtype: float64


## Panda Series Statistics Operations

**Min/Max/Range**


In [0]:
s1_min = s1.min()
print(s1_min)

1


In [0]:
s1_max = s1.max()
print(s1_max)

5


In [0]:
s1_range = s1_max - s1_min
print(s1_range)

4


**Sum and total**

In [0]:
s1.sum()

15

In [0]:
s2.sum()

75

**Count and frequency**

In [0]:
s.value_counts()

3    3
2    3
9    1
8    1
6    1
5    1
4    1
1    1
dtype: int64

**Mean and median**

In [0]:
s1.mean()

3.0

In [0]:
s1.median()

3.0

In [0]:
s2.mean()

15.0

In [0]:
s2.median()

15.0

**Variance and standard deviation**

In [0]:
s1.var()

2.5

In [0]:
s1.std()

1.5811388300841898

**Quantile**

In [0]:
s1.quantile([0, .1, .25, .5, .75, .8, 1])

0.00    1.0
0.10    1.4
0.25    2.0
0.50    3.0
0.75    4.0
0.80    4.2
1.00    5.0
dtype: float64

**Cumulative Sum**

In [0]:
s1.cumsum()

0     1
1     3
2     6
3    10
4    15
dtype: int64

**Apply and lambda**

In [0]:
s1.apply(np.sqrt)

0    1.000000
1    1.414214
2    1.732051
3    2.000000
4    2.236068
dtype: float64

In [0]:
s2

0     5
1    10
2    15
3    20
4    25
dtype: int64

In [0]:
s2.apply(lambda x: False if x < 15 else True)

0    False
1    False
2     True
3     True
4     True
dtype: bool

# Numpy Matrices for 2D Data

## Initializing and Converting to Numpy Matrix

**Numpy Matrix by string**

In [0]:
a = np.matrix('1 2 3; 4 5 6')
print(a)

[[1 2 3]
 [4 5 6]]


**Numpy Matrix by double list**

In [0]:
b = np.matrix([[1,2,3,4],[5,6,7,8]])
print(b)

[[1 2 3 4]
 [5 6 7 8]]


In [0]:
a1 = np.array([1,2,3])
a2 = np.array([2,3,4])
a3 = np.array([5,6,7])
a4 = np.array([9,4,5])
m = np.matrix([a1,a2,a3,a4])
print(m)

[[1 2 3]
 [2 3 4]
 [5 6 7]
 [9 4 5]]


## Accessing and Modifying Numpy Matrices

**Accessing rows**

In [0]:
print(m[0])

[[1 2 3]]


In [0]:
print(m[1])

[[2 3 4]]


In [0]:
print(m[2])

[[5 6 7]]


**Accessing columns**

In [0]:
print(m[:,0])

[[1]
 [2]
 [5]
 [9]]


In [0]:
print(m[:,1])

[[2]
 [3]
 [6]
 [4]]


In [0]:
print(m[:,2])

[[3]
 [4]
 [7]
 [5]]


**Accessing elements**

In [0]:
print(m[2,0])

5


In [0]:
print(m[2,1])

6


In [0]:
print(m)

[[1 2 3]
 [2 3 4]
 [5 6 7]
 [9 4 5]]


**Slicing the matrix**

In [0]:
print(m[1:3])

[[2 3 4]
 [5 6 7]]


In [0]:
print(m[:,1:3])

[[2 3]
 [3 4]
 [6 7]
 [4 5]]


In [0]:
m[0:2,1:3]

matrix([[2, 3],
        [3, 4]])

**Iterating through the matrix**

In [0]:
for i in np.nditer(m):
  print(i)

1
2
3
2
3
4
5
6
7
9
4
5


**Get additional information**

In [0]:
m.shape

(4, 3)

In [0]:
m.size

12

## Modifying the Matrix

**Inserting rows**

In [0]:
np.vstack((m,[1,2,3]))

matrix([[1, 2, 3],
        [2, 3, 4],
        [5, 6, 7],
        [9, 4, 5],
        [1, 2, 3]])

**Inserting columns**

In [0]:
np.hstack((m,[[10],[20],[30],[40]]))

matrix([[ 1,  2,  3, 10],
        [ 2,  3,  4, 20],
        [ 5,  6,  7, 30],
        [ 9,  4,  5, 40]])

**Modifying rows and columns**

In [0]:
print(m)

[[1 2 3]
 [2 3 4]
 [5 6 7]
 [9 4 5]]


In [0]:
m[0] = [10,20,30]
print(m)

[[10 20 30]
 [ 2  3  4]
 [ 5  6  7]
 [ 9  4  5]]


In [0]:
m[:,2] = [[-1],[-2],[-3],[-4]]
print(m)

[[10 20 -1]
 [ 2  3 -2]
 [ 5  6 -3]
 [ 9  4 -4]]


**Deleting rows and columns**

In [0]:
np.delete(m,1,0)

matrix([[10, 20, -1],
        [ 5,  6, -3],
        [ 9,  4, -4]])

In [0]:
np.delete(m,2,1)

matrix([[10, 20],
        [ 2,  3],
        [ 5,  6],
        [ 9,  4]])

**Reshape**

In [0]:
m = np.matrix('1 2 3 4; 5 6 7 8; 9 10 11 12; 12 14 15 16; 17 18 19 20')

In [0]:
m

matrix([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [12, 14, 15, 16],
        [17, 18, 19, 20]])

In [0]:
m.reshape(2,10)

matrix([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
        [11, 12, 12, 14, 15, 16, 17, 18, 19, 20]])

In [0]:
m.reshape(5,4)

matrix([[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12],
        [12, 14, 15, 16],
        [17, 18, 19, 20]])

In [0]:
m.T

matrix([[ 1,  5,  9, 12, 17],
        [ 2,  6, 10, 14, 18],
        [ 3,  7, 11, 15, 19],
        [ 4,  8, 12, 16, 20]])

## Numpy Matrix Math Operations

**Scalar and vector addition**

In [0]:
print(m+20)

[[21 22 23 24]
 [25 26 27 28]
 [29 30 31 32]
 [32 34 35 36]
 [37 38 39 40]]


In [0]:
m1 = np.matrix('1 2 3; 4 5 6; 7 8 9; 10 11 13')
m2 = np.matrix('2 4 6; 8 10 12; 14 16 18; 20 22 24')
print(m1 + m2)

[[ 3  6  9]
 [12 15 18]
 [21 24 27]
 [30 33 37]]


**Scalar and vector subtraction**

In [0]:
print(m1-10)

[[-9 -8 -7]
 [-6 -5 -4]
 [-3 -2 -1]
 [ 0  1  3]]


In [0]:
print(m1-m2)

[[ -1  -2  -3]
 [ -4  -5  -6]
 [ -7  -8  -9]
 [-10 -11 -11]]


**Scalar and vector multiplication**

In [0]:
m1 * 100

matrix([[ 100,  200,  300],
        [ 400,  500,  600],
        [ 700,  800,  900],
        [1000, 1100, 1300]])

In [0]:
a = np.array([1,2,3])
m = np.matrix('1 2; 3 4; 5 6')
a*m

matrix([[22, 28]])

In [0]:
np.dot(a,m)

matrix([[22, 28]])

**Scalar and vector division**

In [0]:
m2/20

matrix([[0.1, 0.2, 0.3],
        [0.4, 0.5, 0.6],
        [0.7, 0.8, 0.9],
        [1. , 1.1, 1.2]])

In [0]:
m = np.matrix('1 2 3; 4 5 6; 7 8 9')

In [0]:
a/m

matrix([[1.        , 1.        , 1.        ],
        [0.25      , 0.4       , 0.5       ],
        [0.14285714, 0.25      , 0.33333333]])

**Exponentials and roots**

In [0]:
m**3

matrix([[ 468,  576,  684],
        [1062, 1305, 1548],
        [1656, 2034, 2412]])

In [0]:
np.sqrt(m)

matrix([[1.        , 1.41421356, 1.73205081],
        [2.        , 2.23606798, 2.44948974],
        [2.64575131, 2.82842712, 3.        ]])

## Numpy Matrix Statistics Operations

**Mean**

In [0]:
m.mean()

5.0

**Variance**

In [0]:
m.var()

6.666666666666667

**Standard Deviation**

In [0]:
m.std()

2.581988897471611

# Pandas DataFrame for 2D Data

## Initializing and Converting to Panda DataFrames

In [0]:
import pandas as pd

**Create DataFrame without index (list)**

In [13]:
df = pd.DataFrame([[1,2,3],[4,5,6],[7,8,9]])
print(df)

   0  1  2
0  1  2  3
1  4  5  6
2  7  8  9


**Create DataFrame with index (dictionary)**

In [14]:
df2 = pd.DataFrame({'col1': [2,4,5], 'col2': [10,20,30], 'col3': [3,9,12]})
print(df2)

   col1  col2  col3
0     2    10     3
1     4    20     9
2     5    30    12


**Convert Numpy Matrix to DataFrame**

In [0]:
m = np.matrix([[1,2,3],[2,4,6],[6,3,4]])

In [16]:
df = pd.DataFrame(m)
print(df)

   0  1  2
0  1  2  3
1  2  4  6
2  6  3  4


**Convert files and databases**

In [17]:
from google.colab import files
uploaded = files.upload()
for fn in uploaded.keys():
  print('User uploaded file "{name}" with {length} bytes'.format(name=fn,length=len(uploaded[fn])))

Saving Sales.db to Sales (1).db
User uploaded file "Sales.db" with 28672 bytes


In [0]:
import sqlite3

con = sqlite3.connect("Sales.db")

In [0]:
employee_df = pd.read_sql_query("SELECT * FROM Employee", con)


In [20]:
print(employee_df)

    Employee_ID First_Name Last_Name                 Department           City
0         10001     Daniel     Olson                Electronics  San Francisco
1         10002       Nick   Markmen                    Clothes  San Francisco
2         10003      Maria  Gonzales                Electronics       San Jose
3         10004     George      Wong                    Clothes  San Francisco
4         10005  Stephanie  Williams                    Clothes        Oakland
5         10006     Miguel      Alva                    Clothes        Oakland
6         10007    Jessica   Collins                  Cosmetics  San Francisco
7         10008        Ted  Anderson                    Clothes        Oakland
8         10009   Victoria    Garcia                    Clothes       San Jose
9         10010    Jasmine      Khan  Pharmacy, Health & Beauty        Oakland
10        10011       Adam     Smith  Pharmacy, Health & Beauty       San Jose
11        10012     Rachel       Kim                

In [21]:
customer_df = pd.read_sql_query("SELECT * FROM Customer", con)
print(customer_df)

    Customer_ID First_Name Last_Name           City
0         20001       Jack      Ward       San Jose
1         20002     Steven  Martinez  San Francisco
2         20003    Jessica   Collins       San Jose
3         20004      Carie  Robinson  San Francisco
4         20005       Zack  Peterson        Oakland
5         20006     Bianca   Sanchez  San Francisco
6         20007      James      Owen        Oakland
7         20008       Lisa     Smith  San Francisco
8         20009     Daniel  Yasukawa        Oakland
9         20010     Lauren      Pham       San Jose
10        20011       Juan      Diaz        Oakland
11        20012     Martha      Diaz  San Francisco


In [22]:
product_df = pd.read_sql_query("SELECT * FROM Product", con)
print(product_df)

    Product_ID Product_Name   Price  Supplier_ID          Supplier_Name
0        30001      T-Shirt   12.98        40004       Studio Warehouse
1        30002  Tooth Paste    4.50        40002             BioMed Inc
2        30003     Speakers   56.99        40005          CompTech, Inc
3        30004        Pants   34.35        40003             FashionRUs
4        30005   Microphone   46.21        40001  Music Vibrations, Inc
5        30006          Tie   11.09        40003             FashionRUs
6        30007   Chap Stick    3.67        40002             BioMed Inc
7        30008     Medicine   23.96        40002             BioMed Inc
8        30009   Headphones   31.44        40001  Music Vibrations, Inc
9        30010       Lotion   16.77        40002             BioMed Inc
10       30011   DVD Player  149.99        40005          CompTech, Inc
11       30012         Coat   56.33        40004       Studio Warehouse


In [23]:
orders_df = pd.read_sql_query("SELECT * FROM Orders", con)
print(orders_df)

    Order_ID  Customer_ID  Product_ID  Employee_ID      Date
0      50001        20005       30005        10001  17-02-12
1      50002        20003       30008        10011  17-09-03
2      50003        20010       30001        10009  17-11-23
3      50004        20004       30008        10010  17-01-28
4      50005        20008       30012        10002  17-08-11
5      50006        20011       30011        10001  17-04-13
6      50007        20012       30002        10007  17-10-07
7      50008        20002       30001        10004  17-03-09
8      50009        20001       30006        10009  17-12-26
9      50010        20013       30004        10008  17-04-11
10     50011        20014       30007        10010  17-09-16


In [24]:
supplier_df = pd.read_sql_query("SELECT * FROM Supplier", con)
print(supplier_df)

   Supplier_ID                   Name           City
0        40001  Music Vibrations, Inc        Oakland
1        40002             BioMed Inc  San Francisco
2        40003             FashionRUs       San Jose
3        40004       Studio Warehouse  San Francisco
4        40005          CompTech, Inc       San Jose


## Accessing and Selection

In [25]:
employee_df['First_Name']

0        Daniel
1          Nick
2         Maria
3        George
4     Stephanie
5        Miguel
6       Jessica
7           Ted
8      Victoria
9       Jasmine
10         Adam
11       Rachel
Name: First_Name, dtype: object

In [26]:
orders_df['Order_ID']

0     50001
1     50002
2     50003
3     50004
4     50005
5     50006
6     50007
7     50008
8     50009
9     50010
10    50011
Name: Order_ID, dtype: int64

In [27]:
customer_df['Last_Name'][0]

'Ward'

In [28]:
supplier_df['Name'][1]

'BioMed Inc'

**Selection by label**

In [29]:
df2.loc[:,['col1','col2']]

Unnamed: 0,col1,col2
0,2,10
1,4,20
2,5,30


In [30]:
customer_df.loc[[0],['First_Name','Last_Name']]

Unnamed: 0,First_Name,Last_Name
0,Jack,Ward


In [31]:
customer_df.loc[[0,1],['First_Name']]

Unnamed: 0,First_Name
0,Jack
1,Steven


**Selection by position**

In [32]:
product_df.iloc[1]

Product_ID             30002
Product_Name     Tooth Paste
Price                    4.5
Supplier_ID            40002
Supplier_Name     BioMed Inc
Name: 1, dtype: object

In [33]:
product_df.iloc[3]

Product_ID            30004
Product_Name          Pants
Price                 34.35
Supplier_ID           40003
Supplier_Name    FashionRUs
Name: 3, dtype: object

In [34]:
customer_df.iloc[:,1]

0        Jack
1      Steven
2     Jessica
3       Carie
4        Zack
5      Bianca
6       James
7        Lisa
8      Daniel
9      Lauren
10       Juan
11     Martha
Name: First_Name, dtype: object

**Selection by slicing**

In [35]:
customer_df.loc[0:3,['Customer_ID']]

Unnamed: 0,Customer_ID
0,20001
1,20002
2,20003
3,20004


In [36]:
 customer_df.loc[[1],'Customer_ID':'Last_Name']

Unnamed: 0,Customer_ID,First_Name,Last_Name
1,20002,Steven,Martinez


In [37]:
customer_df.iloc[1:4,1]

1     Steven
2    Jessica
3      Carie
Name: First_Name, dtype: object

In [38]:
customer_df.iloc[[2],1:4]

Unnamed: 0,First_Name,Last_Name,City
2,Jessica,Collins,San Jose


**Access by condition AND OR NOT**

In [39]:
product_df[product_df['Product_ID'] == 30006]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
5,30006,Tie,11.09,40003,FashionRUs


In [40]:
product_df[product_df['Price'] > 50]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
2,30003,Speakers,56.99,40005,"CompTech, Inc"
10,30011,DVD Player,149.99,40005,"CompTech, Inc"
11,30012,Coat,56.33,40004,Studio Warehouse


In [41]:
product_df[(product_df['Product_ID'] > 30005) & (product_df['Price'] < 30)]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
5,30006,Tie,11.09,40003,FashionRUs
6,30007,Chap Stick,3.67,40002,BioMed Inc
7,30008,Medicine,23.96,40002,BioMed Inc
9,30010,Lotion,16.77,40002,BioMed Inc


In [42]:
product_df[(product_df['Price'] < 15) & (product_df['Supplier_Name'] == 'BioMed Inc')]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
1,30002,Tooth Paste,4.5,40002,BioMed Inc
6,30007,Chap Stick,3.67,40002,BioMed Inc


In [43]:
product_df[(product_df['Product_Name'] == 'Pants') | (product_df['Product_Name'] == 'Tie')]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
3,30004,Pants,34.35,40003,FashionRUs
5,30006,Tie,11.09,40003,FashionRUs


In [44]:
product_df[~(product_df['Supplier_Name'] == 'FashionRUs')]

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name
0,30001,T-Shirt,12.98,40004,Studio Warehouse
1,30002,Tooth Paste,4.5,40002,BioMed Inc
2,30003,Speakers,56.99,40005,"CompTech, Inc"
4,30005,Microphone,46.21,40001,"Music Vibrations, Inc"
6,30007,Chap Stick,3.67,40002,BioMed Inc
7,30008,Medicine,23.96,40002,BioMed Inc
8,30009,Headphones,31.44,40001,"Music Vibrations, Inc"
9,30010,Lotion,16.77,40002,BioMed Inc
10,30011,DVD Player,149.99,40005,"CompTech, Inc"
11,30012,Coat,56.33,40004,Studio Warehouse


**Iterations**

In [45]:
print(employee_df)

    Employee_ID First_Name Last_Name                 Department           City
0         10001     Daniel     Olson                Electronics  San Francisco
1         10002       Nick   Markmen                    Clothes  San Francisco
2         10003      Maria  Gonzales                Electronics       San Jose
3         10004     George      Wong                    Clothes  San Francisco
4         10005  Stephanie  Williams                    Clothes        Oakland
5         10006     Miguel      Alva                    Clothes        Oakland
6         10007    Jessica   Collins                  Cosmetics  San Francisco
7         10008        Ted  Anderson                    Clothes        Oakland
8         10009   Victoria    Garcia                    Clothes       San Jose
9         10010    Jasmine      Khan  Pharmacy, Health & Beauty        Oakland
10        10011       Adam     Smith  Pharmacy, Health & Beauty       San Jose
11        10012     Rachel       Kim                

In [46]:
for row in employee_df.itertuples():
  print(row)

Pandas(Index=0, Employee_ID=10001, First_Name='Daniel', Last_Name='Olson', Department='Electronics', City='San Francisco')
Pandas(Index=1, Employee_ID=10002, First_Name='Nick', Last_Name='Markmen', Department='Clothes', City='San Francisco')
Pandas(Index=2, Employee_ID=10003, First_Name='Maria', Last_Name='Gonzales', Department='Electronics', City='San Jose')
Pandas(Index=3, Employee_ID=10004, First_Name='George', Last_Name='Wong', Department='Clothes', City='San Francisco')
Pandas(Index=4, Employee_ID=10005, First_Name='Stephanie', Last_Name='Williams', Department='Clothes', City='Oakland')
Pandas(Index=5, Employee_ID=10006, First_Name='Miguel', Last_Name='Alva', Department='Clothes', City='Oakland')
Pandas(Index=6, Employee_ID=10007, First_Name='Jessica', Last_Name='Collins', Department='Cosmetics', City='San Francisco')
Pandas(Index=7, Employee_ID=10008, First_Name='Ted', Last_Name='Anderson', Department='Clothes', City='Oakland')
Pandas(Index=8, Employee_ID=10009, First_Name='Victo

In [47]:
for index, row in employee_df.iterrows():
  print(index, row)

0 Employee_ID            10001
First_Name            Daniel
Last_Name              Olson
Department       Electronics
City           San Francisco
Name: 0, dtype: object
1 Employee_ID            10002
First_Name              Nick
Last_Name            Markmen
Department           Clothes
City           San Francisco
Name: 1, dtype: object
2 Employee_ID          10003
First_Name           Maria
Last_Name         Gonzales
Department     Electronics
City              San Jose
Name: 2, dtype: object
3 Employee_ID            10004
First_Name            George
Last_Name               Wong
Department           Clothes
City           San Francisco
Name: 3, dtype: object
4 Employee_ID        10005
First_Name     Stephanie
Last_Name       Williams
Department       Clothes
City             Oakland
Name: 4, dtype: object
5 Employee_ID      10006
First_Name      Miguel
Last_Name         Alva
Department     Clothes
City           Oakland
Name: 5, dtype: object
6 Employee_ID            10007
First_Nam

In [48]:
for column in employee_df:
  print(employee_df[column])

0     10001
1     10002
2     10003
3     10004
4     10005
5     10006
6     10007
7     10008
8     10009
9     10010
10    10011
11    10012
Name: Employee_ID, dtype: int64
0        Daniel
1          Nick
2         Maria
3        George
4     Stephanie
5        Miguel
6       Jessica
7           Ted
8      Victoria
9       Jasmine
10         Adam
11       Rachel
Name: First_Name, dtype: object
0        Olson
1      Markmen
2     Gonzales
3         Wong
4     Williams
5         Alva
6      Collins
7     Anderson
8       Garcia
9         Khan
10       Smith
11         Kim
Name: Last_Name, dtype: object
0                   Electronics
1                       Clothes
2                   Electronics
3                       Clothes
4                       Clothes
5                       Clothes
6                     Cosmetics
7                       Clothes
8                       Clothes
9     Pharmacy, Health & Beauty
10    Pharmacy, Health & Beauty
11                  Electronics
Name:

**Limits and sorting**

In [49]:
employee_df

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
0,10001,Daniel,Olson,Electronics,San Francisco
1,10002,Nick,Markmen,Clothes,San Francisco
2,10003,Maria,Gonzales,Electronics,San Jose
3,10004,George,Wong,Clothes,San Francisco
4,10005,Stephanie,Williams,Clothes,Oakland
5,10006,Miguel,Alva,Clothes,Oakland
6,10007,Jessica,Collins,Cosmetics,San Francisco
7,10008,Ted,Anderson,Clothes,Oakland
8,10009,Victoria,Garcia,Clothes,San Jose
9,10010,Jasmine,Khan,"Pharmacy, Health & Beauty",Oakland


In [50]:
employee_df.sort_values(by=['Last_Name'])

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
5,10006,Miguel,Alva,Clothes,Oakland
7,10008,Ted,Anderson,Clothes,Oakland
6,10007,Jessica,Collins,Cosmetics,San Francisco
8,10009,Victoria,Garcia,Clothes,San Jose
2,10003,Maria,Gonzales,Electronics,San Jose
9,10010,Jasmine,Khan,"Pharmacy, Health & Beauty",Oakland
11,10012,Rachel,Kim,Electronics,San Jose
1,10002,Nick,Markmen,Clothes,San Francisco
0,10001,Daniel,Olson,Electronics,San Francisco
10,10011,Adam,Smith,"Pharmacy, Health & Beauty",San Jose


In [51]:
employee_df.sort_values(by=['Department'])

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
1,10002,Nick,Markmen,Clothes,San Francisco
3,10004,George,Wong,Clothes,San Francisco
4,10005,Stephanie,Williams,Clothes,Oakland
5,10006,Miguel,Alva,Clothes,Oakland
7,10008,Ted,Anderson,Clothes,Oakland
8,10009,Victoria,Garcia,Clothes,San Jose
6,10007,Jessica,Collins,Cosmetics,San Francisco
0,10001,Daniel,Olson,Electronics,San Francisco
2,10003,Maria,Gonzales,Electronics,San Jose
11,10012,Rachel,Kim,Electronics,San Jose


In [52]:
employee_df.head(2)

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
0,10001,Daniel,Olson,Electronics,San Francisco
1,10002,Nick,Markmen,Clothes,San Francisco


In [53]:
employee_df.tail(3)

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
9,10010,Jasmine,Khan,"Pharmacy, Health & Beauty",Oakland
10,10011,Adam,Smith,"Pharmacy, Health & Beauty",San Jose
11,10012,Rachel,Kim,Electronics,San Jose


In [54]:
employee_df.sort_values(by=['Last_Name']).head(3)

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City
5,10006,Miguel,Alva,Clothes,Oakland
7,10008,Ted,Anderson,Clothes,Oakland
6,10007,Jessica,Collins,Cosmetics,San Francisco


## Modifying Panda DataFrames

**Merging**

In [57]:
pd.merge(customer_df, orders_df, how='inner')

Unnamed: 0,Customer_ID,First_Name,Last_Name,City,Order_ID,Product_ID,Employee_ID,Date
0,20001,Jack,Ward,San Jose,50009,30006,10009,17-12-26
1,20002,Steven,Martinez,San Francisco,50008,30001,10004,17-03-09
2,20003,Jessica,Collins,San Jose,50002,30008,10011,17-09-03
3,20004,Carie,Robinson,San Francisco,50004,30008,10010,17-01-28
4,20005,Zack,Peterson,Oakland,50001,30005,10001,17-02-12
5,20008,Lisa,Smith,San Francisco,50005,30012,10002,17-08-11
6,20010,Lauren,Pham,San Jose,50003,30001,10009,17-11-23
7,20011,Juan,Diaz,Oakland,50006,30011,10001,17-04-13
8,20012,Martha,Diaz,San Francisco,50007,30002,10007,17-10-07


In [58]:
pd.merge(customer_df, orders_df, how='left')


Unnamed: 0,Customer_ID,First_Name,Last_Name,City,Order_ID,Product_ID,Employee_ID,Date
0,20001,Jack,Ward,San Jose,50009.0,30006.0,10009.0,17-12-26
1,20002,Steven,Martinez,San Francisco,50008.0,30001.0,10004.0,17-03-09
2,20003,Jessica,Collins,San Jose,50002.0,30008.0,10011.0,17-09-03
3,20004,Carie,Robinson,San Francisco,50004.0,30008.0,10010.0,17-01-28
4,20005,Zack,Peterson,Oakland,50001.0,30005.0,10001.0,17-02-12
5,20006,Bianca,Sanchez,San Francisco,,,,
6,20007,James,Owen,Oakland,,,,
7,20008,Lisa,Smith,San Francisco,50005.0,30012.0,10002.0,17-08-11
8,20009,Daniel,Yasukawa,Oakland,,,,
9,20010,Lauren,Pham,San Jose,50003.0,30001.0,10009.0,17-11-23


In [59]:
pd.merge(customer_df, orders_df, how='right')


Unnamed: 0,Customer_ID,First_Name,Last_Name,City,Order_ID,Product_ID,Employee_ID,Date
0,20001,Jack,Ward,San Jose,50009,30006,10009,17-12-26
1,20002,Steven,Martinez,San Francisco,50008,30001,10004,17-03-09
2,20003,Jessica,Collins,San Jose,50002,30008,10011,17-09-03
3,20004,Carie,Robinson,San Francisco,50004,30008,10010,17-01-28
4,20005,Zack,Peterson,Oakland,50001,30005,10001,17-02-12
5,20008,Lisa,Smith,San Francisco,50005,30012,10002,17-08-11
6,20010,Lauren,Pham,San Jose,50003,30001,10009,17-11-23
7,20011,Juan,Diaz,Oakland,50006,30011,10001,17-04-13
8,20012,Martha,Diaz,San Francisco,50007,30002,10007,17-10-07
9,20013,,,,50010,30004,10008,17-04-11


In [60]:
pd.merge(customer_df, orders_df, how='outer')


Unnamed: 0,Customer_ID,First_Name,Last_Name,City,Order_ID,Product_ID,Employee_ID,Date
0,20001,Jack,Ward,San Jose,50009.0,30006.0,10009.0,17-12-26
1,20002,Steven,Martinez,San Francisco,50008.0,30001.0,10004.0,17-03-09
2,20003,Jessica,Collins,San Jose,50002.0,30008.0,10011.0,17-09-03
3,20004,Carie,Robinson,San Francisco,50004.0,30008.0,10010.0,17-01-28
4,20005,Zack,Peterson,Oakland,50001.0,30005.0,10001.0,17-02-12
5,20006,Bianca,Sanchez,San Francisco,,,,
6,20007,James,Owen,Oakland,,,,
7,20008,Lisa,Smith,San Francisco,50005.0,30012.0,10002.0,17-08-11
8,20009,Daniel,Yasukawa,Oakland,,,,
9,20010,Lauren,Pham,San Jose,50003.0,30001.0,10009.0,17-11-23


**Append/Concatenate**

In [0]:
supplier_df2 = pd.DataFrame({'Supplier_ID': [40006, 40007], 'Name': ['GameHub Inc', 'TechCenter Media'], 'City': ['Atlanta', 'New York']})


In [62]:
supplier_df.append(supplier_df2)


Unnamed: 0,City,Name,Supplier_ID
0,Oakland,"Music Vibrations, Inc",40001
1,San Francisco,BioMed Inc,40002
2,San Jose,FashionRUs,40003
3,San Francisco,Studio Warehouse,40004
4,San Jose,"CompTech, Inc",40005
0,Atlanta,GameHub Inc,40006
1,New York,TechCenter Media,40007


In [63]:
pd.concat([supplier_df,supplier_df2])


Unnamed: 0,City,Name,Supplier_ID
0,Oakland,"Music Vibrations, Inc",40001
1,San Francisco,BioMed Inc,40002
2,San Jose,FashionRUs,40003
3,San Francisco,Studio Warehouse,40004
4,San Jose,"CompTech, Inc",40005
0,Atlanta,GameHub Inc,40006
1,New York,TechCenter Media,40007


In [64]:
pd.concat([supplier_df,supplier_df2],axis=1)


Unnamed: 0,Supplier_ID,Name,City,City.1,Name.1,Supplier_ID.1
0,40001,"Music Vibrations, Inc",Oakland,Atlanta,GameHub Inc,40006.0
1,40002,BioMed Inc,San Francisco,New York,TechCenter Media,40007.0
2,40003,FashionRUs,San Jose,,,
3,40004,Studio Warehouse,San Francisco,,,
4,40005,"CompTech, Inc",San Jose,,,


**Deleteing/Droping**

In [65]:
supplier_df2.drop([1])


Unnamed: 0,City,Name,Supplier_ID
0,Atlanta,GameHub Inc,40006


In [66]:
supplier_df2.drop(['Supplier_ID'],axis=1)

Unnamed: 0,City,Name
0,Atlanta,GameHub Inc
1,New York,TechCenter Media


**Updating**

In [67]:
supplier_df2.loc[1] = ['Miami', 'TechCenter Media', 40008]
print(supplier_df2)

      City              Name  Supplier_ID
0  Atlanta       GameHub Inc        40006
1    Miami  TechCenter Media        40008


In [68]:
supplier_df2.loc[1,'Supplier_ID'] -= 1
print(supplier_df2)

      City              Name  Supplier_ID
0  Atlanta       GameHub Inc        40006
1    Miami  TechCenter Media        40007


In [69]:
supplier_df2['Supplier_ID'].update(pd.Series([40009,40010]))
print(supplier_df2)

      City              Name  Supplier_ID
0  Atlanta       GameHub Inc        40009
1    Miami  TechCenter Media        40010


**Stacking/Grouping**

In [70]:
supplier_df

Unnamed: 0,Supplier_ID,Name,City
0,40001,"Music Vibrations, Inc",Oakland
1,40002,BioMed Inc,San Francisco
2,40003,FashionRUs,San Jose
3,40004,Studio Warehouse,San Francisco
4,40005,"CompTech, Inc",San Jose


In [71]:
supplier_df.stack()

0  Supplier_ID                    40001
   Name           Music Vibrations, Inc
   City                         Oakland
1  Supplier_ID                    40002
   Name                      BioMed Inc
   City                   San Francisco
2  Supplier_ID                    40003
   Name                      FashionRUs
   City                        San Jose
3  Supplier_ID                    40004
   Name                Studio Warehouse
   City                   San Francisco
4  Supplier_ID                    40005
   Name                   CompTech, Inc
   City                        San Jose
dtype: object

In [72]:
orders_df

Unnamed: 0,Order_ID,Customer_ID,Product_ID,Employee_ID,Date
0,50001,20005,30005,10001,17-02-12
1,50002,20003,30008,10011,17-09-03
2,50003,20010,30001,10009,17-11-23
3,50004,20004,30008,10010,17-01-28
4,50005,20008,30012,10002,17-08-11
5,50006,20011,30011,10001,17-04-13
6,50007,20012,30002,10007,17-10-07
7,50008,20002,30001,10004,17-03-09
8,50009,20001,30006,10009,17-12-26
9,50010,20013,30004,10008,17-04-11


In [73]:
orders_df.stack()

0   Order_ID          50001
    Customer_ID       20005
    Product_ID        30005
    Employee_ID       10001
    Date           17-02-12
1   Order_ID          50002
    Customer_ID       20003
    Product_ID        30008
    Employee_ID       10011
    Date           17-09-03
2   Order_ID          50003
    Customer_ID       20010
    Product_ID        30001
    Employee_ID       10009
    Date           17-11-23
3   Order_ID          50004
    Customer_ID       20004
    Product_ID        30008
    Employee_ID       10010
    Date           17-01-28
4   Order_ID          50005
    Customer_ID       20008
    Product_ID        30012
    Employee_ID       10002
    Date           17-08-11
5   Order_ID          50006
    Customer_ID       20011
    Product_ID        30011
    Employee_ID       10001
    Date           17-04-13
6   Order_ID          50007
    Customer_ID       20012
    Product_ID        30002
    Employee_ID       10007
    Date           17-10-07
7   Order_ID        

In [74]:
orders_df.groupby('Product_ID').count()

Unnamed: 0_level_0,Order_ID,Customer_ID,Employee_ID,Date
Product_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
30001,2,2,2,2
30002,1,1,1,1
30004,1,1,1,1
30005,1,1,1,1
30006,1,1,1,1
30007,1,1,1,1
30008,2,2,2,2
30011,1,1,1,1
30012,1,1,1,1


In [75]:
product_df[['Supplier_Name','Price']].groupby('Supplier_Name').sum()

Unnamed: 0_level_0,Price
Supplier_Name,Unnamed: 1_level_1
BioMed Inc,48.9
"CompTech, Inc",206.98
FashionRUs,45.44
"Music Vibrations, Inc",77.65
Studio Warehouse,69.31


In [76]:
product_df[['Supplier_Name','Product_ID','Price']].groupby(['Supplier_Name','Product_ID']).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Price
Supplier_Name,Product_ID,Unnamed: 2_level_1
BioMed Inc,30002,4.5
BioMed Inc,30007,3.67
BioMed Inc,30008,23.96
BioMed Inc,30010,16.77
"CompTech, Inc",30003,56.99
"CompTech, Inc",30011,149.99
FashionRUs,30004,34.35
FashionRUs,30006,11.09
"Music Vibrations, Inc",30005,46.21
"Music Vibrations, Inc",30009,31.44


## Panda Series Math Operations

**Scalar and vector addition**

In [0]:
df

Unnamed: 0,0,1,2
0,1,2,3
1,2,4,6
2,6,3,4


In [0]:
df+2

Unnamed: 0,0,1,2
0,3,4,5
1,4,6,8
2,8,5,6


In [0]:
df2 = pd.DataFrame([[10,20,30],[40,50,60],[70,80,90]])

In [0]:
df + df2

Unnamed: 0,0,1,2
0,11,22,33
1,42,54,66
2,76,83,94


**Scalar and vector subtraction**

In [0]:
df - 20

Unnamed: 0,0,1,2
0,-19,-18,-17
1,-18,-16,-14
2,-14,-17,-16


In [0]:
df - df2

Unnamed: 0,0,1,2
0,-9,-18,-27
1,-38,-46,-54
2,-64,-77,-86


**Scalar and vector multiplication**

In [0]:
df *10

Unnamed: 0,0,1,2
0,10,20,30
1,20,40,60
2,60,30,40


In [0]:
s = pd.Series([-1,-2,3])

In [0]:
s * df

Unnamed: 0,0,1,2
0,-1,-4,9
1,-2,-8,18
2,-6,-6,12


**Scalar and vector division**

In [0]:
df/100

Unnamed: 0,0,1,2
0,0.01,0.02,0.03
1,0.02,0.04,0.06
2,0.06,0.03,0.04


In [0]:
s/df

Unnamed: 0,0,1,2
0,-1.0,-1.0,1.0
1,-0.5,-0.5,0.5
2,-0.166667,-0.666667,0.75


**Exponentials and roots**

In [0]:
df**3

Unnamed: 0,0,1,2
0,1,8,27
1,8,64,216
2,216,27,64


In [0]:
df**(1/2)

Unnamed: 0,0,1,2
0,1.0,1.414214,1.732051
1,1.414214,2.0,2.44949
2,2.44949,1.732051,2.0


## Panda DataFrame Statistics Operations

**Min/Max/Range**

In [77]:
df_min = df.min()
print(df_min)

0    1
1    2
2    3
dtype: int64


In [78]:
df_max = df.max()
print(df_max)

0    6
1    4
2    6
dtype: int64


In [79]:
df_range = df_max - df_min
print(df_range)

0    5
1    2
2    3
dtype: int64


**Sum and total**

In [80]:
df.sum()

0     9
1     9
2    13
dtype: int64

**Count and frequency**

In [81]:
df.count()

0    3
1    3
2    3
dtype: int64

In [82]:
customer_df.count()

Customer_ID    12
First_Name     12
Last_Name      12
City           12
dtype: int64

**Mean and median**

In [83]:
df.mean()

0    3.000000
1    3.000000
2    4.333333
dtype: float64

In [84]:
df.median()

0    2.0
1    3.0
2    4.0
dtype: float64

**Variance and standard deviation**

In [85]:
df.var()

0    7.000000
1    1.000000
2    2.333333
dtype: float64

In [86]:
df.std()

0    2.645751
1    1.000000
2    1.527525
dtype: float64

**Covariance**

In [87]:
df = pd.DataFrame([[1,2],[2,4],[3,6],[4,8]])
print(df)

   0  1
0  1  2
1  2  4
2  3  6
3  4  8


In [88]:
df.cov()

Unnamed: 0,0,1
0,1.666667,3.333333
1,3.333333,6.666667


**Quantile**

In [89]:
df.quantile([0,.1,.25,.5,.75,1])

Unnamed: 0,0,1
0.0,1.0,2.0
0.1,1.3,2.6
0.25,1.75,3.5
0.5,2.5,5.0
0.75,3.25,6.5
1.0,4.0,8.0


**Cumulative Sum**

In [90]:
df.cumsum()

Unnamed: 0,0,1
0,1,2
1,3,6
2,6,12
3,10,20


**Aggregate functions and describe**

In [91]:
df.agg(['min','mean'])

Unnamed: 0,0,1
min,1.0,2.0
mean,2.5,5.0


In [92]:
df.describe()

Unnamed: 0,0,1
count,4.0,4.0
mean,2.5,5.0
std,1.290994,2.581989
min,1.0,2.0
25%,1.75,3.5
50%,2.5,5.0
75%,3.25,6.5
max,4.0,8.0


In [93]:
product_df.describe()

Unnamed: 0,Product_ID,Price,Supplier_ID
count,12.0,12.0,12.0
mean,30006.5,37.356667,40002.833333
std,3.605551,40.063523,1.403459
min,30001.0,3.67,40001.0
25%,30003.75,12.5075,40002.0
50%,30006.5,27.7,40002.5
75%,30009.25,48.74,40004.0
max,30012.0,149.99,40005.0


**Apply and lambda**

In [94]:
df.apply(['max'])

Unnamed: 0,0,1
max,4,8


In [0]:
def foo(x):
  return x+10

In [96]:
df.apply(foo)

Unnamed: 0,0,1
0,11,12
1,12,14
2,13,16
3,14,18


In [97]:
product_df['Price'].apply(foo)

0      22.98
1      14.50
2      66.99
3      44.35
4      56.21
5      21.09
6      13.67
7      33.96
8      41.44
9      26.77
10    159.99
11     66.33
Name: Price, dtype: float64

In [98]:
prod_min = product_df['Price'].min()
prod_max = product_df['Price'].max()
product_df['Price'].apply(lambda x: (prod_max - x)/(prod_max-prod_min))

0     0.936372
1     0.994328
2     0.635593
3     0.790323
4     0.709267
5     0.949289
6     1.000000
7     0.861331
8     0.810210
9     0.910470
10    0.000000
11    0.640104
Name: Price, dtype: float64

In [99]:
product_supplier_df = pd.merge(product_df, supplier_df, how='inner')
product_supplier_df.head()

Unnamed: 0,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name,Name,City
0,30001,T-Shirt,12.98,40004,Studio Warehouse,Studio Warehouse,San Francisco
1,30012,Coat,56.33,40004,Studio Warehouse,Studio Warehouse,San Francisco
2,30002,Tooth Paste,4.5,40002,BioMed Inc,BioMed Inc,San Francisco
3,30007,Chap Stick,3.67,40002,BioMed Inc,BioMed Inc,San Francisco
4,30008,Medicine,23.96,40002,BioMed Inc,BioMed Inc,San Francisco


In [100]:
combined_df = pd.merge(employee_df, product_supplier_df, how='inner')
combined_df.head()

Unnamed: 0,Employee_ID,First_Name,Last_Name,Department,City,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name,Name
0,10001,Daniel,Olson,Electronics,San Francisco,30001,T-Shirt,12.98,40004,Studio Warehouse,Studio Warehouse
1,10001,Daniel,Olson,Electronics,San Francisco,30012,Coat,56.33,40004,Studio Warehouse,Studio Warehouse
2,10001,Daniel,Olson,Electronics,San Francisco,30002,Tooth Paste,4.5,40002,BioMed Inc,BioMed Inc
3,10001,Daniel,Olson,Electronics,San Francisco,30007,Chap Stick,3.67,40002,BioMed Inc,BioMed Inc
4,10001,Daniel,Olson,Electronics,San Francisco,30008,Medicine,23.96,40002,BioMed Inc,BioMed Inc


In [101]:
combined_df.groupby('Department').count()

Unnamed: 0_level_0,Employee_ID,First_Name,Last_Name,City,Product_ID,Product_Name,Price,Supplier_ID,Supplier_Name,Name
Department,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
Clothes,22,22,22,22,22,22,22,22,22,22
Cosmetics,6,6,6,6,6,6,6,6,6,6
Electronics,14,14,14,14,14,14,14,14,14,14
"Pharmacy, Health & Beauty",6,6,6,6,6,6,6,6,6,6


In [102]:
combined_df[['City','Department','Product_ID']].groupby(['City','Department']).count()

Unnamed: 0_level_0,Unnamed: 1_level_0,Product_ID
City,Department,Unnamed: 2_level_1
Oakland,Clothes,6
Oakland,"Pharmacy, Health & Beauty",2
San Francisco,Clothes,12
San Francisco,Cosmetics,6
San Francisco,Electronics,6
San Jose,Clothes,4
San Jose,Electronics,8
San Jose,"Pharmacy, Health & Beauty",4
