# Numpy 的基本介紹

In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive.
# NOTE: google.colab is only importable inside a Google Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 對於各種後續的應用，快速的陣列運算都是不可或缺的。下圖出現的套件都有使用到 numpy，由此可知 numpy 對於科學領域的重要性。
![picture](https://drive.google.com/uc?id=1ENCioG_oQBfa2UdjVghAA9MTILllFw2z)

## 從 github 上的機器學習專案，可以看到 numpy 被引用的比例非常的高，可見 numpy 有多麼的熱門。

![picture](https://drive.google.com/uc?id=1JU1w9sz6sx3nq04_2ceAweVZit3RkkAV)

# 安裝套件

In [None]:
# !pip install numpy
# !pip install pandas



## Numpy 最主要的功能就是做出多維的陣列，並對陣列做快速的數值運算，所謂的多維陣列如下圖所示。

![picture](https://drive.google.com/uc?id=1tgREdFA8EeNvrQIg4azzCqd8MoadCWZB)

# 引入套件

In [5]:
import pandas as pd
import numpy as np

# Numpy 的基本操作

## 創造 numpy.ndarray

In [6]:
# Wrapping a (nested) Python list in np.array() produces a numpy.ndarray,
# which makes every NumPy feature available. The nesting depth of the list
# decides how many dimensions .shape reports.
a = np.array([1, 2, 3])                   # shape (3,)
b = np.array([[1, 2, 3]])                 # shape (1, 3)
c = np.array([[1, 2, 3], [4, 5, 6]])      # shape (2, 3)
d = np.array([[[1, 2, 3], [4, 5, 6]]])    # shape (1, 2, 3)

# Report the type and shape of every array; only the first report is
# followed by an extra blank line.
for arr, line_end in ((a, '\n\n'), (b, '\n'), (c, '\n'), (d, '\n')):
    print(type(arr))
    print(arr.shape, end=line_end)

<class 'numpy.ndarray'>
(3,)

<class 'numpy.ndarray'>
(1, 3)
<class 'numpy.ndarray'>
(2, 3)
<class 'numpy.ndarray'>
(1, 2, 3)


In [7]:
# NumPy provides constructor functions for common array contents:
# all zeros, all ones, a constant fill, an identity matrix, random values.

a = np.zeros((2, 2))            # every element is 0
b = np.ones((1, 2))             # every element is 1
c = np.full((2, 2), 7)          # every element is the chosen value (7)
d = np.eye(7)                   # identity matrix: 1 on the main diagonal, 0 elsewhere
e = np.random.random((2, 2))    # random floats between 0 and 1

# Print each array under its label; all but the last are followed by a divider.
for label, values in (("zeros", a), ("ones", b), ("full", c), ("eye", d)):
    print(f"{label}:\n{values}", end='\n-------------\n')
print(f"random:\n{e}")

zeros:
[[0. 0.]
 [0. 0.]]
-------------
ones:
[[1. 1.]]
-------------
full:
[[7 7]
 [7 7]]
-------------
eye:
[[1. 0. 0. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 1.]]
-------------
random:
[[0.40741861 0.23184123]
 [0.9807583  0.35921005]]


In [8]:
# Three rows of two values each produce an array of shape (3, 2).
a = np.array([
    [1, 2],
    [3, 4],
    [5, 6],
])
print("a", a.shape, sep='\n', end='\n-------------\n')

a
(3, 2)
-------------


## 取值

In [None]:
# Transposing reverses the order of the axes: shape (1, 1, 3) becomes (3, 1, 1).
b = np.array([[[1, 2, 3]]])
c = b.transpose()
for arr in (b, c):
    print(arr.shape)

(1, 1, 3)
(3, 1, 1)


In [None]:
# Indexing and slicing a 2-D ndarray.
a = np.array([[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]])

# A colon selects a range. The end of a slice is excluded, so this takes
# rows 0-1 and columns 1-2.
b = a[0:2, 1:3]

# A list of indices picks non-adjacent entries: rows 0 and 2, columns 1-2.
c = a[[0, 2], 1:3]

# A condition inside the brackets keeps only the elements that satisfy it.
d = a[a > 2]

# np.where returns the indices at which the condition holds (one index
# array per axis); indexing with that result yields the matching values.
e = np.where(a > 5)

report = [
    ("a:", a),
    # .T gives the transpose of the array.
    ("a.T:", a.T),
    # Integers in square brackets select values; for a 2-D array the index
    # before the comma picks the row and the one after it picks the column.
    ("a[1]:", a[1]),
    ("a[1, 1]:", a[1, 1]),
    ("a[0:2, 1:3]:", b),
    ("a[[0, 2], 1:3]:", c),
    ("a[a > 2]:", d),
    ("search a > 5 index from a: ", e),
    ("search a > 5:", a[e]),
]
for heading, value in report:
    print(f"{heading}\n{value}", end='\n-------------\n')

a:
[[ 1  2  3  4]
 [ 5  6  7  8]
 [ 9 10 11 12]]
-------------
a.T:
[[ 1  5  9]
 [ 2  6 10]
 [ 3  7 11]
 [ 4  8 12]]
-------------
a[1]:
[5 6 7 8]
-------------
a[1, 1]:
6
-------------
a[0:2, 1:3]:
[[2 3]
 [6 7]]
-------------
a[[0, 2], 1:3]:
[[ 2  3]
 [10 11]]
-------------
a[a > 2]:
[ 3  4  5  6  7  8  9 10 11 12]
-------------
search a > 5 index from a: 
(array([1, 1, 1, 2, 2, 2, 2], dtype=int64), array([1, 2, 3, 0, 1, 2, 3], dtype=int64))
-------------
search a > 5:
[ 6  7  8  9 10 11 12]
-------------


## Array 數學運算

In [None]:
# Ordinary arithmetic between two arrays works element by element: the value
# at position [0, 0] is combined with position [0, 0] of the other array,
# and so on for every position.
x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])

results = [
    ("x", x),
    ("y", y),
    ("x + y", x + y),
    ("x - y", x - y),
    ("x * y", x * y),
    ("x / y", x / y),
]
for label, value in results:
    print(f"{label}:\n{value}", end='\n-------------\n')

# The square root is likewise applied to each element; no divider follows it.
print(f"sqrt(x):\n{np.sqrt(x)}")

x:
[[1 2]
 [3 4]]
-------------
y:
[[5 6]
 [7 8]]
-------------
x + y:
[[ 6  8]
 [10 12]]
-------------
x - y:
[[-4 -4]
 [-4 -4]]
-------------
x * y:
[[ 5 12]
 [21 32]]
-------------
x / y:
[[0.2        0.33333333]
 [0.42857143 0.5       ]]
-------------
sqrt(x):
[[1.         1.41421356]
 [1.73205081 2.        ]]


In [None]:
# Arrays also support the dot product.
v = np.array([9, 10])
w = np.array([11, 12])

x = np.array([[1, 2], [3, 4]])
y = np.array([[5, 6], [7, 8]])

# Caution: for a dot product the last dimension of the first array must match
# the first dimension of the second one. For example, a 1 x 2 array needs a
# 2 x N array on the right-hand side, where N can be any number.
dot_report = [
    ("v", v),
    ("w", w),
    ("x", x),
    ("y", y),
    ("np.dot(v, w)", np.dot(v, w)),
    ("np.dot(x, y)", np.dot(x, y)),
]
for label, value in dot_report:
    print(f"{label}:\n{value}", end='\n-------------\n')

# The final result is printed without a trailing divider.
print(f"np.dot(x, v):\n{np.dot(x, v)}")

v:
[ 9 10]
-------------
w:
[11 12]
-------------
x:
[[1 2]
 [3 4]]
-------------
y:
[[5 6]
 [7 8]]
-------------
np.dot(v, w):
219
-------------
np.dot(x, y):
[[19 22]
 [43 50]]
-------------
np.dot(x, v):
[29 67]


In [None]:
# Arithmetic between two arrays does not require identical shapes: NumPy
# broadcasts for you. Here the 3-element y is added to each of the 4 rows of x.
x = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]])
y = np.array([1, 0, 1])

for label, value in (("x", x), ("y", y)):
    print(f"{label}:\n{value}", end='\n-------------\n')
print(f"x + y:\n{x + y}")

x:
[[ 1  2  3]
 [ 4  5  6]
 [ 7  8  9]
 [10 11 12]]
-------------
y:
[1 0 1]
-------------
x + y:
[[ 2  2  4]
 [ 5  5  7]
 [ 8  8 10]
 [11 11 13]]


In [None]:
# Mathematical functions provided by NumPy: y = exp(x), y = log2(x) and
# y = log10(x), each evaluated for the same x and printed with a divider.
x = 10
for fn_name, fn in (("exp", np.exp), ("log2", np.log2), ("log10", np.log10)):
    y = fn(x)
    print(f"{fn_name}({x}): {y}", end="\n-------------\n")



exp(10): 22026.465794806718
-------------
log2(10): 3.321928094887362
-------------
log10(10): 1.0
-------------


# Pandas 的基本介紹

## Pandas 被設計來專門處理表格型資料，所謂的表格型資料就像平常大家在 EXCEL 裡看到的那樣，是二維的資料。舉例來說，我可以將學員的資料記錄成像右下的圖這樣的表格型資料。在 Pandas 裡，這種表格叫做 DataFrame，一筆資料是一個 Row，一種特徵是一個 Column。

![picture](https://drive.google.com/uc?id=1N8Z3gA-rRwmJT-I31i9Qk9jkxla-R0we)

## 支援各種資料格式的讀寫，從常見的 csv 到資料庫都能做讀寫，非常的方便。

![picture](https://drive.google.com/uc?id=1JHLXXPZi7l7AkjwfQhaiKBxJ-tdKdq2w)

## 內建方便的視覺化功能，幫助使用者快速地做一些資料探索。

![picture](https://drive.google.com/uc?id=1NhHkjOqU2BA6sGN9HV7EJJrFA-0t4BJl)

## 從 Stack Overflow 上 Pandas 出現在提問裡的比例，就能看出 Pandas 有多受歡迎。

![picture](https://drive.google.com/uc?id=18af5ElNnWuyUEKspFBbRVFtQ1GFTt-BQ)

# Pandas 的基本操作

## 自製 DataFrame

In [None]:
# pd.DataFrame accepts a dict: each key is a string naming a feature and each
# value is a list holding all the values of that feature.
people = {
    "Name": ["Jerry", "Mary", "Tom"],
    "Age": [25, 29, 30],
    "Sex": ["male", "female", "male"],
}
df = pd.DataFrame(people)
print("資料型態: {}".format(type(df)))
df

資料型態: <class 'pandas.core.frame.DataFrame'>


Unnamed: 0,Name,Age,Sex
0,Jerry,25,male
1,Mary,29,female
2,Tom,30,male


## 取出特定 column

In [None]:
# Put a feature name in square brackets after the DataFrame to get all the
# values of that feature.
# A single extracted column has type Series, which also shows that a
# DataFrame is made up of several Series.
print(type(df["Name"]))
df["Name"]

<class 'pandas.core.series.Series'>


0    Jerry
1     Mary
2      Tom
Name: Name, dtype: object

## 自製 Series 並加進 DataFrame

In [None]:
# pd.Series only needs a list of values and a feature name. A Series is very
# similar to a list; the main reason to convert is to gain access to the
# functionality pandas offers.
height = pd.Series(data=[172, 180], name="height")
print(type(height))
height

<class 'pandas.core.series.Series'>


0    172
1    180
Name: height, dtype: int64

In [None]:
# To add a new feature to a DataFrame, use the form
# DataFrame["feature_name"] = some_series.
# NOTE: height holds only two values (index 0 and 1) while df has three rows;
# the row without a matching index label ends up with a missing value.
df["height"] = height
df

Unnamed: 0,Name,Age,Sex,height
0,Jerry,25,male,172.0
1,Mary,29,female,180.0
2,Tom,30,male,


## 計算各 column 的統計數值

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()

Unnamed: 0,Age,height
count,3.0,2.0
mean,28.0,176.0
std,2.645751,5.656854
min,25.0,172.0
25%,27.0,174.0
50%,29.0,176.0
75%,29.5,178.0
max,30.0,180.0


在 train.csv 裡面，我們有的變數是：
- PassengerId:乘客編號
- Survived:是否存活(0:死亡，1:存活)
- Pclass:票艙分級(1:最高等級,...,3:最低等級)
- Name:姓名
- Sex:性別
- Age:年齡
- SibSp:兄弟姊妹及配偶在船上的總數(旁系)
- Parch:父母及小孩在船上的總數(直系)
- Ticket:票號
- Fare:票價
- Cabin:客艙編號
- Embarked:上船的港口


## 讀取 csv 檔

In [None]:
# Load the Titanic training data from a CSV file.
# Note that this is an absolute path starting at /content/drive; adjust it
# if the dataset is stored somewhere else.
train_csv_path = "/content/drive/MyDrive/Colab Notebooks/BenQ_AI_notebook/teaching material/Data processing and visualization/dataset/titanic_data/train.csv"
titanic = pd.read_csv(train_csv_path)
titanic

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


## 取前幾個 row 或後幾個 row 出來看

In [None]:
# head() takes a number specifying how many rows from the start to show
titanic.head(7)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


In [None]:
# tail() takes a number specifying how many rows from the end to show
titanic.tail(7)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
884,885,0,3,"Sutehall, Mr. Henry Jr",male,25.0,0,0,SOTON/OQ 392076,7.05,,S
885,886,0,3,"Rice, Mrs. William (Margaret Norton)",female,39.0,0,5,382652,29.125,,Q
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.45,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0,C148,C
890,891,0,3,"Dooley, Mr. Patrick",male,32.0,0,0,370376,7.75,,Q


## 確認各 column 的資料型態

In [None]:
# Show the data type of every feature; object usually indicates strings
titanic.dtypes

PassengerId      int64
Survived         int64
Pclass           int64
Name            object
Sex             object
Age            float64
SibSp            int64
Parch            int64
Ticket          object
Fare           float64
Cabin           object
Embarked        object
dtype: object

## 查看各 column 缺失值狀況及資料型態

In [None]:
# Show the non-null count (i.e. how many values are missing) and the data type of every column
titanic.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          714 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        204 non-null    object 
 11  Embarked     889 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


## 一次取多個 column

In [None]:
# Passing a list inside the DataFrame's square brackets selects several
# columns at once.
wanted_columns = ["Sex", "Survived"]
sex_survived = titanic[wanted_columns]
sex_survived.head(n=10)

Unnamed: 0,Sex,Survived
0,male,0
1,female,1
2,female,1
3,female,1
4,male,0
5,male,0
6,male,0
7,male,0
8,female,1
9,female,1


## 根據條件篩選想看的 row

In [None]:
# A boolean condition can be used to filter rows: only the rows where the
# condition is True are kept. This keeps every female passenger, whatever
# the value of Survived.
is_female = sex_survived["Sex"] == "female"
female_survived = sex_survived[is_female]
female_survived.head(n=10)

Unnamed: 0,Sex,Survived
1,female,1
2,female,1
3,female,1
8,female,1
9,female,1
10,female,1
11,female,1
14,female,0
15,female,1
18,female,0


In [None]:
# Two equivalent ways to keep the passengers whose Pclass is 1 or 3.
#
# Option 1: membership test
# pclass_1_3 = titanic[titanic["Pclass"].isin([1, 3])]
#
# Option 2 (used here): combine two conditions. Each comparison must be
# wrapped in parentheses when written inline, "or" is spelled | and "and"
# is spelled &.
in_first_class = (titanic["Pclass"] == 1)
in_third_class = (titanic["Pclass"] == 3)
pclass_1_3 = titanic[in_first_class | in_third_class]
pclass_1_3.head(n=10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S


## 取出及設定 [row, column] 位置的值

In [None]:
# loc accepts boolean conditions, strings (column names) and so on: here the
# row selector is a condition and the column selector is a column name.
first_class_rows = titanic["Pclass"] == 1
pclass_age = titanic.loc[first_class_rows, "Age"]
pclass_age.head(n=10)

1     38.0
3     35.0
6     54.0
11    58.0
23    28.0
27    19.0
30    40.0
31     NaN
34    28.0
35    42.0
Name: Age, dtype: float64

In [None]:
# iloc selects by integer position only. Slices exclude their end, so this
# takes rows 5-14 and the columns at positions 1 and 2.
row_span = slice(5, 15)
column_span = slice(1, 3)
some_data = titanic.iloc[row_span, column_span]
some_data

Unnamed: 0,Survived,Pclass
5,0,3
6,0,1
7,0,3
8,1,3
9,1,2
10,1,3
11,1,1
12,0,3
13,0,3
14,0,3


In [None]:
# To set values, use loc or iloc to specify the target position.
# Rows 0-4 of the column at position 3 are overwritten in place, so every
# later cell that uses titanic sees the modified values.
titanic.iloc[0:5, 3] = "Jerry"
titanic.head(10)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,Jerry,male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,Jerry,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,Jerry,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,Jerry,female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,Jerry,male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S
7,8,0,3,"Palsson, Master. Gosta Leonard",male,2.0,3,1,349909,21.075,,S
8,9,1,3,"Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)",female,27.0,0,2,347742,11.1333,,S
9,10,1,2,"Nasser, Mrs. Nicholas (Adele Achem)",female,14.0,1,0,237736,30.0708,,C


## 最後將 DataFrame 存成 excel 檔，再嘗試讀取 excel 檔，確定存的檔案沒有問題

In [None]:
# An Excel file can store several tables, each on a sheet with its own
# sheet_name, so the sheet has to be named explicitly when reading or writing.
from pathlib import Path

# to_excel does not create missing folders: make sure the output directory
# exists first, otherwise the write fails on a fresh runtime.
Path("titanic_data").mkdir(parents=True, exist_ok=True)

# index=False keeps the DataFrame's row index out of the saved sheet.
titanic.to_excel("titanic_data/train.xlsx", sheet_name="sheet_1", index=False)

In [None]:
# Read the saved sheet back in to confirm the exported file is intact.
saved_workbook = "titanic_data/train.xlsx"
titanic = pd.read_excel(saved_workbook, sheet_name="sheet_1")
titanic.head(n=7)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,Jerry,male,22.0,1,0,A/5 21171,7.25,,S
1,2,1,1,Jerry,female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,Jerry,female,26.0,0,0,STON/O2. 3101282,7.925,,S
3,4,1,1,Jerry,female,35.0,1,0,113803,53.1,C123,S
4,5,0,3,Jerry,male,35.0,0,0,373450,8.05,,S
5,6,0,3,"Moran, Mr. James",male,,0,0,330877,8.4583,,Q
6,7,0,1,"McCarthy, Mr. Timothy J",male,54.0,0,0,17463,51.8625,E46,S


# Cheat Sheet

## 很多比較熱門的套件都會有熱心人士幫忙製作這種 Cheat Sheet，上面會記錄一些常用功能的簡介，臨時忘記什麼功能的時候可以參考一下。

![picture](https://drive.google.com/uc?id=18Hu82RWdQ_zpfJjdxvEcZ0o0TooHg0A4)

![picture](https://drive.google.com/uc?id=1c_WiNQRu_KdDKy59pQQBysIyAJLAk64C)

# 參考連結

## https://venturebeat.com/2019/01/24/github-numpy-and-scipy-are-the-most-popular-packages-for-machine-learning-projects/

## https://numpy.org/

## https://s3.amazonaws.com/assets.datacamp.com/blog_assets/Numpy_Python_Cheat_Sheet.pdf

## https://pandas.pydata.org/docs/getting_started/intro_tutorials/

## https://www.sqlshack.com/getting-started-with-pandas-in-python/

## http://datacamp-community-prod.s3.amazonaws.com/dbed353d-2757-4617-8206-8767ab379ab3