# # Getting Started with NumPy
### <p style="color:Tomato">Learn the basics of NumPy while working with alcohol consumption data</p>

#### <p style="color:Gray">world_alcohol.csv</p>
: A dataset that records per capita alcohol consumption for each country.<br/>
각 국가별로 1인당 알코올 섭취량을 기록
* A comma-separated values (CSV) dataset
* Specify the delimiter with the `delimiter` parameter when reading the file

The dataset records per capita alcohol consumption for each country.

> Goal: learn the basics of the NumPy library and how to work with NumPy arrays. 

In [1]:
# Configure IPython so that every expression statement in a cell is displayed,
# not only the last one (the cells below rely on this to show several values).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [2]:
import numpy as np

In [3]:
# One-dimensional array (a vector) built from a flat Python list.
vector_values = [5, 10, 15, 20]
vector = np.array(vector_values)
vector

# Two-dimensional array (a matrix) built from a list of equal-length rows.
matrix_rows = [
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
]
matrix = np.array(matrix_rows)
matrix

array([ 5, 10, 15, 20])

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [4]:
# Rebind `vector` to a three-element array and display it.
vector = np.array([10, 20, 30])
vector
# Rebuild the 3x3 matrix row by row; it is assigned but not displayed here.
matrix = np.array([
    [5, 10, 15],
    [20, 25, 30],
    [35, 40, 45],
])

array([10, 20, 30])

#### <p style="color:Gray">ndarray.shape</p><hr>
Use the `shape` attribute to find out how many elements an array has along each dimension.

In [5]:
# For a one-dimensional array, `shape` is a one-element tuple holding its length.
vector = np.array([1, 2, 3, 4])
print(vector.shape)

(4,)


> This tuple indicates that the array vector has one dimension, with length 4, which matches our intuition that vector has 4 elements.

In [6]:
# For a two-dimensional array, `shape` is (number of rows, number of columns).
matrix = np.array([[5, 10, 15], [20, 25, 30]])
print(matrix.shape)

(2, 3)


> The above code will result in the tuple (2,3) indicating that matrix has 2 rows and 3 columns.

In [7]:
# Capture both shape tuples in one step, then print them one per line.
vector_shape, matrix_shape = vector.shape, matrix.shape
print(vector_shape, matrix_shape, sep="\n")

(4,)
(2, 3)


#### <p style="color:Gray">numpy.genfromtxt()</p><hr>
Use `numpy.genfromtxt()` to read a dataset from a text file into a NumPy array.<br/>


In [8]:
# Read nfl.csv as comma-delimited text with the default numeric dtype; fields
# that cannot be parsed as numbers come back as nan. Print the first ten rows.
nfl = np.genfromtxt("nfl.csv", delimiter=",")
print(nfl[:10])

[[             nan              nan              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]
 [  2.00900000e+03   1.00000000e+00              nan              nan]]


In [9]:
# Read world_alcohol.csv with the default numeric dtype. The result is a
# numpy.ndarray; non-numeric fields (including the header row) become nan.
world_alcohol = np.genfromtxt('world_alcohol.csv', delimiter=",")
print(type(world_alcohol))
print(world_alcohol[:10])

<class 'numpy.ndarray'>
[[             nan              nan              nan              nan
               nan]
 [  1.98600000e+03              nan              nan              nan
    0.00000000e+00]
 [  1.98600000e+03              nan              nan              nan
    5.00000000e-01]
 [  1.98500000e+03              nan              nan              nan
    1.62000000e+00]
 [  1.98600000e+03              nan              nan              nan
    4.27000000e+00]
 [  1.98700000e+03              nan              nan              nan
    1.98000000e+00]
 [  1.98700000e+03              nan              nan              nan
    0.00000000e+00]
 [  1.98700000e+03              nan              nan              nan
    1.30000000e-01]
 [  1.98500000e+03              nan              nan              nan
    3.90000000e-01]
 [  1.98600000e+03              nan              nan              nan
    1.55000000e+00]]


#### <p style="color:Gray">dtype</p><hr>
NumPy will automatically figure out an appropriate data type when reading in data or converting lists to arrays. Use the `dtype` attribute to check the data type of a NumPy array.<br/>

NumPy는 데이터를 읽거나 목록을 배열로 변환 할 때 자동으로 적절한 데이터 유형을 찾습니다. dtype 속성을 사용하여 NumPy 배열의 데이터 유형을 확인할 수 있습니다.

In [10]:
# `dtype` reports the element type NumPy inferred for the array; the exact
# integer width (e.g. int32 vs int64) depends on the platform.
numbers = np.array([1, 2, 3, 4])
numbers.dtype

dtype('int32')

In [11]:
# Store and display the dtype of the array loaded from world_alcohol.csv.
world_alcohol_dtype = world_alcohol.dtype
world_alcohol_dtype

dtype('float64')

### <p style="color:Tomato">How to deal with missing data</p>
#### <p style="color:Gray">NaN</p><hr>
`nan`, which stands for "not a number", is a special floating-point value used to represent missing values.<br/>
When NumPy can't convert a value to a numeric data type like float or integer, it uses this special `nan` value instead. This is why the text columns in the arrays printed above show up as `nan`.
* The first row of the file is a header; its column names cannot be converted to numbers either, so the whole first row is read as `nan`.
#### <p style="color:Gray">skip_header</p><hr>
> To skip the header when reading in the data, we use the skip_header parameter. The skip_header parameter accepts an integer value, specifying the number of lines from the top of the file we want NumPy to ignore. 

<br/>


* To specify the data type for the entire NumPy array, we use the keyword argument `dtype` and set it to `"U75"`.
* This specifies that we want to read in each value as a Unicode string of at most 75 characters.


In [12]:
# Reload the file with every field read as a Unicode string ('U75') and with
# the first line (the header) skipped.
world_alcohol = np.genfromtxt(
    'world_alcohol.csv', 
    dtype='U75', 
    delimiter=',',
    skip_header=1)

In [13]:
# Print the string-typed array; the header row is no longer part of the data.
print(world_alcohol)

[['1986' 'Western Pacific' 'Viet Nam' 'Wine' '0']
 ['1986' 'Americas' 'Uruguay' 'Other' '0.5']
 ['1985' 'Africa' "Cte d'Ivoire" 'Wine' '1.62']
 ..., 
 ['1986' 'Europe' 'Switzerland' 'Spirits' '2.54']
 ['1987' 'Western Pacific' 'Papua New Guinea' 'Other' '0']
 ['1986' 'Africa' 'Swaziland' 'Other' '5.15']]


#### <p style="color:Gray">Indexing Array</p><hr>

In [14]:
# Index a one-dimensional array like a list: position 0 is the first element.
vector = np.array([5, 10, 15, 20])
print(vector[0])

5


In [15]:
# A plain Python list of lists, used to contrast list indexing with NumPy's.
list_of_lists = [[5, 10, 15], [20, 25, 30]]

In [18]:
# Nested lists are indexed one level at a time: take the first inner list,
# then its third element.
first_item = list_of_lists[0]
first_item
first_item[2]

# The same lookup written as a single chained expression.
list_of_lists[0][2]

[5, 10, 15]

15

15

In [19]:
# A NumPy matrix takes both indices in one bracket: [row, column].
# Row 1, column 2 is the last element of the second row.
matrix = np.array([[5, 10, 15], [20, 25, 30]])
matrix[1, 2]

30

In [20]:
# Display the array as loaded with skip_header=1 (no header row).
world_alcohol

array([['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ..., 
       ['1986', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['1986', 'Africa', 'Swaziland', 'Other', '5.15']], 
      dtype='<U75')

In [22]:
# For comparison, reload WITHOUT skip_header: the header line is kept and
# appears as the first row of strings.
world_alcohol = np.genfromtxt(
    'world_alcohol.csv', 
    dtype='U75', 
    delimiter=',')
world_alcohol

array([['Year', 'WHO region', 'Country', 'Beverage Types', 'Display Value'],
       ['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ..., 
       ['1986', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['1986', 'Africa', 'Swaziland', 'Other', '5.15']], 
      dtype='<U75')

In [23]:
# Reload with skip_header=1 so that row 0 is the first data row again; the
# indexing examples below rely on this.
world_alcohol = np.genfromtxt(
    'world_alcohol.csv', 
    dtype='U75', 
    delimiter=',',
    skip_header=1)
world_alcohol

array([['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ..., 
       ['1986', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['1986', 'Africa', 'Swaziland', 'Other', '5.15']], 
      dtype='<U75')

In [29]:
# Row 1, column 4: the "Display Value" field of the second data row.
# The chained form world_alcohol[1][4] would return the same element.
uruguay_other_1986 = world_alcohol[1, 4]
uruguay_other_1986

'0.5'

In [30]:
# Row 2, column 2: the "Country" field of the third data row.
# The chained form world_alcohol[2][2] would return the same element.
third_country = world_alcohol[2, 2]
third_country

"Cte d'Ivoire"

#### <p style="color:Gray">Slicing Array</p><hr>
> A colon by itself (`:`) specifies that the entirety of a single dimension should be selected.

In [32]:
# Slice positions 0, 1 and 2; the stop index 3 is excluded.
vector = np.array([5, 10, 15, 20])
vector[0:3]

array([ 5, 10, 15])

In [33]:
# Build the 3x3 example matrix from a list of three rows.
matrix = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])

In [34]:
# Every row (`:`) of column index 1, i.e. the whole second column.
matrix[:, 1]

array([10, 25, 40])

In [36]:
# Column index 2 of every row: the country names.
countries = world_alcohol[:, 2]
countries

array(['Viet Nam', 'Uruguay', "Cte d'Ivoire", ..., 'Switzerland',
       'Papua New Guinea', 'Swaziland'], 
      dtype='<U75')

In [38]:
# Column index 4 of every row: the consumption values, still stored as strings.
alcohol_consumption = world_alcohol[:, 4]
alcohol_consumption

array(['0', '0.5', '1.62', ..., '2.54', '0', '5.15'], 
      dtype='<U75')

#### <p style="color:Gray">Slicing One Dimension</p><hr>
To select one whole dimension and a slice of the other, we use a special notation: a colon (`:`) for the whole dimension combined with a slice such as `0:2` for the other.

In [53]:
# Recreate the 3x3 example matrix (three rows of three integers).
matrix = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])

In [42]:
# All rows, columns 0 and 1 (the stop index 2 is excluded).
matrix[:, 0:2]

array([[ 5, 10],
       [20, 25],
       [35, 40]])

In [43]:
# Rows 1 and 2, all columns.
matrix[1:3, :]

array([[20, 25, 30],
       [35, 40, 45]])

In [44]:
# Rows 1 and 2 of column 1 only; the result is one-dimensional.
matrix[1:3, 1]

array([25, 40])

In [48]:
# All rows, columns 0 and 1 (year and WHO region).
first_two_columns = world_alcohol[:, 0:2]
first_two_columns

array([['1986', 'Western Pacific'],
       ['1986', 'Americas'],
       ['1985', 'Africa'],
       ..., 
       ['1986', 'Europe'],
       ['1987', 'Western Pacific'],
       ['1986', 'Africa']], 
      dtype='<U75')

In [49]:
# The first ten rows of column 0 (the year), as a one-dimensional array.
first_ten_years = world_alcohol[0:10, 0]
first_ten_years

array(['1986', '1986', '1985', '1986', '1987', '1987', '1987', '1985',
       '1986', '1984'], 
      dtype='<U75')

In [50]:
# The first ten rows with every column.
first_ten_rows = world_alcohol[0:10, :]
first_ten_rows

array([['1986', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['1986', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ['1986', 'Americas', 'Colombia', 'Beer', '4.27'],
       ['1987', 'Americas', 'Saint Kitts and Nevis', 'Beer', '1.98'],
       ['1987', 'Americas', 'Guatemala', 'Other', '0'],
       ['1987', 'Africa', 'Mauritius', 'Wine', '0.13'],
       ['1985', 'Africa', 'Angola', 'Spirits', '0.39'],
       ['1986', 'Americas', 'Antigua and Barbuda', 'Spirits', '1.55'],
       ['1984', 'Africa', 'Nigeria', 'Other', '6.1']], 
      dtype='<U75')

#### <p style="color:Gray">Slicing arrays</p><hr>
We can also slice along both dimensions simultaneously.

In [51]:
# Rows 1-2 and columns 0-1: the lower-left 2x2 corner of the matrix.
matrix[1:3, 0:2]

array([[20, 25],
       [35, 40]])

In [52]:
# The first twenty rows of columns 1 and 2 (WHO region and country).
first_twenty_regions = world_alcohol[0:20, 1:3]
first_twenty_regions

array([['Western Pacific', 'Viet Nam'],
       ['Americas', 'Uruguay'],
       ['Africa', "Cte d'Ivoire"],
       ['Americas', 'Colombia'],
       ['Americas', 'Saint Kitts and Nevis'],
       ['Americas', 'Guatemala'],
       ['Africa', 'Mauritius'],
       ['Africa', 'Angola'],
       ['Americas', 'Antigua and Barbuda'],
       ['Africa', 'Nigeria'],
       ['Africa', 'Botswana'],
       ['Americas', 'Guatemala'],
       ['Western Pacific', "Lao People's Democratic Republic"],
       ['Eastern Mediterranean', 'Afghanistan'],
       ['Western Pacific', 'Viet Nam'],
       ['Africa', 'Guinea-Bissau'],
       ['Americas', 'Costa Rica'],
       ['Africa', 'Seychelles'],
       ['Europe', 'Norway'],
       ['Africa', 'Kenya']], 
      dtype='<U75')

#### <p style="color:Gray">world_alcohol.csv</p>
Each row specifies how many liters of a type of alcohol the average citizen of a particular country drank in a given year.<br/>
For example, one row shows **how many liters of wine the typical Vietnamese citizen drank in 1986.**<br/>
각 행은 특정 국가의 시민들이 주어진 해에 특정종류의 알콜을 얼마나 마셨는지 나타냅니다. 예를 들어, 베트남 시민이 1986년에 마신 와인의 양은 얼마인가를 보여줍니다. 

> * Year -- The year the data in the row is for
> * WHO region -- The region in which the country is located
> * Country -- The country the data is for
> * Beverage Types -- The type of beverage
> * Display Value -- The average number of liters drunk per capita

### <p style="color:Tomato">Computation with NumPy</p>
#### <p style="color:Gray">Array Comparisons</p><hr>
One of the most powerful features of the NumPy module is the ability to make comparisons across an entire array. These comparisons result in **Boolean values**.


In [54]:
# Comparing an array with a scalar compares every element and yields a
# Boolean array of the same length.
vector = np.array([5, 10, 15, 20])
vector == 10

array([False,  True, False, False], dtype=bool)

In [55]:
# The same element-wise comparison on a matrix yields a Boolean matrix.
matrix == 25

array([[False, False, False],
       [False,  True, False],
       [False, False, False]], dtype=bool)

In [59]:
# Boolean vector: True where the country column (index 2) equals 'Canada'.
countries_canada = world_alcohol[:, 2] == 'Canada'
countries_canada

array([False, False, False, ..., False, False, False], dtype=bool)

In [61]:
# Boolean vector: True where the year column (index 0) equals the string '1984'.
years_1984 = world_alcohol[:, 0] == '1984'
years_1984 

array([False, False, False, ..., False, False, False], dtype=bool)

#### <p style="color:Gray">Selecting Elements</p><hr>
Select certain elements in vectors, or certain rows in matrices.

In [64]:
# Display the vector used in the selection example.
vector

array([ 5, 10, 15, 20])

In [63]:
# Build a Boolean mask, then index with it: only the elements where the mask
# is True are returned.
equal_to_ten = (vector == 10)
print(vector[equal_to_ten])

[10]


In [66]:
# Display the matrix used in the row-selection example.
matrix

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [65]:
# Mask the rows whose second column equals 25, then select those rows with
# all of their columns.
second_column_25 = (matrix[:, 1] == 25)
print(matrix[second_column_25, :])

[[20 25 30]]


In [69]:
# Boolean mask: True for each row whose country column (index 2) is 'Algeria'.
country_is_algeria = world_alcohol[:, 2] == 'Algeria'
country_is_algeria

# Keep only the rows flagged by the mask, with all of their columns.
contry_algeria = world_alcohol[country_is_algeria]
contry_algeria

array([False, False, False, ..., False, False, False], dtype=bool)

array([['1984', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1987', 'Africa', 'Algeria', 'Beer', '0.17'],
       ['1987', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1986', 'Africa', 'Algeria', 'Wine', '0.1'],
       ['1984', 'Africa', 'Algeria', 'Other', '0'],
       ['1989', 'Africa', 'Algeria', 'Beer', '0.16'],
       ['1989', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1989', 'Africa', 'Algeria', 'Wine', '0.23'],
       ['1986', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1984', 'Africa', 'Algeria', 'Wine', '0.12'],
       ['1985', 'Africa', 'Algeria', 'Beer', '0.19'],
       ['1985', 'Africa', 'Algeria', 'Other', '0'],
       ['1986', 'Africa', 'Algeria', 'Beer', '0.18'],
       ['1985', 'Africa', 'Algeria', 'Wine', '0.11'],
       ['1986', 'Africa', 'Algeria', 'Other', '0'],
       ['1989', 'Africa', 'Algeria', 'Other', '0'],
       ['1987', 'Africa', 'Algeria', 'Other', '0'],
       ['1984', 'Africa', 'Algeria', 'Beer', '0.2'],
       ['1985', 'Africa', 'A

#### <p style="color:Gray">Comparisons with Multiple Conditions</p><hr>
We can also perform comparisons with multiple conditions by specifying each one separately, then joining them with an ampersand (`&`).

In [72]:
# `&` combines the two masks element-wise. No element can be 10 and 5 at the
# same time, so every entry of the result is False.
equal_to_ten_and_five = (vector == 10) & (vector == 5)
equal_to_ten_and_five

array([False, False, False, False], dtype=bool)

In [73]:
# `|` is the element-wise OR: True where an element equals 10 or equals 5.
equal_to_ten_or_five = (vector == 10) | (vector == 5)
equal_to_ten_or_five

array([ True,  True, False, False], dtype=bool)

In [77]:
# Build one mask per condition, then combine them with `&`: the year column
# (index 0) must be '1986' and the country column (index 2) must be 'Algeria'.
year_mask = world_alcohol[:, 0] == '1986'
country_mask = world_alcohol[:, 2] == 'Algeria'
is_algeria_and_1986 = year_mask & country_mask

In [81]:
# Select the rows where both conditions hold, keeping every column.
rows_with_algeria_and_1986 = world_alcohol[is_algeria_and_1986, :]
rows_with_algeria_and_1986

array([['1986', 'Africa', 'Algeria', 'Wine', '0.1'],
       ['1986', 'Africa', 'Algeria', 'Spirits', '0.01'],
       ['1986', 'Africa', 'Algeria', 'Beer', '0.18'],
       ['1986', 'Africa', 'Algeria', 'Other', '0']], 
      dtype='<U75')

#### <p style="color:Gray">Replacing Values</p><hr>
Use comparisons to replace values in an array based on certain conditions.

In [85]:
# Start from a fresh vector so the replacement below is reproducible.
vector = np.array([5, 10, 15, 20])

# Element-wise OR of the two comparisons: True where a value is 10 or 5.
equal_to_ten_or_five = np.logical_or(vector == 10, vector == 5)
equal_to_ten_or_five

# Show the matching elements before they are overwritten ...
vector[equal_to_ten_or_five]
# ... then assign through the mask to replace them in place.
vector[equal_to_ten_or_five] = 50
print(vector)

array([ True,  True, False, False], dtype=bool)

array([ 5, 10])

[50 50 15 20]


In [86]:
# Reset the 3x3 example matrix before replacing values in it.
matrix = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])

In [87]:
# Mask the rows whose second column equals 25, then overwrite that column's
# value in those rows with 10 (row mask and column index in one assignment).
second_column_25 = matrix[:, 1] == 25
matrix[second_column_25, 1] = 10
matrix

array([[ 5, 10, 15],
       [20, 10, 30],
       [35, 40, 45]])

Replace all instances of the string '1986' in the first column of world_alcohol with the string '2014'.

In [90]:
# Replace every '1986' in the year column (index 0) with '2014'.
# The row mask and the column index are given in a single indexing operation,
# so the assignment writes straight into world_alcohol. The chained form
# a[:, 0][mask] = ... only works because its first step happens to be a view.
is_1986 = world_alcohol[:, 0] == '1986'
world_alcohol[is_1986, 0] = '2014'

In [91]:
# Display the array after the year replacement.
world_alcohol

array([['2014', 'Western Pacific', 'Viet Nam', 'Wine', '0'],
       ['2014', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Wine', '1.62'],
       ..., 
       ['2014', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['2014', 'Africa', 'Swaziland', 'Other', '5.15']], 
      dtype='<U75')

In [93]:
# Replace every 'Wine' in the beverage-type column (index 3) with 'Grog'.
# A single indexing operation (row mask, column index) assigns directly into
# world_alcohol instead of relying on the chained a[:, 3][mask] form, which
# only works because its first step happens to be a view.
is_wine = world_alcohol[:, 3] == 'Wine'
world_alcohol[is_wine, 3] = 'Grog'
world_alcohol

array([['2014', 'Western Pacific', 'Viet Nam', 'Grog', '0'],
       ['2014', 'Americas', 'Uruguay', 'Other', '0.5'],
       ['1985', 'Africa', "Cte d'Ivoire", 'Grog', '1.62'],
       ..., 
       ['2014', 'Europe', 'Switzerland', 'Spirits', '2.54'],
       ['1987', 'Western Pacific', 'Papua New Guinea', 'Other', '0'],
       ['2014', 'Africa', 'Swaziland', 'Other', '5.15']], 
      dtype='<U75')