## Numpy Intro

In [18]:
# Import the numpy package as np
import numpy as np
import pandas as pd

# Create list baseball
baseball = [180, 215, 210, 210, 188, 176, 209, 200]
# Create a numpy array from baseball: np_baseball
np_baseball = np.array(baseball)
# Print out type of np_baseball
print(type(np_baseball))


height = [5.4,6.11,5.2,7.4]
# Create a numpy array from height: np_height
np_height = np.array(height)
# Convert np_height to m: np_height_m
np_height_m = np_height * 0.0254
# Print np_height_m
print(np_height_m)


# Create array from weight with correct units: np_weight_kg
weight = [60,51,55,62]
np_weight_kg = np.array(weight) * 0.453592
# Calculate the BMI: bmi
bmi = np_weight_kg/(np_height_m ** 2)
# Print out bmi
print(bmi)

<class 'numpy.ndarray'>
[0.13716  0.155194 0.13208  0.18796 ]
[1446.64404555  960.47421155 1430.05899917  796.02407561]


## Numpy subsetting

In [5]:
# Create the light array of booleans representing if below bmi of 21
light = bmi < 1000
print(light)
# Print BMI's that meet the light metric
print(bmi[light])

# Print out the weight at index 50
print(np_weight_kg[2])
# Print out sub-array of np_height: index 100 up to and including index 110
print(np_height[1:3])

[False  True False  True]
[960.47421155 796.02407561]
24.94756
[6.11 5.2 ]


## Numpy N-Dimensional Array 

In [6]:
# Create baseball, a list of lists
baseball = [[180, 78.4],
            [215, 102.7],
            [210, 98.5],
            [188, 75.2]]
# Create a 2D numpy array from baseball: np_baseball
np_baseball = np.array(baseball)
# Print out the type of np_baseball
print(type(np_baseball))
# Print out the shape of np_baseball
print(np_baseball.shape)

<class 'numpy.ndarray'>
(4, 2)


### Subsetting N-dimensional numpy array
The indexes before the comma refer to the rows, while those after the comma refer to the columns. The : is for slicing; in this example, it tells Python to include all rows.

In [15]:
# Create np_baseball (2 cols)
np_baseball = np.array(baseball)

# Print out the 2nd row of np_baseball
print(np_baseball[1,:])

# Select the entire second column of np_baseball: np_weight
np_weight = np_baseball[:,1]
print(np_weight)
print(type(np_weight))

# Create numpy array: conversion
conversion = np.array([0.1, 0.4, 1])
np_3d = np.array([[3,1,2],[2,1,5],[3,2,1]])
np_3d1 = conversion * np_3d
np_add = np_3d + conversion
print(np_3d1)
print(np_add)

[215.  102.7]
[ 78.4 102.7  98.5  75.2]
<class 'numpy.ndarray'>
[[0.3 0.4 2. ]
 [0.2 0.4 5. ]
 [0.3 0.8 1. ]]
[[3.1 1.4 3. ]
 [2.1 1.4 6. ]
 [3.1 2.4 2. ]]


## Numpy Statistics

In [35]:
# Note np.random.normal(1.75, 0.20, 5000) generates a random sample of 5000 with a normal distribution mean and std-deviation of 1.75 and 0.2
height = np.round(np.random.normal(1.75, 0.20, 5000),2)
weight = np.round(np.random.normal(60.32, 0.15, 5000),2)
np_baseball = np.column_stack((height, weight))

# Create np_height from np_baseball extract column
np_height = np_baseball[:,0]
# Print out the mean of np_height
print(np.mean(np_height))
# Print out the median of np_height
print(np.median(np_height))

# Print mean height (first column)
avg = np.mean(np_baseball[:,0])
print("Average: " + str(avg))
# Print median height.
med = np.median(np_baseball[:,0])
print("Median: " + str(med))
# Print out the standard deviation on height.
stddev = np.std(np_baseball[:,0])
print("Standard Deviation: " + str(stddev))
# Print out correlation between first and second column. almost 0 correlation because data is random from N-distribution
corr = np.corrcoef(x = np_baseball[:,0], y = np_baseball[:,1])
print("Correlation: " + str(corr))


# Looking up 
np_heights = np.array([1.8,1.6,1.7,1.9,2,2.1,1.6])
np_positions = np.array(['Gk','D','GK','F','D','F','Gk'])
# Heights of the goalkeepers: gk_heights
gk_heights = np_heights[np_positions == 'GK']
# Heights of the other players: other_heights
other_heights = np_heights[np_positions != "GK"]
print(np_positions != 'GK')
# Print out the median height of goalkeepers. 
print("Median height of goalkeepers: " + str(np.median(gk_heights)))
# Print out the median height of other players.
print("Median height of other players: " + str(np.median(other_heights)))



# How to display a numpy matrix nicely is to convert to a panda as followed
col_names = ["height", "weight"]
dg = pd.DataFrame(np_baseball, columns=col_names)
display(dg)

1.7489199999999998
1.75
Average: 1.7489199999999998
Median: 1.75
Standard Deviation: 0.19796937540943044
Correlation: [[1.00000000e+00 8.87721572e-04]
 [8.87721572e-04 1.00000000e+00]]
[ True  True False  True  True  True  True]
Median height of goalkeepers: 1.7
Median height of other players: 1.85


Unnamed: 0,height,weight
0,1.78,60.41
1,1.84,60.67
2,1.82,60.36
3,1.53,60.05
4,2.00,60.44
5,1.40,60.49
6,1.81,60.28
7,1.99,60.41
8,1.88,60.44
9,1.99,60.34
