In [7]:
# Intro to NumPy
# NumPy is the Python package for working with arrays.
# It does for arrays and numerical operations what Pandas does for tables.
import numpy as np
# An array looks like a list, but every element must share a single type.
# In exchange, an array uses less memory than the equivalent list
# and is the form NumPy's manipulation routines operate on.
x = np.array([4, 5, 7, 9, 12])
# The plain-list counterpart would be: x = [4, 5, 7, 9, 12]
print(x)
# NumPy infers the element type from the data; pass dtype to choose it yourself.
y = np.array([4, 5, 7, 9, 12], dtype=np.float64)
print(y)

[ 4  5  7  9 12]
[ 4.  5.  7.  9. 12.]


In [8]:
# Multi Dimensional Arrays
# A list of equal-length lists becomes a 2-D array (here: 2 rows, 5 columns)
x = np.array([[1, 1, 2, 5, 9], [50, 55, 75, 120, 200]])
# Even larger arrays can be created, but such complex datasets are better handled with Pandas
# The shape of an array is exposed by its shape attribute (not a method, so no parentheses).
# It is printed explicitly because a bare expression is only displayed
# when it is the last line of a cell.
print(x.shape)
# Copying arrays
# copy() allocates new storage, so y is independent of x
y = x.copy()
# Copied array can now be changed without affecting original

(2, 5)

In [10]:
# Array Indexing
# Works similarly to indexing in other aspects of Python
x = np.array([4, 5, 7, 9, 12])
# Positions count from 0, so index 3 is the fourth element
print(x[3])
# Negative indices count back from the end; -1 is the last element
print(x[-1])
x = np.array([[1, 1, 2, 5, 9], [50, 55, 75, 120, 200]])
# To index a multi dimensional array, pass one index per axis inside a single
# pair of brackets: x[row, column]. x[1][2] returns the same value, but it
# first builds a temporary array for row 1 and then indexes that.
print(x[1, 2])

9
12
75


In [11]:
# Array Slicing
# Slices follow the usual Python rule: start is included, stop is excluded
x = np.array([4, 5, 7, 9, 12])
# Elements two through four, i.e. indices 1, 2 and 3
print(x[1:4])
# Leaving out the start begins at index 0: the first three elements
print(x[:3])
# Leaving out the stop runs to the end: from index 2 onward,
# which is the last three elements of this five-element array
print(x[2:])
# A negative start counts from the end: the last three elements at any length
print(x[-3:])

[5 7 9]
[4 5 7]
[ 7  9 12]
[ 7  9 12]


In [18]:
# Key Numpy Functions
# Randomizing
# Seed the generator first so that re-running the notebook draws the same numbers
SEED = 42
np.random.seed(SEED)
# Five floats drawn uniformly from the half-open interval [0, 1)
r = np.random.random([5])
# To get between 0 and 100
# r = np.random.random([5]) * 100
print(r)
# Arithmetic Operations
# These accept scalars as well as lists/arrays, where they apply element by element
print(np.sqrt(100))
print(np.log10([8, 2, 4]))
# Second argument is the number of decimal places to keep
print(np.round(8.514324326, 4))
# floor rounds down and ceil rounds up; both return floats
print(np.floor(8.51))
print(np.ceil(8.49))
# Trig functions are also available; we won't go into those today

[0.96900127 0.11568586 0.11807955 0.33008125 0.92683009]
10.0
[0.90308999 0.30103    0.60205999]
8.5143
8.0
9.0


In [36]:
# Numpy Operations in Pandas
# NumPy functions apply element wise to a Pandas column, just like other Pandas operations
# Bring in our cars dataset
import pandas as pd
cars = pd.read_csv("cars.csv")
# (To inspect the table, make `cars` the last line of a cell of its own;
# a bare expression in the middle of a cell displays nothing.)
# Add a column for the square root of displacement, rounded to one decimal place
cars["sqrt_disp"] = np.round(np.sqrt(cars["disp"]), 1)
# Call summary statistics on cars
# print(np.mean(cars["mpg"]))
# print(np.median(cars["mpg"]))
# 95th percentile of mpg, rounded to a whole number
print(
    np.round(
        np.percentile(cars["mpg"], 95),
        0,
    )
)

31.0
