## Introduction to numpy, pandas, and matplotlib

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Display the three imported module objects to confirm each library loaded.
np, pd, plt

(<module 'numpy' from '/Users/mattkrepp/opt/anaconda3/lib/python3.9/site-packages/numpy/__init__.py'>,
 <module 'pandas' from '/Users/mattkrepp/opt/anaconda3/lib/python3.9/site-packages/pandas/__init__.py'>,
 <module 'matplotlib.pyplot' from '/Users/mattkrepp/opt/anaconda3/lib/python3.9/site-packages/matplotlib/pyplot.py'>)

In [3]:
# List every attribute name exposed by the numpy module.
dir(np)

['ALLOW_THREADS',
 'AxisError',
 'BUFSIZE',
 'Bytes0',
 'CLIP',
 'DataSource',
 'Datetime64',
 'ERR_CALL',
 'ERR_DEFAULT',
 'ERR_IGNORE',
 'ERR_LOG',
 'ERR_PRINT',
 'ERR_RAISE',
 'ERR_WARN',
 'FLOATING_POINT_SUPPORT',
 'FPE_DIVIDEBYZERO',
 'FPE_INVALID',
 'FPE_OVERFLOW',
 'FPE_UNDERFLOW',
 'False_',
 'Inf',
 'Infinity',
 'MAXDIMS',
 'MAY_SHARE_BOUNDS',
 'MAY_SHARE_EXACT',
 'MachAr',
 'NAN',
 'NINF',
 'NZERO',
 'NaN',
 'PINF',
 'PZERO',
 'RAISE',
 'SHIFT_DIVIDEBYZERO',
 'SHIFT_INVALID',
 'SHIFT_OVERFLOW',
 'SHIFT_UNDERFLOW',
 'ScalarType',
 'Str0',
 'Tester',
 'TooHardError',
 'True_',
 'UFUNC_BUFSIZE_DEFAULT',
 'UFUNC_PYVALS_NAME',
 'Uint64',
 'WRAP',
 '_NoValue',
 '_UFUNC_API',
 '__NUMPY_SETUP__',
 '__all__',
 '__builtins__',
 '__cached__',
 '__config__',
 '__deprecated_attrs__',
 '__dir__',
 '__doc__',
 '__expired_functions__',
 '__file__',
 '__getattr__',
 '__git_version__',
 '__loader__',
 '__mkl_version__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '__version__',
 '_

## numpy Datatypes
### float32 and float64

In [4]:
# 2 ** 127 still fits in a 32-bit float.
x = np.float32(2 ** 127)
x, type(x)

(1.7014118e+38, numpy.float32)

In [5]:
# One power of two higher is outside the float32 range, so the result is inf.
x = np.float32(2 ** 128)
x, type(x)

(inf, numpy.float32)

In [6]:
# 2 ** 1023 still fits in a 64-bit float.
x = np.float64(2 ** 1023)
x, type(x)

(8.98846567431158e+307, numpy.float64)

In [7]:
# 2 ** 1024 is too large for a 64-bit float. Converting the Python int raises
# OverflowError, so catch it and display the message instead of letting the
# exception stop a "Restart & Run All".
try:
    x = np.float64(2 ** 1024)
except OverflowError as err:
    overflow_message = f"{type(err).__name__}: {err}"
    print(overflow_message)
else:
    print(x, type(x))

OverflowError: int too large to convert to float

## Arrays have a fixed datatype

In [None]:
# Use the name `float_list`, not `list`: rebinding `list` would hide the built-in
# type for the rest of the notebook and break later `list(...)` calls.
float_list = [1., 2., 3., 4., 5.]
# np.array stores every element with one shared dtype, reported by `.dtype`.
array = np.array(float_list)
array, type(array), array.dtype

In [None]:
# np.arange builds an evenly spaced array, much like collecting range() values
# with a for loop or generator.
start, end, interval = 0, 100, 10
# the stop value is exclusive, so add 1 to keep `end` itself in the result
range_array = np.arange(start, end + 1, interval)
range_array

In [None]:
# The plain-Python counterpart of np.arange: unpack the range into a list.
range_list = [*range(start, end + 1, interval)]
range_list

## Two Dimensional Lists and Arrays

### Create Two Dimensional List

In [None]:
# Each inner list is one row of the two-dimensional structure.
list_of_lists = [
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
]
list_of_lists

### Transform that Two Dimensional List into Two Dimensional Array

In [None]:
# np.array converts the nested list into a two-dimensional array, one inner list per row.
two_dim_array = np.array(list_of_lists)
two_dim_array

In [None]:
# A single index selects a whole row; here, the 0th row.
two_dim_array[0]

In [None]:
# Select the 0th row, then the 0th value inside that row.
two_dim_array[0][0]

### Call the 1st Element of the 0th row

In [None]:
# Select the 0th row, then the value at position 1 inside that row.
two_dim_array[0][1]

### Use a for loop to step through the first index of two_dim_array as i; calling two_dim_array[i] yields the ith row

In [None]:
# Walk the row positions and show each position followed by the row stored there.
n_rows = len(two_dim_array)
for i in range(n_rows):
    print(i, two_dim_array[i], sep="\n")

### Passing the row to a second for loop cycles through the column index as j. Calling row[j] yields the jth value from the row

In [None]:
# Show every (row, column) position together with the value stored there.
print("i j:")
for i, row in enumerate(two_dim_array):
    for j, val in enumerate(row):
        print(i, j)
        print("val:", val)

In [None]:
# Iterate over the array directly: the outer loop yields each row,
# and the inner loop yields each value in that row, so no index is needed.
for row in two_dim_array:
    print(row)
    for val in row:
        print(val)

## np.zeros(), np.ones(), np.empty(), np.zeros_like(), np.ones_like(), np.empty_like()

In [None]:
# 10 x 10 array filled with zeros; the shape is passed as a tuple.
np.zeros((10,10))

In [None]:
# 10 x 10 array whose memory is allocated but not initialized, so the values shown are arbitrary.
np.empty((10,10))

In [None]:
# 10 x 10 array filled with ones.
np.ones((10,10))

In [None]:
# zeros_like returns an array of zeros with the same shape as its argument (here the nested list).
l = list_of_lists
np.zeros_like(l)

In [None]:
# ones_like returns an array of ones with the same shape as its argument.
l = list_of_lists
np.ones_like(l)

In [None]:
# Same shape as `l` (assigned in the cell above), but the values are left uninitialized.
np.empty_like(l)

## Log Values

In [None]:
# np.log() is the natural logarithm (base e), so the log of 1 is 0
np.log(1)

In [None]:
# The natural log of e is 1.
np.log(np.e)

In [None]:
# Passing an array applies the log to every element.
np.log(two_dim_array)

In [None]:
# Constants and special floating-point values that numpy provides.
pi, e = np.pi, np.e
lne = np.log(e)
infinity, null_val = np.inf, np.nan
pi, e, lne, infinity, null_val

## Pandas

In [None]:
# Three arrays of equal length, keyed by the name each will carry as a column.
n_obs = 10
data_dict = {
    "0 to 9": np.arange(n_obs),
    "ones": np.ones(n_obs),
    "zeros": np.zeros(n_obs),
}
data_dict

In [None]:
# Each dictionary key becomes a column name and each array becomes that column's values.
df = pd.DataFrame(data_dict)
df

In [None]:
# Selecting one column with a single key returns a pandas Series.
df["0 to 9"], type(df["0 to 9"])

### A Dataframe consists of one or more series

In [None]:
# Passing a list of keys, even a list with one key, returns a DataFrame rather than a Series.
df[["0 to 9"]]

### Calling by Row

In [None]:
# .iloc selects by integer position; this is the first row.
df.iloc[0]

In [None]:
# .loc selects by index label; a list of labels returns those rows as a DataFrame.
df.loc[[0,1,2,3]]

In [None]:
# Select rows and a column in one .loc call (the label slice :3 includes row 3).
# This single-call form is the one to use when assigning values, so that pandas
# writes to the original DataFrame rather than to a temporary copy.
df.loc[:3, "0 to 9"]

In [None]:
# Row index labels act as keys. Using df.loc[...] lets you call each row by its label

In [None]:
# .T transposes the DataFrame: columns become rows and rows become columns.
df.T

In [None]:
# After transposing, the former column name is a row label that .loc can select.
df.T.loc["0 to 9"]

In [None]:
# The first row of the transposed frame, selected by position instead of by label.
df.T.iloc[0]

## Boolean Series and Arrays

In [None]:
# Comparing a Series with a scalar yields a boolean Series: True where the value exceeds 3.
greater_than_3 = df["0 to 9"] > 3
greater_than_3

In [None]:
# Indexing with a boolean Series keeps only the rows marked True.
df[greater_than_3]

In [None]:
# The same filter with the condition written inline.
df[df["0 to 9"] > 3]

In [None]:
# Create artificial data: one random value in [0, 10000) per variable per year.
# A seeded generator makes the values (and every plot built from them)
# identical on each "Restart & Run All".
SEED = 42
rng = np.random.default_rng(SEED)

years = range(1990, 2021)  # 1990 through 2020 inclusive
macro_dict = {
    key: {year: rng.random() * 10000 for year in years}
    for key in ("GDP", "Money", "Real GDP", "Price Level")
}

macro_dict

In [None]:
# From a dict of dicts, the outer keys become columns and the inner keys (the years) become the row index.
macro_DF = pd.DataFrame(macro_dict)
macro_DF

In [None]:
# A one-item list of keys returns a single-column DataFrame.
macro_DF[["GDP"]]

In [None]:
# Velocity = Price Level * (Real GDP / Money), computed element-wise by year.
price_level = macro_DF["Price Level"]
real_gdp = macro_DF["Real GDP"]
money = macro_DF["Money"]
macro_DF["Velocity"] = price_level * (real_gdp / money)
macro_DF

## matplotlib

In [None]:
# Default pandas line plot: every column drawn against the year index.
# (pyplot is already imported in the first cell, so it is not re-imported here.)
macro_DF.plot()

In [None]:
# Plot every column with the pandas plotting method, drawing onto an Axes created explicitly.
fig, ax = plt.subplots(figsize = (20,12))
# legend = False skips the legend here so it can be added below with a larger font
macro_DF.plot(linewidth = 5, alpha = .7, legend = False, ax = ax) #color = "cb"
ax.legend(fontsize = 20)

In [None]:
# Draw each column in turn on one shared Axes, passing it explicitly instead of
# relying on pyplot's implicit "current axes".
fig, ax = plt.subplots(figsize = (10,6))
for key in macro_DF:
    macro_DF[key].plot.line(ax = ax)
    # NOTE: the title is reset on every pass, so only the last column's name remains
    ax.set_title(key, fontsize = 20)

In [None]:
# Our goal is to create a multiplot visualization with a separate plot area for each variable.
num_vars = len(macro_DF.keys())
# one row of subplots with one column per variable; with more than one variable, `ax` is an array of Axes
fig, ax = plt.subplots(1, num_vars, figsize = (20,6))

# enumerate supplies the subplot position and the column name together, so the
# key list is not rebuilt (and the built-in `list` is not needed) on every pass
for i, key in enumerate(macro_DF.keys()):
    macro_DF[key].plot.line(ax = ax[i])
    ax[i].set_title(key, fontsize = 20)

In [None]:
# plot using method for matplotlib from the pandas DataFrame
num_vars = len(macro_DF.keys())

# create a figure with one subplot for each variable
fig, axs = plt.subplots(1, num_vars, figsize = (20,6))

# draw each variable on its own subplot, cycling through the keys by position
for i, key in enumerate(macro_DF.keys()):
    # `ax` is the single Axes for this variable; `axs` is the whole row of subplots
    ax = axs[i]

    # lines get more opaque from left to right (capped at fully opaque)
    alpha = min(1.0, .1 + .15 * i)
    # "C0", "C1", ... step through matplotlib's default color cycle
    color = "C" + str(i)
    macro_DF[key].plot.line(color = color, alpha = alpha, ax = ax)
    ax.set_title(key, fontsize = 20)

In [None]:
# Create a separate figure for each variable and draw that variable on it.
for key in macro_DF:
    fig, ax = plt.subplots(figsize = (10,6))
    # pass the Axes explicitly rather than relying on pyplot's implicit current axes
    macro_DF[key].plot.line(ax = ax)
    ax.set_title(key, fontsize = 20)