# Intro to Python

Computer programming is extremely useful for quickly solving complex optimization problems. This notebook will go through the typical data structures and manipulations that we'll use throughout the course.

## 1. Data Structures

The main data structures we'll use throughout the course are:

### 1a. Numpy arrays  
Numpy arrays are used to store numerical data (floating point or integer). They can be vectors (1D arrays), matrices (2D arrays), and can have >2 dimensions. They can be initialized as empty, with ones, zeros, or specific numbers.

In [None]:
import numpy as np

# initialize empty numpy arrays
# NOTE: np.empty only reserves space and does not set the values, so the numbers
# printed below are arbitrary and can differ from run to run
x = np.empty([5]) # vector of length 5
print(" x = np.empty([5]) \n x =", x)
x = np.empty([2,3]) # array with 2 rows and 3 columns
print("\n x = np.empty([2,3]) \n x =", x)

In [None]:
# initialize numpy arrays of all 0s
# each print shows the command as text, followed by the array it produced
x = np.zeros([5]) # vector of length 5
print(" x = np.zeros([5]) \n x =", x)
x = np.zeros([2,3]) # array with 2 rows and 3 columns
print("\n x = np.zeros([2,3]) \n x =", x)

In [None]:
# initialize numpy arrays of all 1s
x = np.ones([5]) # vector of length 5
print(" x = np.ones([5]) \n x =", x)
x = np.ones([2,3]) # array with 2 rows and 3 columns
print("\n x = np.ones([2,3]) \n x =", x)

In [None]:
# initialize numpy arrays of all 5s
# start from an array of 1s; multiplying an array by a number multiplies every element
x = np.ones([5])*5 # vector of length 5
print(" x = np.ones([5])*5 \n x =", x)
x = np.ones([2,3])*5 # array with 2 rows and 3 columns
print("\n x = np.ones([2,3])*5 \n x =", x)

In [None]:
# initialize numpy arrays with specific values
x = np.array([1,5,3,2,7]) # vector: pass a single list of values
print(" x = np.array([1,5,3,2,7]) \n x =", x)
x = np.array([[2,3,5], [4,1,7]]) # array with 2 rows and 3 columns; 1st row is [2,3,5], 2nd row is [4,1,7]
print("\n x = np.array([[2,3,5], [4,1,7]]) \n x =", x)

In [None]:
# determine shape of array (# of rows, # of columns)
# a vector has only one dimension, so its shape has a single entry (its length)
x = np.array([1,5,3,2,7])
print("x = ", x)
print("np.shape(x) =", np.shape(x))

# 2D array: the shape lists the number of rows first, then the number of columns
x = np.array([[2,3,5], [4,1,7]])
print("\nx = ", x)
print("np.shape(x) =", np.shape(x))

In [None]:
# x is the array with 2 rows and 3 columns created at the end of the previous cell
# indices start at 0, and ":" selects everything along that dimension

# access particular element of 2D array
print(" x = ", x)
print("\n x[0,1] =", x[0,1]) # first row, second column

# access particular row of 2D array
print("\n x[0,:] =", x[0,:]) # first row

# access particular column of 2D array
print("\n x[:,1] =", x[:,1]) # second column

In [None]:
# initialize numpy array with sequence of values
# np.arange(start, stop, step): the start value is included, the stop value is not
x = np.arange(0,5) # array from [0,5) by 1
print(" x = np.arange(0,5) \n x =", x)
x = np.arange(0,10,2) # array from [0,10) increasing by 2
print("\n x = np.arange(0,10,2) \n x =", x)
y = np.arange(10,0,-2) # array from [10, 0) decreasing by 2
print("\n y = np.arange(10,0,-2) \n y =", y)

In [None]:
# x is the array created with np.arange(0,10,2) in the previous cell
# the printed labels show the slices with n = 2 filled in

# access first n elements of array
print(" x = ", x)
n = 2
print("\n x[0:2]: ", x[0:n])

# access last n elements of array (negative indices count from the end)
n = 2
print("\n x[-2::]: ", x[-n::])

# access from 3rd to 3rd from last elements of array
# (skips the first n and the last n elements)
print("\n x[2:-2]: ", x[n:-n])

In [None]:
# x and y are the two vectors of equal length created with np.arange two cells above
print(" x = ", x)
print(" y = ", y)

# elementwise multiplication of two vectors (result is a vector of the same length)
print("\n x*y = ", x*y)

# dot product of two vectors (sum of the elementwise products, a single number)
print("\n np.dot(x,y) = ", np.dot(x,y))

In [None]:
# elementwise multiplication of two 2D arrays (both must have the same shape)
x = np.array([[2,3,5], [4,1,7]])
print(" x = \n", x)
print("\n x*x = \n", x*x)

# dot product of two 2D arrays (i.e. matrix multiplication)
# x has 2 rows and 3 columns, so it is multiplied by its transpose (3 rows, 2 columns)
# to give a result with 2 rows and 2 columns
y = np.transpose(x)
print("\n y = np.transpose(x) = \n", y)
print("\n np.dot(x,y) = \n", np.dot(x,y))

### 1b. Pandas data frames  
Pandas data frames are like arrays, but they can hold data of multiple types (e.g. floating point numbers, integers, and strings). They also have column headers, like tables, which make it easier than with numpy arrays to keep track of what the data represents.

In [None]:
import pandas as pd

# create new pandas data frame
# each dictionary key becomes a column header and each list holds that column's values
# (all lists must have the same length: one entry per row)
cities_pets_df = pd.DataFrame({'Cities': ['Charlottesville','Richmond','Washington, DC','Norfolk','Virginia Beach'],
                               'Dogs': [150, 300, 250, 200, 500],
                               'Cats': [50, 60, 100, 20, 50],
                               'Hamsters': [5, 10, 50, 2, 4]})
# a bare variable name on the last line of a cell displays it as a table
cities_pets_df

In [None]:
# convert numpy array to pandas data frame
# here each row of x holds one type of pet and each column holds one city
x = np.array([[150, 300, 250, 200, 500],
               [50, 60, 100, 20, 50],
              [5, 10, 50, 2, 4]])
print(" x as array: \n", x)

# transpose x so that each pet type becomes a column, then name the columns
cities_pets_df = pd.DataFrame(np.transpose(x), columns = ['Dogs', 'Cats', 'Hamsters'])
print("\n x as data frame: \n")
cities_pets_df

In [None]:
# add column to pandas data frame
# 0 tells us to put it in the first column, 1 would be the 2nd etc.
# value needs one entry per existing row
# NOTE: this changes cities_pets_df itself; the cell is written to be run once
# on the data frame created in the previous cell
cities_pets_df.insert(0, column='Cities', value=['Charlottesville','Richmond','Washington, DC','Norfolk','Virginia Beach'])
cities_pets_df

In [None]:
# add row to pandas data frame
# len(cities_pets_df.index) is the current number of rows; because the rows are
# labeled 0, 1, 2, ... this is the first unused row label
# the new values are listed in column order: Cities, Dogs, Cats, Hamsters
# NOTE: running this cell again adds another copy of the Harrisonburg row
cities_pets_df.loc[len(cities_pets_df.index)] = ['Harrisonburg', 30, 80, 0]
cities_pets_df

In [None]:
# access particular column of pandas data frame
# using the column header as the key
cities_pets_df["Dogs"]

In [None]:
# 2nd column (also Dogs)
# iloc selects by position [rows, columns], counting from 0; ":" means all rows
cities_pets_df.iloc[:,1]

In [None]:
# access particular row of pandas data frame
# position 2 is the third row because counting starts at 0; ":" means all columns
print("Third row of data frame:")
cities_pets_df.iloc[2,:]

In [None]:
# access particular element of pandas data frame
# row position 2 (third row) and column position 3 (fourth column), counting from 0
print("Third row, fourth column of data frame:")
cities_pets_df.iloc[2,3]

In [None]:
# write pandas data frame to file and then list files in directory to see if it's there
# NOTE(review): this cell assumes the notebook is running in Google Colab and that
# a "Colab Notebooks" folder exists in your Google Drive; elsewhere, skip the
# drive lines and use a local folder path instead
from google.colab import drive
import os
drive.mount('/content/drive') # the paths below expect Google Drive under /content/drive
print("Files in Colab Notebooks directory before writing data frame to csv")
print(os.listdir("/content/drive/MyDrive/Colab Notebooks"))

# the row labels (0, 1, 2, ...) are written to the file as well; they come back
# as an extra column when the file is read in again below
cities_pets_df.to_csv("/content/drive/MyDrive/Colab Notebooks/cities_pets.csv")
print("\n Files in Colab Notebooks directory after writing data frame to csv")
print(os.listdir("/content/drive/MyDrive/Colab Notebooks"))

In [None]:
# read in pandas data frame from file
# this is the csv file written in the previous cell
cities_pets_test = pd.read_csv("/content/drive/MyDrive/Colab Notebooks/cities_pets.csv")
cities_pets_test

In [None]:
# convert Cities column to index
# (the index holds the row labels, so rows can then be referred to by city name)
# inplace = True will do it to this data frame rather than a new one
# NOTE: because the data frame itself is changed, read the csv in again before
# re-running this cell
cities_pets_test.set_index("Cities", inplace=True)
cities_pets_test

In [None]:
# remove former index column
# "Unnamed: 0" is the header given to the old row labels that were saved in the csv
# axis=1 means the name refers to a column
cities_pets_test.drop("Unnamed: 0", axis=1, inplace = True)
cities_pets_test

In [None]:
# drop Norfolk row from data frame
# axis=0 means the name refers to a row label (possible because Cities is now the index)
cities_pets_test.drop("Norfolk", axis=0, inplace = True)
cities_pets_test

In [None]:
# plot data from pandas data frame in bar chart
# the index (Cities) goes along the x axis, with one bar per column for each city
cities_pets_test.plot(kind="bar")

In [None]:
# plot data in line chart (doesn't really make sense for this data, but would for time series)
# one line per column, with the index (Cities) along the x axis
cities_pets_test.plot(kind="line")

In [None]:
# plot number of dogs vs. number of cats in scatter plot
# x and y are given as column headers; each point is one city
cities_pets_test.plot(x="Dogs",y="Cats",kind="scatter")

### 1c. Lists  
Lists are similar to 1D numpy arrays, but they can contain numbers, strings, other lists, or any mix of these. They cannot be matrices, but lists can be nested within other lists. Instead of referring to row 3, column 4 as x[2,3] like you would for an array, you reference the 4th element of the 3rd list within a list as x[2][3].

In [None]:
# create empty list
# square brackets with nothing inside
x = []
print(" x = [] \n x =", x)

In [None]:
# append to list
# append adds the value to the end of the existing list x
# NOTE: running this cell again appends another 5
x.append(5)
print(" x.append(5) \n x =", x)

In [None]:
# append another list to create a list of lists
# the whole list [6,7,9] is added as one single item of x
x.append([6,7,9])
print(" x.append([6,7,9]) \n x =", x)

In [None]:
# find length of list
# a nested list counts as one item
print("len(x) =", len(x))

In [None]:
# find length of list within list
# x[1] is the second item of x, which is the list appended above
print("len(x[1]) =", len(x[1]))

### 1d. Dictionaries

Dictionaries are collections of key:value pairs. Keys are similar to the column labels of a data frame and values are similar to the entries in that column. Where a dictionary is more flexible is that the entries associated with that label do not have to be vectors, but can be any data type.

In [None]:
# initialize dictionary with three keys
# each entry is written as key: value; the values here are a string, a list, and a number
example_dict = {
  "obj": "Minimize",
  "vars": [5,2,4],
  "parameter": 2
}

# look up a value by putting its key in square brackets
print("Objective = ", example_dict["obj"])
print("Decision variables = ", example_dict["vars"])
print("Parameter = ", example_dict["parameter"])

In [None]:
# alternative way of initializing same dictionary
# with dict(), the keys are written without quotes as key = value
example_dict2 = dict(obj = "Minimize",
                vars = [5,2,4],
                parameter = 2)

# values are looked up the same way as before, with the key as a string
print("Objective = ", example_dict2["obj"])
print("Decision variables = ", example_dict2["vars"])
print("Parameter = ", example_dict2["parameter"])

## 2. For loops

For loops are useful for applying the same operation to each row of an array/data frame, each item of a list, etc.

In [None]:
# loop through n items
# range(n) gives i = 0, 1, ..., n-1 (n itself is not included)
n = 10
count = 0
for i in range(n):
  count += i # add i to count
  print(count) # running total after each pass through the loop

In [None]:
# loop from 3 to n
# range(3,n) gives i = 3, 4, ..., n-1; n is the value set in the previous cell
count = 0 # reset the running total
for i in range(3,n):
  count += i # add i to count
  print(count)

In [None]:
# loop through rows of an array
# row i of y is row i of x multiplied by (i+1): 1st row times 1, 2nd row times 2
x = np.array([[2,3,5],
              [4,1,7]])
print(" x =", x)

n_rows = x.shape[0] # first entry of the shape is the number of rows
y = np.empty(x.shape) # same shape as x; every entry is filled in by the loop
for i in range(n_rows):
  y[i,:] = (i+1)*x[i,:]

print("\n y =", y)

In [None]:
# loop through columns of an array
# column i of y is column i of x multiplied by (i+1)
x = np.array([[2,3,5],
              [4,1,7]])
print(" x = ", x)

n_cols = x.shape[1] # second entry of the shape is the number of columns
y = np.empty(x.shape) # same shape as x; every entry is filled in by the loop
for i in range(n_cols):
  y[:,i] = (i+1)*x[:,i]

print("\n y =", y)

In [None]:
# nested for loop through columns and rows of an array
# the entry in row i, column j of x is multiplied by (i+1) and by (j+1)
x = np.array([[2,3,5],
              [4,1,7]])
print(" x = ", x)

n_rows, n_cols = x.shape # (number of rows, number of columns)
y = np.empty(x.shape) # same shape as x; every entry is filled in by the loops
for i in range(n_rows): # outer loop: rows
  for j in range(n_cols): # inner loop: columns of the current row
    y[i,j] = x[i,j]*(i+1)*(j+1)

print("\n y =", y)

In [None]:
# loop through items in a list
# item takes the value of each element of x in turn (no index needed)
x = [2, 3, 6, 3, 1]
for item in x:
  print("x[i] = ", item) # the label is the literal text "x[i]"; only the value changes

In [None]:
# loop through items in list and keep track of iteration
# enumerate gives both the position i (starting at 0) and the item at that position
for i, item in enumerate(x):
  print("x[" + str(i) + "] = " + str(item)) # str() converts numbers to text so they can be joined with +

In [None]:
# loop through two lists simultaneously
# zip pairs up the 1st items of x and y, then the 2nd items, and so on
# (here i and j are the items themselves, not positions)
y = [4, 9, 2, 7, 5]
for i,j in zip(x,y):
  print('i = ' + str(i) + ', j = ' + str(j))

## 3. Functions

Functions are useful if there are methods you would like to apply multiple times to different objects. For example, an objective function you want to optimize!

In [None]:
from scipy.optimize import minimize

def function(input1, input2, input3=5):
  """Example objective: input1 + input1*input2 - (input1 + input3)**2.

  input3 is optional and is taken as 5 when it is not given.
  """
  linear_part = input1 + input1*input2
  squared_part = (input1 + input3)**2
  return linear_part - squared_part

# call the function with input1=2 and input2=7
output = function(2, 7) # uses default of 5 for input3
print("function(2, 7) =", output)

In [None]:
# call the function again, this time giving all three inputs
output = function(2, 7, 3) # uses 3 instead of default of 5 for input3
print("function(2, 7, 3) =", output)

In [None]:
# find value of input1 that minimizes function for input2=7, input3= default of 5
# give an initial estimate of 0 and bounds of [-10,10]
# minimize only varies the first input of function (input1); the values in args
# are passed in after it, so args=(7,) sets input2=7 (the trailing comma makes it a tuple)
bounds = [[-10,10]] # one [lower, upper] pair per variable being optimized
# NOTE: no method is given, so minimize chooses the algorithm itself; per the scipy
# documentation that is L-BFGS-B when only bounds are supplied
BFGSresult = minimize(function, x0 = 0, bounds=bounds, args=(7,))
print(BFGSresult) # result object; its attributes include x and fun

In [None]:
# find value of input1 that minimizes function for input2=7, input3= 3
# give an initial estimate of 0 and bounds of [-10,10]
# bounds is reused from the previous cell; args now supplies both input2 and input3
BFGSresult = minimize(function, x0 = 0, bounds=bounds, args=(7,3,))
print(BFGSresult)

In [None]:
# find values of all inputs that minimize function over bounds [-10,10]
# to optimize all three inputs at once, the function must take them as one vector
def function2(x):
  """Same formula as function, with the inputs given as x[0], x[1] and x[2]."""
  first, second, third = x[0], x[1], x[2]
  return first + first*second - (first + third)**2

# one [lower, upper] pair per input: multiplying a list by 3 repeats its contents 3 times
# (all three entries are the same inner list object, which is fine because bounds is only read)
bounds = [[-10,10]]*3
print(bounds)

In [None]:
# minimize function2 over all three inputs, starting from an initial estimate of [0,0,0]
# x0 and bounds each have one entry per input
BFGSresult = minimize(function2, x0 = [0,0,0], bounds=bounds)
print(BFGSresult)

## 4. Classes

Classes allow you to create objects of that class with the same attributes and methods. You probably won't need to write any classes for this course, but a lot of the functions we use will return objects of a class and you'll want to access attributes of that object, or apply methods (functions) to it.

For example, scipy.optimize.minimize() returns a "result" object. This has the attributes "x" and "fun" associated with it (among others). We can access those with result.x and result.fun.

Above, the pandas data frames we created were objects of the DataFrame class. This enabled us to use the method plot() associated with objects of the DataFrame class.

In [None]:
class Problem:
  """Bundle decision variables with the function used to evaluate them.

  Attributes:
    vars: the decision variables, stored exactly as given
    function: the evaluation function; it is called with vars as its only input
    obj: the objective function value; only exists after evaluate() has been called
  """

  def __init__(self, vars, function):
    # store the inputs as attributes of the new object
    self.vars, self.function = vars, function

  def evaluate(self):
    """Apply function to vars and store the result as the attribute obj."""
    objective_value = self.function(self.vars)
    self.obj = objective_value

# function that will be used as the evaluation function of the Problem class
def calcRevenue(x):
  """Return revenue from metal (5 per unit of x[0]) minus cost of wastewater treatment (3 per unit of x[1])."""
  metal_revenue = 5*x[0]
  treatment_cost = 3*x[1]
  return metal_revenue - treatment_cost

# initialize object of the Problem class called metalWaste with decision variables
# of [10000,5000] for metal waste produced and amount of wastewater treated
metalWaste = Problem(vars = [10000,5000], function = calcRevenue)

# attributes are accessed with object.attribute
print("metalWaste.vars = ", metalWaste.vars)
print("metalWaste.function = ", metalWaste.function)

# print metalWaste.obj - it shouldn't exist yet because we need to call the evaluate function to calculate it
try:
  print(metalWaste.obj) # print if it exists
except AttributeError:
  # accessing an attribute that does not exist raises AttributeError;
  # catch only that error so any other problem is still reported
  print("metalWaste.obj does not exist")


In [None]:
# apply evaluate method to metalWaste object
# evaluate does not return anything; it stores its result as metalWaste.obj
metalWaste.evaluate()

# print metalWaste.obj computed from metalWaste.evaluate
print(metalWaste.obj)

## 5. Plotting

Programming is extremely useful for visualizing data. Matplotlib is the main library for plotting. Check this link for examples of how to make different types of plots: https://matplotlib.org/stable/plot_types/index.html. Below, I'll simply show how to make line plots with multiple subplots and different axes scales (linear vs. log).

Some other helpful resources for determining colors are:
Matplotlib colors: https://matplotlib.org/stable/gallery/color/named_colors.html  
Matplotlib colormaps: https://matplotlib.org/stable/users/explain/colors/colormaps.html  
Colorbrewer for determining what types of colormaps to use for sequential, diverging, or categorical/qualitative data that is print friendly, color-blind safe and/or photocopy safe: https://colorbrewer2.org/#type=sequential&scheme=BuGn&n=3

In [None]:
from matplotlib import pyplot as plt

# example data: x from 0 up to (not including) 100 in steps of 5, and two curves computed from x
x = np.arange(0,100,5)
y = 3*x - 2*x**2
z = 2*x + 5*x**2

fig = plt.figure() # create figure

# top subplot: y and z both plotted against x, each with its own y axis
ax = fig.add_subplot(2,1,1) # add axis in 1st position of figure with 2 rows, 1 column
ax.plot(x, y, c="tab:blue")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax2 = ax.twinx() # create new axis called ax2 that uses same x axis as ax
ax2.plot(x, z, c="tab:red")
ax2.set_ylabel("z") # label on opposite y axis

# bottom subplot: z plotted against y and against x, each with its own x axis
# (ax and ax2 are reused as names for the new axes)
ax = fig.add_subplot(2,1,2) # add axis in 2nd position of figure with 2 rows, 1 column
ax.plot(y, z, c = "tab:blue")
ax.set_xlabel("y")
ax.set_ylabel("z")
ax2 = ax.twiny() # create new axis called ax2 that uses same y axis as ax
ax2.plot(x, z, c = "tab:red")
ax2.set_xlabel("x") # label on opposite x axis

fig.tight_layout() # adjust plots so no labels overlap

# subplot positions are numbered across columns starting at 1, then moving to the next row
# so the subplots of a figure with 3 rows and 2 columns are numbered
# 1, 2
# 3, 4
# 5, 6

In [None]:
# plt.subplots creates the figure and all of its axes at once
# ax is a 2D array of axes, indexed as ax[row, column] counting from 0
fig, ax = plt.subplots(2, 2) # create figure with 2 rows and 2 columns

ax[0,0].plot(x, y, c="tab:blue") # subplot in first row and first column
ax[0,0].set_ylabel("y")
ax[0,1].plot(z, y, c="tab:red") # subplot in first row and second column
ax[1,0].plot(x, z, c="tab:green") # subplot in second row and first column
ax[1,0].set_xlabel("x")
ax[1,0].set_ylabel("z")
ax[1,1].plot(z, z, c="tab:orange") # subplot in second row and second column
ax[1,1].set_xlabel("z")

fig.tight_layout() # adjust plots so no labels overlap

In [None]:
# same figure as above but share x axes within columns and y axes within rows

fig = plt.figure()
# grid with 2 rows and 2 columns; hspace=0 and wspace=0 leave no gap between the subplots
gs = fig.add_gridspec(2, 2, hspace=0, wspace=0)
# create the four axes: (ax1, ax2) are the top row, (ax3, ax4) the bottom row
(ax1, ax2), (ax3, ax4) = gs.subplots(sharex='col', sharey='row')
fig.suptitle('Sharing x per column, y per row') # title for the whole figure
ax1.plot(x, y, c="tab:blue")
ax2.plot(z, y, c="tab:red")
ax3.plot(x, z, c="tab:green")
ax4.plot(z, z, c="tab:orange")

# loop through all axes in the figure
for ax in fig.get_axes():
    ax.label_outer() # only label outer axes

In [None]:
# make y axis log-scale (semilogy: the x axis stays linear)
y = x**2
# NOTE: x starts at 0, so the first value of y is 0; the log of 0 is undefined,
# so that point cannot be placed on the log-scale axis

fig, ax = plt.subplots(1)
ax.semilogy(x,y)
ax.set_xlabel("x")
ax.set_ylabel("y")

In [None]:
# make x axis log-scale (semilogx: the y axis stays linear)
y = np.sqrt(x)
# NOTE: x starts at 0; the log of 0 is undefined, so that point cannot be placed
# on the log-scale axis

fig, ax = plt.subplots(1)
ax.semilogx(x,y)
ax.set_xlabel("x")
ax.set_ylabel("y")

In [None]:
# make both axes log-scale
# x and y are reused from the previous cell (y = np.sqrt(x))
fig, ax = plt.subplots(1)
ax.loglog(x,y)
ax.set_xlabel("x")
ax.set_ylabel("y")