# Performance 3

In [None]:
# known import statements
import pandas as pd
import csv
from subprocess import check_output

# new import statements
import zipfile
from io import TextIOWrapper

### Let's take a look at the files inside the current working directory.

In [None]:
# run `ls -lh`, decode its bytes output, and show one list entry per line
check_output(["ls", "-lh"]).decode("utf-8").split("\n")

### Let's `unzip` "wi.zip".

In [None]:
check_output(["unzip", "wi.zip"])

### Let's take a look at the files inside the current working directory.

In [None]:
# run `ls -lh` again; the decoded output is split into one list entry per line
check_output(["ls", "-lh"]).decode("utf-8").split("\n")

### Traditional way of reading data using pandas

In [None]:
df = pd.read_csv("wi.csv")

In [None]:
df.head(5) # Top 5 rows within the DataFrame

### How can we see all the column names?

In [None]:
df.columns

### How to extract `interest_rate`?

In [None]:
# indexing the DataFrame with a column name gives back that column as a Series
df["interest_rate"] # observe that there are missing values

### How to count unique values in a column `Series`?

In [None]:
# value_counts() tallies how many rows hold each distinct value of the column
df["interest_rate"].value_counts()

### Let's eliminate the strings (Exempt) and missing values (NaN).
Let's try `pd.to_numeric(...)`. We need a way to specify that strings need to be converted into NaN values.

In [None]:

# TODO: open the documentation and figure out what parameter will help us
# Recall that we can press shift + tab after a function name to open the documentation

In [None]:
pd.to_numeric(df["interest_rate"], errors="coerce")

### Let's drop the NaN values and compute average interest rate.

In [None]:
pd.to_numeric(df["interest_rate"], errors="coerce")

In [None]:
# non-numeric strings become NaN (errors="coerce"), NaN rows are dropped,
# and mean() averages what is left
pd.to_numeric(df["interest_rate"], errors="coerce").dropna().mean()

### Clearing memory using re-assignment.
In Python, you can free the memory used by an object simply by getting rid of all the active references to it. But we cannot do that in the current notebook because we used "df" to perform other operations, so there is more than one active reference. In fact, we don't even have access to some of the active references. In that case, you can only free up the memory after you "shut down" the current notebook.

In [None]:
df = "some string" # you can also use df = None or df = 123 to clear memory

### How can we read the data without creating an uncompressed version called "wi.csv"?

- Why would we want to do something like that?
    1. lower storage usage (you can directly work with compressed data)
    2. lower memory usage (we can try to load information on one loan at a time, instead of all the loans): that will still work for average interest rate computation
    
**IMPORTANT**: do not run the code in this cell unless you have shut down the notebook first - otherwise your kernel will crash (you will run out of memory space)

In [None]:
# IMPORTANT: do not run this cell code unless you shutdown the notebook - your kernel will crash (you will run out of memory space)
# instead of passing relative path of file name, we can pass a file object instance reference
# `with` closes the file even if read_csv raises (for example on running out of memory)
with open("wi.csv") as f:
    df = pd.read_csv(f)

### Let's free up memory and delete "wi.csv".

In [None]:
df = "some string"

In [None]:
# delete the uncompressed copy, then list the directory to confirm it is gone
check_output(["rm", "wi.csv"])
check_output(["ls", "-lh"]).decode("utf-8").split("\n")

### How can we read data directly from a zip file?
`zipfile.ZipFile(...)`

### Goals:
1. Save storage space: directly access the data without decompressing: `zipfile.ZipFile(...)` - saves storage space by directly opening a zip file 
2. Save memory space: only look at one row at a time: `csv.DictReader(...)` - saves memory space by enabling us to read one row at a time (as `dict`)

In [None]:
# code for goal 1

# read "wi.csv" straight out of the archive, without extracting it to disk;
# `with` closes both the archive and the member stream even if read_csv raises
with zipfile.ZipFile("wi.zip") as zf, zf.open("wi.csv") as f:
    df = pd.read_csv(f)

In [None]:
# Free up the memory again
df = "some string"

In [None]:
# Code for goal 2

# `with` guarantees both handles get closed even though the loop below raises
with zipfile.ZipFile("wi.zip") as zf, zf.open("wi.csv") as f:
    # zf.open(...) gives us bytes, but csv.DictReader needs str lines, so asking
    # for the first row is expected to fail with
    # "_csv.Error: iterator should return strings, not bytes"
    reader = csv.DictReader(f)

    for row in reader:
        print(row)
        break

### Let's learn more modes for `open` built-in function
- `open(..., mode="r")`   => text (default)
- `open(..., mode="rb")`  => bytes
- `zf.open(...)`          => always bytes

With `zipfile` module there isn't a way for us to specify that we need text.

### `TextIOWrapper` inside `io` module enables us to convert `bytes` into `str`

In [None]:
# code for goal 2

with zipfile.ZipFile("wi.zip") as zf, zf.open("wi.csv") as f:
    # TextIOWrapper decodes the bytes coming from zf.open(...) into str,
    # which is what csv.DictReader requires
    reader = csv.DictReader(TextIOWrapper(f))

    # each row is a dict keyed by the CSV header; look at the first one only
    for row in reader:
        print(row)
        break

### Let's go back to calculating average interest rate.
- Algorithm / Pseudocode steps:
    1. print "interest rate" and type of "interest rate"
    2. convert "interest rate" into `float` - how can we handle errors? `try` ... `except` ... (*IMPORTANT*: always have your `except` block catch specific exceptions)
    3. calculate running total, count for each row of data
    4. calculate average

In [None]:
# code for goal 2

# running total and count: only one row is held in memory at a time
total = 0
count = 0

with zipfile.ZipFile("wi.zip") as zf, zf.open("wi.csv") as f:
    reader = csv.DictReader(TextIOWrapper(f))

    for row in reader:
        try:
            # DictReader gives every field as str, so convert explicitly
            rate = float(row["interest_rate"])
        except ValueError:
            # not a number (for example "Exempt" or an empty field): skip this loan
            continue
        total += rate
        count += 1

total / count

### Let's generalize the code to read "interest rate" into a function.

- This does make things worse because we are going back to reading all the data before doing the computation.
- But this sets us up to learn about generators.

In [None]:
def get_rates_v1(zip_path="wi.zip", csv_name="wi.csv", column="interest_rate"):
    """
    Return a list with every numeric value found in `column`.

    The CSV member `csv_name` is read directly from the archive `zip_path`,
    one row at a time. Values that cannot be converted to float are skipped.
    All the rates are collected before returning, so the whole result is
    held in memory at once.
    """
    rates = []
    with zipfile.ZipFile(zip_path) as zf, zf.open(csv_name) as f:
        # zf.open(...) yields bytes; TextIOWrapper turns them into str for csv
        for row in csv.DictReader(TextIOWrapper(f)):
            try:
                rates.append(float(row[column]))
            except ValueError:
                # non-numeric entry (for example "Exempt" or an empty field)
                continue
    return rates

### Using a generator
- `yield` each value
- use `next` to get the next value => internally `for` loop invokes `next` for each iteration

In [None]:
def get_rates_v2(zip_path="wi.zip", csv_name="wi.csv", column="interest_rate"):
    """
    Generator that yields each numeric value found in `column`, one at a time.

    The CSV member `csv_name` is read directly from the archive `zip_path`.
    Values that cannot be converted to float are skipped. Nothing is read
    until the first value is requested, and the archive stays open until the
    generator is exhausted or closed.
    """
    with zipfile.ZipFile(zip_path) as zf, zf.open(csv_name) as f:
        # zf.open(...) yields bytes; TextIOWrapper turns them into str for csv
        for row in csv.DictReader(TextIOWrapper(f)):
            try:
                rate = float(row[column])
            except ValueError:
                # non-numeric entry (for example "Exempt" or an empty field)
                continue
            # hand back one value and pause here until the next one is requested
            yield rate

In [None]:
# calling a generator function runs none of its body yet; it returns a generator object
rates = get_rates_v2()
next(rates) # gives us the next value

In [None]:
next(rates) # gives us the next value

In [None]:
next(rates) # gives us the next value

### Let's use `for` loop to keep getting all the rates.

- `len` function doesn't work with generators
- indexing doesn't work with generators

In [None]:
len(rates)

In [None]:
rates[4]

In [None]:
# start over with a fresh generator: values consumed earlier cannot be replayed
rates = get_rates_v2()

total = 0
count = 0

for rate in rates:  # keeps calling next(rates) to get values from yield
    total += rate
    count += 1

total / count

This approach doesn't work for median calculation. Why? Remember we have to sort, so we need all values in memory.

In [None]:
# sorting needs every value at once, so the generator is drained into a sorted list
rates = sorted(get_rates_v2())

In [None]:
# rates must already be sorted; works for both odd and even len(rates)
mid = len(rates) // 2
if len(rates) % 2:
    # odd count: there is exactly one middle element
    median_value = rates[mid]
else:
    # even count: average the two middle elements
    median_value = (rates[mid - 1] + rates[mid]) / 2
median_value

# OOP 1: Classes

- Creating new types using classes
- Types have specific attributes and methods (special functions)
- Using new types (classes), we can create object instances of those types
- class creation and instantiation syntax: 
```python
class Person:
    # some code
p1 = Person() # object instantiation using constructor
p2 = Person() # object instantiation using constructor
```
- attribute / method access syntax:
```python
p1.fname = "..." # attribute initialization
p1.lname = "..." # attribute initialization
```

#### PythonTutor example

In [None]:
# each person is modeled as a plain dict whose keys are typed by hand every time
p1 = {"fname": "Bob", "lname": "Baker"}

# same shape as p1, built one key at a time
p2 = dict()
p2["fname"] = "Cindy"
p2["lname"] = "Cooper"

# NOTE(review): "Fname" is capitalized, unlike the "fname" key used by p1 and p2,
# so p3["fname"] would raise KeyError - presumably a deliberate illustration of
# how error-prone hand-typed keys are; confirm before "fixing" it
p3 = {"Fname": "Alice", "lname": "Anderson"}

# TODO: Let's define a Person class

### Let's create a `Dog` class.

In [None]:
class Dog:
    # eventually we will learn how to write code inside a class
    # for now the body is empty: attributes get attached to each instance later
    pass

### Let's create `Dog` object instances and add attributes.

### Let's define a `speak` function that will make the `Dog` bark.
- Algorithm / pseudocode steps:
    1. puppies bark thrice (age < 2)
    2. dogs bark once

### `f-strings`

- aka formatted string literals
- easier and quicker way of formatting `str` than `str.format(...)` method

- Syntax: 
```python
f"{} ..."
```
- inside `{}` you can specify a variable or even call a function or a method

In [None]:
def speak(dog):
    """
    Puppies (age < 2) bark thrice, whereas dogs bark once.

    `dog` must have `name` and `age` attributes; the bark is printed and
    nothing is returned.
    """
    if dog.age < 2:
        print(f"{dog.name}: bark bark bark!")
    else:
        print(f"{dog.name}: bark!")

### Let's invoke `speak` for dog1 and dog2.

In [None]:
speak(dog1)

In [None]:
speak(dog2)

### How can we standardize the attribute initialization to avoid bugs?

- Eventually we will learn about how to define methods inside the class, which will include `__init__` method.
- For now, let's define an `init` function.

In [None]:
def init(dog, name, how_old):
    """
    Attach the `name` and `age` attributes to `dog`.

    Routing every initialization through this one function keeps the
    attribute names spelled the same way on every object.
    """
    dog.name = name
    dog.age = how_old

In [None]:
dog2 = Dog()
# the object to set up goes first, then its name and its age
init(dog2, "Buster", 10)
speak(dog2)

### What if there are two `speak` functions? Let's define a Cat class and corresponding `speak` function.

In [None]:
class Cat:
    """Empty class: a Cat carries no attributes of its own."""


def speak(cat):
    """Print the sound a cat makes; `cat` itself is not inspected."""
    print("meow!")


cat1 = Cat()

### What will be the output of the below function calls?

In [None]:
speak(dog1)
speak(dog2)
speak(cat1)

### We lost the previous definition of the `speak` function because the second `def speak` reused the same name and replaced the first one. What if `speak` were a method instead?

### **IMPORTANT**: it is not recommended to re-define the same `class`. This is shown only for example purposes. You must always go back to the original cell and update the definition there.

In [None]:
class Dog:
    """A dog that barks; `name` and `age` are attached by calling `init`."""

    # regular method
    def init(dog, name, how_old):
        """Store `name` and `how_old` (as `age`) on the given dog object."""
        dog.name = name
        dog.age = how_old

    # regular method
    def speak(dog):
        """Print three barks for a puppy (age < 2) and one bark otherwise."""
        if not dog.age < 2:
            print(f"{dog.name}: bark!")
            return
        print(f"{dog.name}: bark bark bark!")


class Cat:
    """A cat that meows."""

    def speak(cat):
        """Print the sound a cat makes."""
        print("meow!")


# Let's create object instances
dog1, dog2, cat1 = Dog(), Dog(), Cat()

# init is a regular method, so each dog has to be initialized explicitly
Dog.init(dog1, "Jimmy", 1)
Dog.init(dog2, "Buster", 10)

In [None]:
# speak now is a method, so we need to use . attribute operator for invocation
# each class has its own speak, so the class name selects which one runs
Dog.speak(dog1)
Dog.speak(dog2)
Cat.speak(cat1)

### Type-based dispatch

In [None]:
animals = [dog1, dog2, cat1]

for animal in animals:
    print(type(animal))

#### Even though `type` output displays additional details, in essence the type is just the name of the class: `Dog`, `Cat`, etc.

In [None]:
type(dog1) == Dog

In [None]:
type(cat1) == Cat

#### Let's invoke speak for all animals.

In [None]:
# v1: bad version
for animal in animals:
    if type(animal) == Dog:
        Dog.speak(animal)
    elif type(animal) == Cat:
        Cat.speak(animal)
    # this conditional will keep growing as we add more and 
    # more animal classes!

#### Here is a slightly better version

In [None]:
for animal in animals:
    type(animal).speak(animal)

### Method invocation (most commonly used syntax)

Notice how the animal is redundant. There is a better way to invoke methods.

- Syntax: `obj_ref.method()`
- `obj_ref` itself will be the first argument to the method.

In [None]:
for animal in animals:
    # this is equivalent to type(animal).speak(animal)
    animal.speak()

#### Let's try passing an argument to `speak` method.

In [None]:
dog1.speak("hello")
# Observe how TypeError says 1 positional argument expected

## `self`

- dedicated special variable that refers to the current object instance (aka receiver) inside a class
- attribute access inside the class **must** always use `self.<attribute>` syntax

In [None]:
class Dog:
    """A dog that barks; `name` and `age` are attached by calling `init`."""

    # regular method
    def init(self, name, how_old):
        """
        Store `name` and `how_old` (as `age`) on the receiver.

        This is a regular method, so it has to be invoked explicitly after
        the object has been created.
        """
        self.name = name
        self.age = how_old

    # regular method
    def speak(self):
        """
        Puppies (age < 2) bark thrice, whereas dogs bark once.
        """
        if self.age < 2:
            print(f"{self.name}: bark bark bark!")
        else:
            print(f"{self.name}: bark!")


# Let's create Dog object instances
dog1 = Dog()
Dog.init(dog1, "Jimmy", 1)  # class-qualified call: the receiver is passed explicitly

dog2 = Dog()
dog2.init("Buster", 10)  # obj_ref.method(): dog2 is passed as self automatically

# Invoke speak for dog1 and dog2
dog1.speak()
dog2.speak()

# OOP: Special Methods

"Special methods" is a technical term referring to methods that get called automatically. In Python, they usually begin and end with double underscores.
- **Note:** you could define a regular method with `__<method>__`.

### `__init__` special method (aka Constructor)

- automatically invoked when creating an object instance
- only one possible constructor in Python

In [None]:
# This is the correct and final version of Dog class
class Dog:
    """A dog that is given its name and age at creation time and can bark."""

    # special method
    def __init__(self, name, how_old):
        """Runs automatically on `Dog(...)`: announce the creation, then store name and age."""
        print("Creating a dog!")
        self.name = name
        self.age = how_old

    # regular method
    def speak(self):
        """Print three barks for a puppy (age < 2) and one bark otherwise."""
        if not self.age < 2:
            print(f"{self.name}: bark!")
            return
        print(f"{self.name}: bark bark bark!")


# Let's create Dog object instances
dog1, dog2 = Dog("Jimmy", 1), Dog("Buster", 10)

# Invoke speak for dog1 and dog2
dog1.speak()
dog2.speak()