# Lecture 04 notes

# Example exercise. 

In [1]:
# Cultural Concept Class: Movie

# Define a Movie class
class Movie:
    def __init__(self, title, director, year, genre, rating):
        self.title = title
        self.director = director
        self.year = year
        self.genre = genre
        self.rating = rating

    def __repr__(self):
        return f"{self.title} ({self.year}) - {self.genre}, directed by {self.director}, rating: {self.rating}"

# Generate a dictionary dataset of Movie objects
movies_dataset = {
    "inception": Movie("Inception", "Christopher Nolan", 2010, "Sci-Fi", 8.8),
    "parasite": Movie("Parasite", "Bong Joon-ho", 2019, "Thriller", 8.6),
    "pulp_fiction": Movie("Pulp Fiction", "Quentin Tarantino", 1994, "Crime", 8.9),
    "spirited_away": Movie("Spirited Away", "Hayao Miyazaki", 2001, "Animation", 8.6),
    "the_godfather": Movie("The Godfather", "Francis Ford Coppola", 1972, "Crime", 9.2)
}

# Global variable: minimum rating for a "top movie"
MIN_TOP_RATING = 8.8

# Function to extract all movies of a given genre
def get_movies_by_genre(dataset, genre):
    return [movie for movie in dataset.values() if movie.genre.lower() == genre.lower()]

# Function to extract top-rated movies using the global variable
def get_top_movies(dataset):
    global MIN_TOP_RATING
    return [movie for movie in dataset.values() if movie.rating >= MIN_TOP_RATING]

# Function to get average rating (demonstrates local variable)
def average_rating(dataset):
    ratings = [movie.rating for movie in dataset.values()]
    avg = sum(ratings) / len(ratings)  # local variable avg
    return avg

# Example usage
print("All movies in the dataset:")
for movie in movies_dataset.values():
    print(movie)

print("\nCrime movies:")
print(get_movies_by_genre(movies_dataset, "Crime"))

print("\nTop-rated movies:")
print(get_top_movies(movies_dataset))

print("\nAverage rating of all movies:")
print(average_rating(movies_dataset))

All movies in the dataset:
Inception (2010) - Sci-Fi, directed by Christopher Nolan, rating: 8.8
Parasite (2019) - Thriller, directed by Bong Joon-ho, rating: 8.6
Pulp Fiction (1994) - Crime, directed by Quentin Tarantino, rating: 8.9
Spirited Away (2001) - Animation, directed by Hayao Miyazaki, rating: 8.6
The Godfather (1972) - Crime, directed by Francis Ford Coppola, rating: 9.2

Crime movies:
[Pulp Fiction (1994) - Crime, directed by Quentin Tarantino, rating: 8.9, The Godfather (1972) - Crime, directed by Francis Ford Coppola, rating: 9.2]

Top-rated movies:
[Inception (2010) - Sci-Fi, directed by Christopher Nolan, rating: 8.8, Pulp Fiction (1994) - Crime, directed by Quentin Tarantino, rating: 8.9, The Godfather (1972) - Crime, directed by Francis Ford Coppola, rating: 9.2]

Average rating of all movies:
8.82


## Code wish list request 1: What is class inheritance?

In [2]:
# Class inheritance.

# Parent class. 
class Human:
    def __init__(self, name):
        self.name = name

    def info(self):
        print("Human name:", self.name)

# Child class. 
class Baby(Human):
    def sound(self):
        print(self.name, "cries")

# Initialize object of child class that inherits methods from parent class. 
d = Baby("Sarah")
d.info()      # Inherited method
d.sound()

Human name: Sarah
Sarah cries


In [3]:
# Using super() to efficiently inherit methods. 

# Parent Class: Human
class Human:
    def __init__(self, name):
        self.name = name

    def info(self):
        print("Human name:", self.name)

# Child Class: Baby
class Baby(Human):
    def __init__(self, name, age):
        super().__init__(name)   # Initializes name method from parent class without having to rewrite the function.
        self.age = age

    def details(self):
        print(self.name, "is", self.age)

d = Baby("Sarah", "3 months")
d.info()      # Parent method
d.details()   # Child method

Human name: Sarah
Sarah is 3 months


## Code wish list request 2: What is polymorphism?

In [4]:
class Adult:
  def sound(self):
    return "Language"

class Baby:
  def sound(self):
    return "Babble"

def make_sound(human_type):
  print(human_type.sound())

# Create objects
baby = Baby()
adult = Adult()

# Same function call but different behaviour. 
make_sound(baby)  
make_sound(adult)  


Babble
Language


In [5]:
# Method overriding. 

class Human:
  def speak(self):
    return "Language"

class Baby(Human):
  def speak(self):
    return "Babble"

class Adolescent(Human):
  def speak(self):
    return "Teenspeak"

# Create objects
human = Human()
baby = Baby()
teen = Adolescent()

# Same method name, different behaviors
print(human.speak())  
print(baby.speak())     
print(teen.speak())     


Language
Babble
Teenspeak


In [6]:
# Operator overloading.
# Example from: https://www.codecademy.com/article/understanding-polymorphism-in-python

class Point:
  def __init__(self, x, y):
    self.x = x
    self.y = y

  def __add__(self, other):
    return Point(self.x + other.x, self.y + other.y)

  def __str__(self):
    return f"Point({self.x}, {self.y})"

# Create Point objects
p1 = Point(1, 2)
p2 = Point(3, 4)

# Using the + operator on Point objects
p3 = p1 + p2
print(p3)  # Output: Point(4, 6)


Point(4, 6)


## Data Frames

In [7]:
# create a set of list variables.
# Example from Weed & Navarro textbook. 

age = [17, 19, 21, 37, 18, 19, 47, 18, 19]
score = [12, 10, 11, 15, 16, 14, 25, 21, 29]
rt = [3.552, 1.624, 6.431, 7.132, 2.925, 4.662, 3.634, 3.635, 5.234]
group = ["test", "test", "test", "test", "test", "control", "control", "control", "control"]

# what do we do to link these variables???

In [8]:
# Turn into dataframe.
# Example from Weed & Navarro textbook. 

# import pandas
import pandas as pd

df = df = pd.DataFrame(
    {
        'age': age,
        'score': score,
        'rt': rt,
        'group': group
    }
)

df

Unnamed: 0,age,score,rt,group
0,17,12,3.552,test
1,19,10,1.624,test
2,21,11,6.431,test
3,37,15,7.132,test
4,18,16,2.925,test
5,19,14,4.662,control
6,47,25,3.634,control
7,18,21,3.635,control
8,19,29,5.234,control


In [9]:
# check attributes.

# type
df.dtypes

# Index.
df.index

# columns.
df.columns

# copy.
df.copy

<bound method NDFrame.copy of    age  score     rt    group
0   17     12  3.552     test
1   19     10  1.624     test
2   21     11  6.431     test
3   37     15  7.132     test
4   18     16  2.925     test
5   19     14  4.662  control
6   47     25  3.634  control
7   18     21  3.635  control
8   19     29  5.234  control>

In [10]:
d = {'col1': [0, 1, 2, 3], 'col2': pd.Series([2,3,4,5])}
d_df = pd.DataFrame(data=d, index=[0, 1, 2, 3])




## What are the types of data that can be passed into DataFrame object?

In [11]:
# ndarray 
# (example from https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)

import numpy as np

# define np array.
np_array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])


df2 = pd.DataFrame(np_array, 
                   columns=['a', 'b', 'c'])

np_array, df2


(array([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]]),
    a  b  c
 0  1  2  3
 1  4  5  6
 2  7  8  9)

In [12]:
# nparrays can be different types.

# Numeric array
a = np.array([1, 2, 3])

# String array
b = np.array(['a', 'b', 'c'])

# Object array (can hold mixed types)
c = np.array([1, 'a', 3.5], dtype=object)

a, b, c

(array([1, 2, 3]),
 array(['a', 'b', 'c'], dtype='<U1'),
 array([1, 'a', 3.5], dtype=object))

In [13]:
# iterables (lists, tuples, any type that can iterated over in a for loop).

# list of lists.
data = [
    [17, 12, 3.552, "test"],
    [19, 10, 1.624, "test"],
    [21, 11, 6.431, "test"]
]

df = pd.DataFrame(data, columns=['age', 'score', 'rt', 'group'])
print(df)

   age  score     rt group
0   17     12  3.552  test
1   19     10  1.624  test
2   21     11  6.431  test


In [14]:
# data frame from data frame.
# (example from https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.html)


df1 = pd.DataFrame([1, 2, 3], index=["a", "b", "c"], columns=["x"])
df2 = pd.DataFrame(data=df1, index=["a", "c"])
df2


Unnamed: 0,x
a,1
c,3


In [15]:
# indices.

df_indexed = pd.DataFrame(
    {
        'age': age,
        'score': score,
        'rt': rt,
        'group': group
    }, 
    index = range(len(age))
)

df_indexed


Unnamed: 0,age,score,rt,group
0,17,12,3.552,test
1,19,10,1.624,test
2,21,11,6.431,test
3,37,15,7.132,test
4,18,16,2.925,test
5,19,14,4.662,control
6,47,25,3.634,control
7,18,21,3.635,control
8,19,29,5.234,control


In [16]:
# columns.

df_indexed = pd.DataFrame(
    {
        'age': age,
        'score': score,
        'rt': rt,
        'group': group
    }, 
    index=range(len(age))
)

# Change column labels
df_indexed.columns = ['Age (years)', 'Test Score', 'Response Time', 'Group']

df_indexed


Unnamed: 0,Age (years),Test Score,Response Time,Group
0,17,12,3.552,test
1,19,10,1.624,test
2,21,11,6.431,test
3,37,15,7.132,test
4,18,16,2.925,test
5,19,14,4.662,control
6,47,25,3.634,control
7,18,21,3.635,control
8,19,29,5.234,control


In [17]:
# force data to be of same type (only possible when data are not mixed).

d = {'col1': [1, 2], 'col2': [3, 4]}
df1 = pd.DataFrame(data=d)

df2 = pd.DataFrame(data=d, dtype=np.int8)

df1.dtypes, df2.dtypes


(col1    int64
 col2    int64
 dtype: object,
 col1    int8
 col2    int8
 dtype: object)

In [18]:
# copy = false.

# Create a np array.
arr = np.array([[1, 2], [3, 4]])

# Create a DataFrame using the array, with copy=False
df = pd.DataFrame(arr, copy=False)

# Change a value in the original array
arr[0, 0] = 99

# The change is reflected in the DataFrame
print(df)

    0  1
0  99  2
1   3  4


In [19]:
# copy = true

# Create a np array.
arr = np.array([[1, 2], [3, 4]])

# Create a DataFrame using the array, with copy=True
df = pd.DataFrame(arr, copy=True)

# Change a value in the original array
arr[0, 0] = 99

# The change is reflected in the DataFrame
print(df)

   0  1
0  1  2
1  3  4


In [None]:
art_df = pd.read_csv("https://github.com/metmuseum/openaccess/blob/e901de145e60258542243571098245826a01fe47/MetObjects.csv")

KeyboardInterrupt: 