# 2. Tutorial 1: Python Basics

#### Naoki TANI
#### Center for Advanced Policy Studies (CAPS), Institute of Economic Research, Kyoto University
#### April 11, 2024

In [1]:
import numpy as np

## 1. Basic Python syntax

### 1-1. Variable assignment

In [2]:
# Assign the value 3 to the variable x
x = 3
x

3

In [3]:
y = 5
y

5

In [4]:
α = 2
α

2

### 1-2. Mathematical operators

In [5]:
#addition
x + y

8

In [6]:
#subtraction
y - x

2

In [7]:
#multiplication
x * y

15

In [8]:
#division
x/y

0.6

In [9]:
#power 
x**y
# not ^ 

243

In [10]:
#comparison operators
print("y is equal to 5 :", y==5)
print("y is greater than x :", y>x)
print("y is less than or equal to 5 :", y<=5)
print("x is greater than or equal to 3 :", x>=3)
print("x is greater than or equal to 3 :", x>=3)
print("x is not equal to 2 :", x!=2)

y is equal to 5 : True
y is greater than x : True
y is less than or equal to 5 : True
x is greater than or equal to 3 : True
x is greater than or equal to 3 : True
x is not equal to 2 : True


In [11]:
#comparison operators can be chained
print(0<x<=y==5)

True


### 1-3. Boolean operators(論理演算子)

In [12]:
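#logical disjunction (論理和)
#note: | and & are Python's bitwise operators; on bool values they act as logical OR / AND (plain Python also has the keywords `or` and `and`)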
True | True

True

In [13]:
True | False

True

In [14]:
#logical conjunction (論理積)
True & True

True

In [15]:
True & False

False

In [16]:
#Example
(x==3) | (y==4)

True

In [17]:
(x==3) & (y==4)

False

### 1-4. Basic types (データ型)

In [18]:
type(x)

int

In [19]:
type(y)

int

In [20]:
type(x/y)

float

In [21]:
type(np.nan)

float

In [22]:
type("Kyoto")

str

In [23]:
#useful functions to test numbers for special values
np.isinf(x)

False

In [24]:
np.isinf(np.inf)

True

In [25]:
np.isnan(1)

False

In [26]:
np.isnan(np.nan)

True

In [27]:
i = np.zeros(1100)
n = np.array([np.nan])
X = np.concatenate([i, n, i])
sum(np.isinf(X))
sum(np.isnan(X))

1

### 1-5. Strings (文字列)

In [28]:
#Strings are written with quotes
'Kyoto'

'Kyoto'

In [29]:
#string interpolation using formatted strings (f-strings)
x + y
print(f'The sum of x and y is equal to {x+y}')
print(f'{x} + {y} = {x+y:.2f}') #specify the number of decimal places

The sum of x and y is equal to 8
3 + 5 = 8.00


In [30]:
#concatenation
'To' + 'kyo'

'Tokyo'

In [31]:
'Kyoto'*3

'KyotoKyotoKyoto'

In [32]:
#substrings
#strings can be accessed as arrays (0 based indexing)
city = 'Kyoto prefecture'
city[0]

'K'

In [33]:
#slice notation ( [min,max) )
city[:-11]

'Kyoto'

In [34]:
#check if the strings contain a substring
"K" in city

True

In [35]:
#strings are immutable: assigning to a slice raises a TypeError
city[:-11] = 'kyoto'

TypeError: 'str' object does not support item assignment

In [36]:
city.replace('Kyoto', 'kyoto')

'kyoto prefecture'

In [37]:
#split strings into a list
city.split() #split the string using a space

['Kyoto', 'prefecture']

In [38]:
date="04-01"
d = date.split("-")
d

['04', '01']

In [39]:
#join strings
'-'.join(d)

'04-01'

### 1-6. Dynamic typing（動的型付け）

#### Python's dynamic typing allows us to skip type declarations (型宣言) for variables: the type of a variable is determined by the value currently assigned to it, so it can change seamlessly when a new value is assigned.
#### This flexibility enables us to adapt our code without rigid type constraints.

In [40]:
x = 1
x = "hello"
# You can reassign the variable x to a string value.

### Example to demonstrate the dynamic nature of Python:

In [41]:
# Define a function print_type that labels its argument as a string
def print_type(x):
    """Print x followed by the fixed label ' is a string.'

    The type of x is not inspected; the label is always the same.
    """
    message = f'{x} is a string.'
    print(message)

In [42]:
# Define print_type again: a new def with the same name rebinds that name
def print_type(x):
    """Print x followed by the fixed label ' is a float.'

    The type of x is not inspected; the label is always the same.
    """
    text = f'{x} is a float.'
    print(text)

In [43]:
# Define print_type once more: the name now refers to this newest function
def print_type(x):
    """Print x followed by the fixed label ' is an integer.'

    The type of x is not inspected; the label is always the same.
    """
    line = f'{x} is an integer.'
    print(line)

In [44]:
print_type(1)

1 is an integer.


#### You can define a function with the same name several times; each new `def` simply rebinds the name to the new function, something many statically typed languages do not allow.
#### In Python, the interpreter executes whichever definition is the latest one available at runtime, which is why `print_type(1)` above reports an integer.

#### Notice that Julia uses `multiple dispatch` to decide which method of a function to execute (i.e. dispatch) when the function is called, based on the argument types (integers, floats, complex numbers, or matrices).

In [45]:
#example of Julia code
#function print_type(x::String)
#    println("$x is a String")
#end

#### Python's dynamic typing benefits users by enhancing the flexibility and reusability of code.
#### However, there are trade-offs between flexibility and performance.
#### A Python object contains not only its value but also other information: a type code, data bytes, and other pieces.
#### This extra information allows Python to be coded dynamically at the cost of performance.

### Example of Duck Typing

#### If an object quacks like a duck (has a quack method) and walks like a duck (has a walk method), Python treats it as a duck, irrespective of its actual type.
#### The duck typing example below shows Python's flexibility: objects of different types can be used interchangeably as long as they exhibit the necessary behaviour.

In [46]:
def duck_sum(x):
    """Return the sum of the items of x, starting from 0.

    Works for any iterable whose items support ``+`` with the running
    total (ints, floats, NumPy arrays, ...); no type checks are made.
    An empty iterable gives 0.
    """
    total = 0
    for item in x:
        # Use "total = total + item" rather than "total += item": once the
        # running total has become a NumPy array, += updates it in place and
        # raises a casting error when an integer array meets a float.
        total = total + item
    return total

In [47]:
duck_sum([2,1])
duck_sum([2.1,21])
duck_sum([2.1,np.nan])
a = np.array([0,1,2])
duck_sum([a,1])
#You do not have to divide cases depending on the types of arguments.

array([1, 2, 3])

### 1-7. Lists (リスト)

#### Lists are enclosed by square brackets and are mutable ordered collections of elements.

In [48]:
l = [1,2,3,4]

In [49]:
l[1]

2

In [50]:
l[2:]

[3, 4]

In [51]:
#test for membership
(4 in l)

True

In [52]:
# lists can be extended inplace
l.extend([5,6]) #add several elements
l.append(7) #add an element. 
l.append([8,9]) # add a list as an element
l

[1, 2, 3, 4, 5, 6, 7, [8, 9]]

In [53]:
#if elements can be ranked, it is possible to make sorted lists.
l1=[1,4,2,6,3]
l1.sort()
l1

[1, 2, 3, 4, 6]

In [54]:
#you can create a list with different types of elements.
l2 = [False, 2, np.nan, "a"]
[type(i) for i in l2]

[bool, int, float, str]

In [55]:
#lists are concatenated with +
l1[:2] + l1[2:] == l1

True

### 1-8. Tuples (タプル)

#### Tuples are enclosed by round brackets and are `immutable` ordered collections of elements.

In [56]:
#Tuples can contain any data.
t = (1,2,3,4,5)
t1 = (t, 'f', np.nan)
t2 = (10,) #need comma to make one element tuple

In [57]:
#elements are accessed with brackets
t1[1]

'f'

In [58]:
#slice tuples ([min,max))
t[2:3]

(3,)

In [59]:
#repeat with *
t2*5

(10, 10, 10, 10, 10)

In [60]:
#concatenate with +
t + t1 + t2

(1, 2, 3, 4, 5, (1, 2, 3, 4, 5), 'f', nan, 10)

In [61]:
# tuples are immutable
t[1] = 3

TypeError: 'tuple' object does not support item assignment

In [62]:
#convert tuple to list
list(t)

[1, 2, 3, 4, 5]

In [63]:
#convert list to tuple
tuple(l)

(1, 2, 3, 4, 5, 6, 7, [8, 9])

#### You can use tuples to define variables that are not allowed to change, which can reduce error due to unexpected changes. 

### 1-9. Numpy Arrays (配列)

#### Lists are flexible and can contain heterogeneous types of elements. However, this feature requires the elements to contain additional information other than the values.
#### When you use fixed-type variables, Numpy arrays are more efficient for manipulating data.

In [64]:
# All the data must have the same type.
np.array([1,2,3,4])

array([1, 2, 3, 4])

In [65]:
# If types do not match, Numpy will upcast if possible.
np.array([3.14,1,2])

array([3.14, 1.  , 2.  ])

In [66]:
# create an empty array
# The values are uninitialized and will be whatever happens to already exist at that memory location.
np.empty(3)

array([3.14, 1.  , 2.  ])

In [67]:
# create an array full of zeros
np.zeros((3,3))
np.zeros((3,3), dtype=int)


array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [68]:
# create an array filled with ones
np.ones((3,3), dtype=int)
np.ones((3,3,3), dtype=int)

array([[[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]],

       [[1, 1, 1],
        [1, 1, 1],
        [1, 1, 1]]])

In [69]:
# create an array filled with a specific value
np.full((3,3),4.2)

array([[4.2, 4.2, 4.2],
       [4.2, 4.2, 4.2],
       [4.2, 4.2, 4.2]])

In [70]:
# create an array filled with a linear sequence
np.arange(3)

array([0, 1, 2])

In [71]:
# create a linearly spaced vector
np.linspace(0,1,5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [72]:
# create an identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [73]:
# modify value of element
a = np.ones((3,3))
a[0,1] = 2
a

array([[1., 2., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [74]:
# create a random matrix, each element uniformly distributed between 0 and 1
np.random.rand(3,3)

array([[0.99698441, 0.62339012, 0.27647973],
       [0.20509491, 0.42859671, 0.9358375 ],
       [0.07500105, 0.01511223, 0.18932359]])

In [75]:
# create a random matrix, each element normally distributed with mean 0 and standard deviation 1
np.random.normal(0,1,(3,3))

array([[ 0.4812699 ,  0.31070872, -0.00210054],
       [-0.48583788,  0.32455676, -0.66468746],
       [ 1.44065906,  1.46640705, -0.53819639]])

## 2. Basic operations on Numpy arrays

### 2-1. Basic operations

In [76]:
# transpose
a.T

array([[1., 1., 1.],
       [2., 1., 1.],
       [1., 1., 1.]])

In [77]:
# addition, subtraction
a + 1

a - 1

array([[0., 1., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [78]:
# element-wise multiplication（アダマール積）
a*a

array([[1., 4., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [79]:
# matrix multiplication（行列積）
a@a

array([[4., 5., 4.],
       [3., 4., 3.],
       [3., 4., 3.]])

### 2-2. Numpy array attributes and indexing

In [80]:
np.random.seed(0)  # seed for reproducibility

x1 = np.random.randint(10, size=6)  # One-dimensional array
x2 = np.random.randint(10, size=(3, 4))  # Two-dimensional array
x3 = np.random.randint(10, size=(3, 4, 5))  # Three-dimensional array

In [81]:
print("x3 number of dimensions: ", x3.ndim)
print("x3 size of each dimension:", x3.shape)
print("x3 total size of the array: ", x3.size)
print("x3 data type of the array: ", x3.dtype)

x3 number of dimensions:  3
x3 size of each dimension: (3, 4, 5)
x3 total size of the array:  60
x3 data type of the array:  int32


In [82]:
x1

array([5, 0, 3, 3, 7, 9])

In [83]:
x1[0] #index starts from 0

5

In [84]:
x1[4]

7

In [85]:
x1[-1]

9

In [86]:
x1[-3]

3

In [87]:
x2

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [88]:
x2[0,-2]

2

In [89]:
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [90]:
x3[1,2,0]

2

In [91]:
# extract a slice
# slicing: x[start:stop:step]
x2[0,0:2]

array([3, 5])

In [92]:
x2[0,0:3:2]

array([3, 2])

In [93]:
# multidimensional slicing
x3[0:2,0:2]

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7]]])

In [94]:
x2[0,::-1]

array([4, 2, 5, 3])

In [95]:
# boolean indexing
x2[x2<6]

array([3, 5, 2, 4, 1])

In [96]:
# ellipsis (inplace operations)
x2[...] = 1
x2

array([[1, 1, 1, 1],
       [1, 1, 1, 1],
       [1, 1, 1, 1]])

In [97]:
# Notice that array slices return views rather than copies of the array data.
x3

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [98]:
x3_sub = x3[0,1,:]
x3_sub

array([9, 4, 3, 0, 3])

In [99]:
x3_sub[0:] = 1
# modifying this subarray changes the original array.

In [100]:
x3

array([[[8, 1, 5, 9, 8],
        [1, 1, 1, 1, 1],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [101]:
# If you do not want to change the original array, make an explicit copy of the subarray.
x3_sub_copy = x3[0,2,:].copy()  # .copy() returns new data instead of a view into x3
x3_sub_copy[0:] = 1  # only the copy is modified
x3  # the row x3[0,2,:] still holds its original values

array([[[8, 1, 5, 9, 8],
        [1, 1, 1, 1, 1],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

### 2-3. Universal Functions

#### To deal with the slowness of loops in Python, we can use NumPy's universal functions, which implement vectorized operations and make repeated calculations on array elements much more efficient.

In [102]:
%%timeit # for loop
import math
n = 10000000
y = [0] * n
for i in range(0, n):
    y[i] = math.sqrt(i)
    y

988 ms ± 43 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [103]:
%%timeit # universal function
x = np.arange(10000000)
y = np.sqrt(x)
y

31.6 ms ± 472 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [104]:
%%timeit # list comprehension
import math
n = 10000000
results = [math.sqrt(i) for i in range(0, n)]
results

1.02 s ± 26.1 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### 2-4. Broadcasting

#### Broadcasting allows us to perform element-by-element binary operations on arrays of different sizes.
#### It applies binary universal functions to arrays of different sizes.

#### Rules of Broadcasting
Broadcasting in NumPy follows a strict set of rules to determine the interaction between the two arrays:

Rule 1: If the two arrays differ in their number of dimensions, the shape of the one with fewer dimensions is padded with ones on its leading (left) side.

Rule 2: If the shape of the two arrays does not match in any dimension, the array with shape equal to 1 in that dimension is stretched to match the other shape.

Rule 3: If in any dimension the sizes disagree and neither is equal to 1, an error is raised.

see https://jakevdp.github.io/PythonDataScienceHandbook/02.05-computation-on-arrays-broadcasting.html

In [105]:
a = np.array([[0,1,2],[4,5,6]])
b = np.array([0.4,0.5,0.6])
a + b

array([[0.4, 1.5, 2.6],
       [4.4, 5.5, 6.6]])

In [106]:
a = np.array([[0,1,2],[4,5,6],[7,8,9]])
b = np.array([0.4,0.5,0.6])
a + b

array([[0.4, 1.5, 2.6],
       [4.4, 5.5, 6.6],
       [7.4, 8.5, 9.6]])

In [107]:
a = np.random.randn(4,3,2)
b = np.random.randn(1,3,2)
print(a + b)
print((a + b).shape)

[[[ 1.68271763  2.41537988]
  [-1.23500574 -1.80475894]
  [-0.39375567 -1.00459846]]

 [[-0.76527469  2.13732028]
  [ 1.01829543  1.7804533 ]
  [ 0.42675087 -2.13880127]]

 [[ 0.05263767  0.9580542 ]
  [-0.12317518  0.66795447]
  [ 0.81096522 -4.37807819]]

 [[ 1.15230993  1.3552806 ]
  [ 0.58497138  0.904819  ]
  [ 1.96626631 -2.1353174 ]]]
(4, 3, 2)


In [108]:
a = np.random.randn(4,3,3)
b = np.random.randn(1,3,2)
print(a + b)
print((a + b).shape)

ValueError: operands could not be broadcast together with shapes (4,3,3) (1,3,2) 

In [109]:
v = np.array([1,2])#.reshape(2,1)
a = np.array([[1,2,3],[4,5,6]])
a+v

ValueError: operands could not be broadcast together with shapes (2,3) (2,) 

In [110]:
v = np.array([1,2])#.reshape(2,1)
a = np.array([[1,2,3],[4,5,6]])
a+v[:,np.newaxis]

array([[2, 3, 4],
       [6, 7, 8]])

In [111]:
v[:,np.newaxis]

array([[1],
       [2]])

## 3. Control flow (制御フロー)

### 3-1. if-elif-else conditions

In [112]:
x = -1
if x<0:
    print("x is negative")
elif x >0:
    print("x is positive")
else:
    print("x is zero")

x is negative


In [113]:
# conditional expression (ternary operator): choose the message first, then print it once
message = "x<0" if x < 0 else "x≥0"
print(message)

x<0


### 3-2. While loops

In [114]:
x = 5
while x > 0:
    x -= 1
    print(f"x = {x}")

x = 4
x = 3
x = 2
x = 1
x = 0


In [115]:
x = 5
while x > 0:
    x += 1 #go into an infinite loop
    print(f"x = {x}")
    if x ==20:
        break #terminate the loop when the condition is satisfied

x = 6
x = 7
x = 8
x = 9
x = 10
x = 11
x = 12
x = 13
x = 14
x = 15
x = 16
x = 17
x = 18
x = 19
x = 20


### 3-3. For loops

In [116]:
for i in range(1,10):
    print(f"i = {i}")

i = 1
i = 2
i = 3
i = 4
i = 5
i = 6
i = 7
i = 8
i = 9


In [117]:
#iterate over an array
Y = np.array([[1,2,3],[4,5,6]])
for i in Y:
    print(f'{i}')

[1 2 3]
[4 5 6]


In [118]:
K = np.arange(10)
s = np.empty(10)
for (index, value) in enumerate(K):
    s[index]=value
s==K

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [119]:
Pref = ["Tokyo", "Osaka", "Nagoya"]
for i in Pref:
    print(i)

Tokyo
Osaka
Nagoya


### 3-4. List comprehensions (リスト内包表記)

In [120]:
isq = np.empty(10)
for i in range(10):
    isq[i]=i
isq

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [121]:
isq_lc = [i for i in range(10)]
isq_lc
#np.array(isq_lc)

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

## 4. Exercises

#### $\mathit{Q}$-1. Write a function that counts how many times the number 4 appears in a given list.

In [None]:
def L(x):
    
    

In [None]:
L([1,2,3,4,4])
L([1,2,3,4,4,4])

#### $\mathit{Q}$-2. Consider the polynomial $p(x)=\sum_{i=0}^n \alpha_i x^i$. Using `enumerate()` in your loop, write a function $p$ such that $p(x, \alpha)$ computes the value of the polynomial given $x$ and an array of coefficients $\alpha=(\alpha_{0},\alpha_{1},...,\alpha_{n})$.