# Lab N°1: Introduction to Python, NumPy, Pandas

## Imports

In [None]:
import sklearn                  
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from os import mkdir

## Strings

In [None]:
# Strings 1
# Extended slicing `[start:stop:step]` on the 26-letter lowercase alphabet.

from string import ascii_lowercase

# Every 3rd letter from index 2 up to (not including) the last letter: 'cfilorux'.
print(f"{ascii_lowercase[2:-1:3] = }")
# Every 2nd letter from index 21 to the end of the string: 'vxz'.
print(f"{ascii_lowercase[21:26:2] = }")
# Walk backwards from the last letter in steps of 2, stopping before index -9: 'zxvt'.
print(f"{ascii_lowercase[-1:-9:-2] = }")

In [None]:
# Strings 2
# Basic str methods; each call returns a new string and leaves `xhec_string` untouched.

xhec_string = " XHEC DataScience for Business "

# Lower-case every character (the surrounding spaces are kept).
print(f"{xhec_string.lower() = }")
# Remove the leading and trailing whitespace.
print(f"{xhec_string.strip() = }")
# Replace every lower-case 'e' with an upper-case 'E'.
print(f"{xhec_string.replace('e', 'E') = }")

In [None]:
# Strings 3
# The `=` specifier echoes the expression; `:.9f` keeps 9 digits after the decimal point.

print(f"{np.pi = :.9f}")

In [None]:
# Strings 4
# Case-insensitive character frequency table, built by hand with a plain dict.

hello_string = "HelLo WorLd!!"

occurrence_dictionary = {}

for letter in hello_string.lower():
    # `get` yields 0 the first time a character is met, so no membership test is needed.
    occurrence_dictionary[letter] = occurrence_dictionary.get(letter, 0) + 1

# Bare expression: displayed as the cell output in a notebook.
occurrence_dictionary

## Fast computations with numpy ; basic plots.

In [None]:
# FCwN 5
# Floating-point round-off: adding 100 and removing it again does not give back exactly 1/10.

calculus = 1/10 + 100 - 100
print(f"{calculus = }")

# Exact equality test of the computed value against the literal 0.1.
print(f"{calculus == 0.1 = }")


### Our Answer:

Python's floating-point precision errors are causing the equality check for `calculus == 0.1` not to work as expected. Since 0.1 cannot be precisely represented in binary floating-point format, our computation's result is not exactly 0.1.

In [None]:
# Tolerance-based comparison instead of exact equality; uses `calculus` from the cell above.
print(f"{np.isclose(calculus, 0.1) = }")

In [None]:
# FCwN 6
# Squares of 1..11: once with a plain Python list, once vectorized with NumPy.

square_python_list = [value * value for value in range(1, 12)]
print(f"{square_python_list = }")

one_to_eleven = np.arange(1, 12)
square_numpy_list = one_to_eleven * one_to_eleven  # element-wise product
print(f"{square_numpy_list = }")

### Our Answer:

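We should use NumPy rather than native Python lists for numerical work: NumPy stores the data in a contiguous, homogeneously typed block of memory and runs element-wise operations in compiled code, so it is faster and more memory-efficient than looping over Python objects.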

In [None]:
# FCwN 7

# Integers from 2 up to (not including) 15 in steps of 3: 2, 5, 8, 11, 14.
print(f"{np.arange(2, 15, 3) = }")
# 15 evenly spaced values from 0 to 1, both endpoints included.
print(f"{np.linspace(0, 1, 15) = }")

In [None]:
# FCwN 8
# Wallis product: pi = 2 * prod_{k >= 1} 4k^2 / (4k^2 - 1), truncated at k = n - 1.
# The same truncated product is evaluated with a pure-Python loop and with NumPy,
# and both evaluations are timed.

import time

n = 10_000_000

# perf_counter() is a monotonic, high-resolution clock intended for measuring
# elapsed time; time.time() is the wall clock and can jump if it is adjusted.
python_begin = time.perf_counter()

python_product = 2
for k in range(1, n):
    four_k_squared = 4 * k * k  # computed once per term instead of twice
    python_product *= four_k_squared / (four_k_squared - 1)

print(f"Python Time: {time.perf_counter() - python_begin:.3f} seconds")
print(f"{python_product = }")

# The index array is built before the timer starts so that only the arithmetic
# is measured. The dtype is explicit because 4 * k**2 reaches about 4e14, which
# silently overflows where the default integer is 32-bit (e.g. Windows with
# NumPy < 2).
counter_numpy_pre_allocation = np.arange(1, n, dtype=np.int64)
cnpa = counter_numpy_pre_allocation  # Alias

numpy_begin = time.perf_counter()

numpy_product = 2 * np.prod(4 * cnpa**2 / (4 * cnpa**2 - 1))

print(f"Numpy Time: {time.perf_counter() - numpy_begin:.3f} seconds")
print(f"{numpy_product = }")

In [None]:
# FCwN 8_b
# IPython line magic (notebook only): repeatedly times the NumPy expression.
# Relies on `cnpa` defined in the previous cell.

%timeit 2 * np.prod(4 * cnpa**2 / (4 * cnpa**2 - 1))

### Our Answer:

- `%timeit`: better suited to benchmarking, because it runs the snippet many times and reports statistics over the runs (mean and standard deviation).
- `time.time`: measures a single execution only. It is more flexible (any block of code can be wrapped), but a single wall-clock measurement is noisy and gives no idea of the variability.

In [None]:
# FCwN 9

# Inner product of [0, 1, 2, 3, 4] with a vector of ones: 0 + 1 + 2 + 3 + 4.
dot_product = np.dot(np.arange(5), np.ones(5))

print(f"{dot_product = }")

# Shape of a 1-D array before and after `.T`.
print(f"{np.arange(5).shape = }")
print(f"{np.arange(5).T.shape = }")

one_d_array = np.array([1])

print(f"{one_d_array = }")
print(f"{one_d_array.T = }")
# `==` between arrays compares element by element and yields a boolean array.
print(f"{one_d_array == one_d_array.T = }")

### Our Answers:

- The shape of our array is 1D, thus, its transpose is still a 1D array.
- Transposing a 1D array changes nothing, the array stays the same.
- If we had a 2D array, the shape would be swapped as so: (a, b) $\rightarrow$ (b, a).

In [None]:
# FCwN 10

# 2x6 matrix holding 0..11, filled row by row.
M = np.arange(12).reshape(2, 6)

print(f"{M = }")
# All rows, every 3rd column starting at column 0 (columns 0 and 3).
print(f"{M[:, ::3] = }")

# Shape (3,) times shape (4, 1) broadcasts to a (4, 3) table whose entry [r, c] is r * c.
print(f"{np.arange(3)*np.arange(4)[:, np.newaxis] = }")

### Our Answers:

- Create a $M_{(2, 6)}$ matrix with elements ranging from 0 to 11.
- Take every 3rd column from each row starting from 0.
- Broadcast to combine two arrays with different shapes. Broadcasting means that we make the shapes compatible in case they are not by expanding arrays.

In [None]:
# FCwN 11
# Draw a 5x10 matrix uniformly in [-1, 1), then subtract from every even-indexed
# column twice the column immediately to its right.
# (An equivalent draw would be 2 * np.random.rand(5, 10) - 1, which rescales
# [0, 1) to [-1, 1).)

M = np.random.uniform(-1, 1, (5, 10))

print(f"{M = }")

n_cols = M.shape[1]
even_cols = np.arange(n_cols - 1)[::2]  # 0, 2, 4, ...: each one has a right-hand neighbour
odd_cols = even_cols + 1
M[:, even_cols] = M[:, even_cols] - 2 * M[:, odd_cols]

print(f"{M = }")

In [None]:
# FCwN 12
# Clamp the negative entries of M (from the previous cell) to zero, then centre
# every row on its own mean.

negative_entries = M < 0
M[negative_entries] = 0
print(f"{M = }")

print(f"{M.mean(axis=1) = }")

# One mean per row, reshaped into a column so that it broadcasts across the columns.
row_means = M.mean(axis=1)
M -= row_means.reshape(-1, 1)
print(f"{M = }")

In [None]:
# FCwN 13
# Gram matrix G = M^T M of a random 5x10 matrix, and a few of its properties.

M = np.random.uniform(-1, 1, (5, 10))
print(f"{M = }")

G = M.T @ M

print(f"Is G symmetric: {np.allclose(G, G.T)}")

# G = M^T M is positive semi-definite in exact arithmetic. It is 10x10 but has
# rank at most 5, so several eigenvalues are exactly zero in theory and come out
# numerically as tiny values of either sign. A strict `>= 0` test would therefore
# usually report False; eigenvalues within round-off of zero are accepted instead.
# The tolerance follows the convention of np.linalg.matrix_rank:
# largest magnitude * dimension * machine epsilon.
eigenvalues = np.linalg.eigvalsh(G)  # eigvalsh: eigenvalues of a symmetric matrix
tolerance = np.abs(eigenvalues).max() * max(G.shape) * np.finfo(G.dtype).eps
is_psd = np.all(eigenvalues >= -tolerance)
print(f"Is G positive semi-definite: {is_psd}")

rank_G = np.linalg.matrix_rank(G)
print(f"Rank of G: {rank_G}")

# With no `ord`, the norm of a 2-D array is the Frobenius norm
# (the Euclidean norm of the matrix seen as a flat vector).
euclidean_norm_G = np.linalg.norm(G)
print(f"Euclidean norm of G: {euclidean_norm_G}")

# ord=2 gives the largest singular value (operator / spectral norm).
spectral_norm_G = np.linalg.norm(G, ord=2)
print(f"Operator norm of G: {spectral_norm_G}")

print(f"Standard deviation of each column of G: {np.std(G, axis=0)}")

In [None]:
# FCwN 14
# Plot x -> x^d on [-1, 2] for d = 2, 3, 4, all curves on one figure.

x = np.linspace(-1, 2, 100)

for d in (2, 3, 4):
    x_d = x**d
    plt.plot(x, x_d, label=f"d = {d}")

# Axis labels, legend and title are set once, after all curves are drawn.
plt.xlabel("x")
plt.ylabel("$x^d$")
plt.legend()
plt.title(r"$x \rightarrow x^d$")
plt.show()

## Numpy advanced behavior

### Numpy broadcasting

In [None]:
# Nab - Nb 15

# Odd numbers 1, 3, ..., 19.
my_array = np.arange(1, 20, 2)
print(f"{my_array = }")

# The mean is a float, so the integer array is converted before the in-place subtraction.
my_array = my_array.astype(float)
my_array -= my_array.mean()
print(f"{my_array = }")

# A scalar broadcasts against every element of a 2-D array.
show_minus_array = np.ones((2, 5))
print(f"{show_minus_array - 1 = }")

# Column vector i, shape (3, 1), and row vector j, shape (1, 4), broadcast to a (3, 4) grid.
i = np.arange(3)[:, np.newaxis]
j = np.arange(4)[np.newaxis, :]
custom_array = i * 4 + j

print(f"{custom_array = }")

In [None]:
# Nab - Nb 16
# Centre the columns, then the rows, of the (3, 4) `custom_array` from the
# previous cell, using broadcasting.

custom_array = custom_array.astype(float)  # the means are floats: convert before subtracting in place

# Column means have shape (4,), which broadcasts against the (3, 4) array as is.
custom_array -= custom_array.mean(axis=0)

print(f"{custom_array = }")

# Row means have shape (3,); a trailing axis turns them into a (3, 1) column so
# that each mean lines up with its own row. The axis must be added AFTER taking
# the mean: custom_array[:, None].mean(axis=1) averages over the inserted
# length-1 axis and simply returns the array itself, so subtracting it zeroes
# the array whatever it contains.
custom_array -= custom_array.mean(axis=1)[:, None]

print(f"{custom_array = }")

In [None]:
# Nab - Nb 17
# Multiplication table through broadcasting: entry [r, c] equals r * c.

i = np.arange(3)
j = np.arange(5)

# (3, 1) column times (5,) row -> (3, 5) table.
arr = i[:, np.newaxis] * j
print(f"{arr = }")

### Value and reference types

In [None]:
# Nab - Vart 18
# Identity (`id`) of int objects under plain assignment and under `+=`.

a = 1_000
print(f"{id(a) = }")

# A second name bound to an equal literal.
b = 1_000
print(f"{id(b) = }")

# Plain assignment binds `c` to the object `a` currently refers to; no copy is made.
c = a
print(f"{id(c) = }")
print(f"{id(a) == id(c) = }")

# `+=` on an int rebinds the name `a` to the result; `b` and `c` are not reassigned.
a += 1
print(f"{id(a) = }; {a = }")
print(f"{id(b) = }; {b = }")
print(f"{id(c) = }; {c = }")
print(f"{id(a) == id(c) = }")

#### Our Answer:

- `c = a` $\rightarrow$ no copy is made: `c` is bound to the very same int object as `a` (same `id` at that moment).
- `a += 1` $\rightarrow$ ints are immutable, so this cannot happen in place: Python creates a new int object `1001` and rebinds `a` to it, hence a new `id` for `a`.
- `b` is untouched and still refers to `1000`. Here it had a separate `id` from the start, but whether two equal integer literals share one object is a CPython implementation detail and should not be relied upon.
- `c` still references the original `1000` object (still the old `id`), since the rebinding only affected the name `a`.

In [None]:
# Nab - Vart 19
# Same experiment as the previous cell, with NumPy arrays instead of ints.
# NOTE(review): indexing a single element (a[0]) appears to build a new NumPy
# scalar object on every access, so the element ids printed below identify
# short-lived temporaries; equalities between them may be coincidental — confirm
# before drawing conclusions from those lines.

a = np.array([0, 1])
print(f"{id(a) = }")
print(f"{id(a[0]) = }; {id(a[1]) = }")

# A second array built from an equal literal.
b = np.array([0, 1])
print(f"{id(b) = }")
print(f"{id(b[0]) = }; {id(b[1]) = }")

# Plain assignment: `c` is another name for the array `a` refers to.
c = a
print(f"{id(c) = }")
print(f"{id(c[0]) = }; {id(c[1]) = }")
print(f"{id(a) == id(c) = }")
print(f"{id(a[0]) == id(c[0]) = }")
print(f"{id(a[1]) == id(c[1]) = }")

# In-place addition on the array, followed by the same inspections as above.
a += 1
print(f"{id(a) = }; {a = }")
print(f"{id(b) = }; {b = }")
print(f"{id(c) = }; {c = }")
print(f"{id(a) == id(c) = }")
print(f"{id(a[0]) = }; {id(a[1]) = }")
print(f"{id(b[0]) = }; {id(b[1]) = }")
print(f"{id(c[0]) = }; {id(c[1]) = }")
print(f"{id(a[0]) == id(c[0]) = }")
print(f"{id(a[1]) == id(c[1]) = }")

#### Our Answer:

- `a = np.array([0, 1])` $\rightarrow$ creates a fresh array (new object).
- `b = np.array([0, 1])` $\rightarrow$ creates a second, independent array with equal contents (different id); nothing is shared with `a`.
- `c = a` $\rightarrow$ copied by reference (same id as `a` at that moment).
- `a += 1` $\rightarrow$ in-place update (no new array, same id), so `c` sees the modified values because it was a reference.
- (Just for contrast: `a = a + 1` would have been copied by value: new array for `a`, `c` would still show the old values.)

In [None]:
# Nab - Vart 20
# What a caller observes after a function applies `+=` to its argument.

def f(a):
    """Apply ``a += 1`` to the argument; there is no return statement."""
    a += 1


# First with an int ...
a = 1
print(f"{f(a) = }; {a = }")

# ... then with an ndarray.
a = np.ones(10)
print(f"{f(a) = }; {a = }")

#### Our Answer:

Same behavior as we have seen before:
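- `a = 1` $\rightarrow$ the function receives a reference to the same int object, but ints are immutable, so `a += 1` inside `f` only rebinds the local name to a new object; the outer `a` is unchanged.
- `a = np.ones(10)` $\rightarrow$ the function again receives a reference to the same object, and for an ndarray `a += 1` is in-place, so the outer array is updated.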

In [None]:
# Nab - Vart 21
# Two separately created length-10 arrays (zeros and ones) for the assignment
# variants discussed in the answers that follow.

a = np.zeros(10)
b = np.ones(10)

#### Our Answers:

- `a = b` $\rightarrow$ copied by reference (both names point to the same ndarray; same id).
- `a[:] = b` $\rightarrow$ values copied by value into existing `a` (in-place data overwrite; id of `a` unchanged).
- `a = a + 1` $\rightarrow$ creates a new array (copied by value; `a` now points to a new object; old one unaffected if other refs exist).
- `a += 1` $\rightarrow$ in-place update (no new object; aliases see the changes).

# End.