In [61]:
import math
import string
import numpy as np
from sklearn import linear_model
import time

# Print the NumPy version in use so the numerical results in this notebook
# can be reproduced with the same library release.
print(f"NumPy version: {np.__version__}")

NumPy version: 2.3.3


In [62]:
# --- 1. Slicing with string module ---
# Every target below is a strided slice of the 26 lowercase ASCII letters.
alphabet = string.ascii_lowercase

# "cfilorux": begin at index 2 ('c'), take every 3rd letter, stop before index 24
s1 = alphabet[slice(2, 24, 3)]
print(f"cfilorux: {s1}")

# "vxz": begin at index 21 ('v'), take every 2nd letter up to the end
s2 = alphabet[slice(21, None, 2)]
print(f"vxz: {s2}")

# "zxvt": walk backwards from index 25 ('z') two letters at a time,
# stopping before index 18
s3 = alphabet[slice(25, 18, -2)]
print(f"zxvt: {s3}")

# --- 2. String Cleaning ---
raw_str = " XHEC DataScience for Business "
# The three cleaning steps, applied one after the other:
lowered_str = raw_str.lower()              # 1) lowercase every character
trimmed_str = lowered_str.strip()          # 2) remove leading/trailing whitespace
clean_str = trimmed_str.replace('e', 'E')  # 3) turn every 'e' into 'E'
print(f"Cleaned string: '{clean_str}'")

# --- 3. Pi Formatting ---
# The ".9f" format spec prints math.pi with nine digits after the decimal point
print(f"Pi with 9 decimals: {math.pi:.9f}")

# --- 4. Character Counting (Manual Method) ---
# Case-insensitive tally of every character (spaces and punctuation included),
# stored in first-seen order.
s = "HelLo WorLd!!"
char_counts = {}
for char in s.lower():
    if char in char_counts:
        char_counts[char] += 1
    else:
        char_counts[char] = 1
print(f"Occurrences: {char_counts}")

cfilorux: cfilorux
vxz: vxz
zxvt: zxvt
Cleaned string: 'xhEc datasciEncE for businEss'
Pi with 9 decimals: 3.141592654
Occurrences: {'h': 1, 'e': 1, 'l': 3, 'o': 2, ' ': 1, 'w': 1, 'r': 1, 'd': 1, '!': 2}


In [63]:
# --- SECTION 2: FAST COMPUTATIONS WITH NUMPY ---

# 1. Comparing floating point objects
# Evaluated left to right: 100 is added to 0.1 first, then subtracted again.
val = (0.1 + 100) - 100
print(f"Result of 0.1 + 100 - 100: {val}")
print(f"Is it exactly 0.1? {val == 0.1}")

# Comment: binary floating point (IEEE 754) cannot store 0.1 exactly, and the
# intermediate sum is rounded again, so the round trip does not land back on
# the same float. Floats should therefore be compared with a tolerance.
# Proper way to compare in NumPy:
print(f"Comparison using np.allclose: {np.allclose(val, 0.1)}")

# 2. For-loop vs NumPy Arrays
# Pure-Python version: square the integers 1..11 one element at a time
squares_list = list(map(lambda i: i**2, range(1, 12)))

# NumPy version: one element-wise multiplication over the whole array
first_integers = np.arange(1, 12)
squares_array = first_integers * first_integers

# Why arrays? NumPy runs the element-wise operation in compiled code over a
# contiguous buffer instead of interpreting a Python-level loop, which is
# typically much faster and more memory-efficient.

# 3. Built-in functions
# Integers from 2 to 14 by step of 3 (the stop value is exclusive, hence 14 + 1)
arr_step = np.arange(2, 14 + 1, 3)
print(f"Step array: {arr_step}")

# 15 equispaced values from 0 to 1, both endpoints included
arr_lin = np.linspace(0, 1, num=15)
print(f"Linspace array: {arr_lin}")

Result of 0.1 + 100 - 100: 0.09999999999999432
Is it exactly 0.1? False
Comparison using np.allclose: True
Step array: [ 2  5  8 11 14]
Linspace array: [0.         0.07142857 0.14285714 0.21428571 0.28571429 0.35714286
 0.42857143 0.5        0.57142857 0.64285714 0.71428571 0.78571429
 0.85714286 0.92857143 1.        ]


In [64]:
# --- SECTION 3: VECTORIZING ---

# Number of factors of the Wallis product; passed to both wallis_loop and
# wallis_numpy below so the two implementations are compared on equal work.
n = 100_000

def wallis_loop(n):
    """Approximate pi with the first ``n`` factors of the Wallis product.

    Pure-Python reference implementation: the factors 4k^2 / (4k^2 - 1) for
    k = 1..n are multiplied one at a time and the product is doubled.

    Args:
        n: number of factors to multiply (an integer).

    Returns:
        float: the approximation of pi. When ``n`` is below 1 no factor is
        applied and the result is 2.0.
    """
    running_product = 1.0
    for k in range(1, n + 1):
        # Numerator of the k-th factor; the denominator is one less.
        four_k_squared = 4 * k**2
        running_product *= four_k_squared / (four_k_squared - 1)
    return running_product * 2

def wallis_numpy(n):
    """Approximate pi with the first ``n`` factors of the Wallis product (vectorized).

    All factors 4k^2 / (4k^2 - 1) for k = 1..n are built as one NumPy array
    and reduced with ``np.prod``; the product is then doubled.

    Args:
        n: number of factors to multiply (an integer).

    Returns:
        numpy.float64: the approximation of pi. When ``n`` is below 1 the
        factor array is empty, the product is 1 and the result is 2.0.
    """
    # Build k as float64 from the start. With an integer array, k**2 is
    # evaluated in fixed-width integer arithmetic and silently wraps once it
    # exceeds the dtype's range (k > 46340 on builds where NumPy's default
    # integer is 32-bit), which would corrupt the product without any error.
    k = np.arange(1, n + 1, dtype=np.float64)
    # Computed once and shared by the numerator and the denominator.
    four_k_squared = 4.0 * k**2
    return 2 * np.prod(four_k_squared / (four_k_squared - 1.0))

# Timing comparison
start_time = time.time()
res_loop = wallis_loop(n)
loop_duration = time.time() - start_time

start_time = time.time()
res_np = wallis_numpy(n)
np_duration = time.time() - start_time

print(f"Loop result: {res_loop:.7f} (Time: {loop_duration:.5f}s)")
print(f"NumPy result: {res_np:.7f} (Time: {np_duration:.5f}s)")

# Professional timing using %timeit
print("Timing NumPy version with %timeit:")
%timeit wallis_numpy(n)

Loop result: 3.1415848 (Time: 0.02298s)
NumPy result: 3.1415848 (Time: 0.00371s)
Timing NumPy version with %timeit:
634 μs ± 34.2 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [65]:
# --- SECTION 4: SHAPES AND DIMENSIONS ---

# 1. Dot product
a = np.arange(5)        # the integers 0..4
b = np.ones(a.shape)    # as many ones as a has entries (float by default)
dot_product = a.dot(b)  # sum of element-wise products of the two 1D arrays
print(f"Dot product of a and b: {dot_product}")

# 2. Understanding 1D Arrays
# A 1D array has a single axis: its shape is a one-element tuple.
print(f"Shape of a: {a.shape}")         # Output: (5,)
print(f"Dimensions of a: {a.ndim}")     # Output: 1

# 3. Transposing 1D arrays
print(f"Shape of a.T: {a.T.shape}")     # Output: (5,)
# Note: with only one axis there is nothing to swap, so .T leaves a 1D array
# unchanged. A column vector needs a second axis, i.e. a 2D array of shape (5, 1).

Dot product of a and b: 10.0
Shape of a: (5,)
Dimensions of a: 1
Shape of a.T: (5,)


In [66]:
# --- SECTION 5: RESHAPING AND BROADCASTING ---

# 1. Reshape
# Lay the integers 0..11 out as 2 rows of 6 columns. reshape only changes how
# the same buffer is indexed here; the data is not copied.
flat_values = np.arange(12)
M = flat_values.reshape((2, 6))

# 2. Advanced Slicing
# Keep every row and take one column out of three (column indices 0 and 3)
sliced_M = M[..., ::3]
print("Sliced M (every 3rd column):\n", sliced_M)

# 3. Broadcasting Example
# Multiplication table built from a column of row factors and a flat array of
# column factors: shapes (3,) and (4, 1) broadcast to a (4, 3) result.
row_factors = np.arange(4)[:, np.newaxis]   # shape (4, 1)
col_factors = np.arange(3)                  # shape (3,)
broadcasting_res = col_factors * row_factors
print("Broadcasting result (4x3 matrix):\n", broadcasting_res)

Sliced M (every 3rd column):
 [[0 3]
 [6 9]]
Broadcasting result (4x3 matrix):
 [[0 0 0]
 [0 1 2]
 [0 2 4]
 [0 3 6]]


In [69]:
# --- SECTION 6: ADVANCED MATRIX MANIPULATION ---

# Seeded generator: without a fixed seed every run draws a different matrix,
# so the numbers printed by this cell could never be reproduced.
SEED = 42
rng = np.random.default_rng(SEED)

# 1. Create a random matrix M in R^(5x10) with coefficients in [-1, 1]
# Coefficients are independent and uniform
M = rng.uniform(-1, 1, (5, 10))

# 2. Subtract twice the value of the following uneven column to each even column
# Even columns: 0, 2, 4, 6, 8 | Uneven columns: 1, 3, 5, 7, 9
# The two strided views do not overlap, so the in-place update is safe.
M[:, 0::2] -= 2 * M[:, 1::2]

# 3. Data Cleaning: ReLU-like transformation
# Replace negative values by 0
M[M < 0] = 0

# 4. Row centering
row_means = M.mean(axis=1)
# Subtracting mean from each row using broadcasting (5,10) - (5,1);
# every row of M has zero mean afterwards.
M = M - row_means[:, np.newaxis]

# --- SECTION 7: LINEAR ALGEBRA & GRAM MATRIX ---

# 1. Computing the Gram Matrix G = M^T * M (Shape: 10x10)
G = M.T @ M

# 2. Symmetry Test
is_symmetric = np.allclose(G, G.T)
print(f"Is G symmetric? {is_symmetric}")

# 3. Eigenvalues and Positive Semi-Definiteness
# G is symmetric, so use the symmetric solver eigvalsh: it always returns real
# eigenvalues (in ascending order). The general solver eigvals may return a
# complex array with rounding-level imaginary parts, especially for the
# repeated zero eigenvalues of this rank-deficient matrix, which would make
# the ">=" test below unreliable.
eigenvalues = np.linalg.eigvalsh(G)
# A matrix is PSD if all eigenvalues are >= 0 (small tolerance for round-off)
is_psd = np.all(eigenvalues >= -1e-10) 
print(f"Are eigenvalues non-negative? {is_psd}")

# 4. Rank and Norms
# G is built from a 5x10 matrix, so its rank cannot exceed 5.
rank_G = np.linalg.matrix_rank(G)
norm_fro = np.linalg.norm(G, ord='fro')
norm_spec = np.linalg.norm(G, ord=2)

# 5. Column Statistics
std_cols = G.std(axis=0)

print(f"Rank of G: {rank_G}")
print(f"Frobenius Norm: {norm_fro:.4f}")
print(f"Spectral Norm: {norm_spec:.4f}")
print(f"Standard deviation of columns: \n{std_cols}")

Is G symmetric? True
Are eigenvalues non-negative? True
Rank of G: 5
Frobenius Norm: 14.3418
Spectral Norm: 13.2371
Standard deviation of columns: 
[0.83942697 0.80242375 1.6013344  0.87700502 2.01793555 1.33913886
 1.43426891 1.44013848 1.99683162 1.3794754 ]
