# Vectorization

In [2]:
import numpy as np

In [2]:
n = 1000000
u = list(range(n))
v = list(range(n))

## Example 1 (Addition operation)

In [3]:
%%timeit

w =[]

for i in range(n):
    w.append(u[i] + v[i])


143 ms ± 2.01 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
%%timeit

w = np.array(u) + np.array(v)

83.1 ms ± 1.28 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


## Example 2 (MLR with gradient descent)

In [3]:
import time

In [7]:
np.random.seed(0)

n_samples, n_features = 100, 5

X = np.random.rand(n_samples, n_features)
y = 1 + 2*X[:,0] + 3*X[:,1] - 4*X[:,2] + 5*X[:,3] - 6*X[:,4] + np.random.randn(n_samples)*0.1

X_intercept = np.hstack([np.ones((n_samples, 1)), X])

theta = np.zeros(n_features + 1)
alpha = 0.01
num_iters = 10000

t_start = time.process_time()
for i in range(num_iters):
    preds = X_intercept.dot(theta)
    errors = preds - y
    gradient = (1/n_samples) * X_intercept.T.dot(errors)
    theta -= alpha * gradient
    break
t_finish = time.process_time()

print(("Estimate coefficients: ", theta))
print("Coputation time = " + str(1000*(t_finish - t_start)) + "ms")

('Estimate coefficients: ', array([0.01354521, 0.007284  , 0.00929198, 0.00279421, 0.01198525,
       0.00121251]))
Coputation time = 0.0ms


In [7]:
np.random.seed(0)

n_samples, n_features = 100, 5

X = np.random.rand(n_samples, n_features)
y = 1 + 2*X[:,0] + 3*X[:,1] - 4*X[:,2] + 5*X[:,3] - 6*X[:,4] + np.random.randn(n_samples)*0.1

X_intercept = np.hstack([np.ones((n_samples, 1)), X])

theta = np.zeros(n_features + 1)
alpha = 0.01
num_iters = 10000

t_start = time.process_time()
for i in range(num_iters):
    gradient = [0.0 for i in range(len(theta))]
    for j in range(n_samples):
        prediction_j=0.0
        for k in range(len(theta)):
            prediction_j += theta[k]*X_intercept[j][k]
        error_j = prediction_j - y[j]
        for k in range(len(theta)):
            gradient[k] += X_intercept[j][k]*error_j
    for k in range(len(theta)):
        gradient[k] /= n_samples
    for k in range(len(theta)):
        theta[k] -= alpha*gradient[k]
t_finish = time.process_time()

print(("Estimate coefficients: ", theta))
print("Coputation time = " + str(1000*(t_finish - t_start)) + "ms")

('Estimate coefficients: ', array([ 1.00857698,  1.99309484,  2.97095941, -3.94642115,  4.99847753,
       -6.03675435]))
Coputation time = 4937.5ms


**Normal Equation**

In [8]:
beta = np.linalg.inv(X_intercept.T @ X_intercept) @ (X_intercept.T @ y)
print("Estimate coefficients: ", beta)

Estimate coefficients:  [ 1.0036296   1.99943207  2.9740728  -3.94906546  4.99981351 -6.0354361 ]


# Comprehensions

## Example 1 (List comprehension)

In [9]:
%%timeit

squares = []
for i in range(10000000):
    squares.append(i**2)

3.04 s ± 33.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [10]:
%%timeit

squares = [i**2 for i in range(10000000)]

2.77 s ± 18 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [11]:
squares = [i**2 for i in range(10)]

pair_numbers = [i for i in squares if i%2==0]
print(pair_numbers)

[0, 4, 16, 36, 64]


In [12]:
words_list = ["hello", "world", "this", "is", "a", "test"]

chars = [c for word in words_list if len(word)>3 for c in word]
print(chars)

['h', 'e', 'l', 'l', 'o', 'w', 'o', 'r', 'l', 'd', 't', 'h', 'i', 's', 't', 'e', 's', 't']


## Example 2 (Generators & Dictionaries comprehension)

In [13]:
words_list = ["hello", "world", "this", "is", "a", "test"]

chars = (c for word in words_list if len(word)>3 for c in word)
print(chars)

<generator object <genexpr> at 0x0000018A9350CBA0>


In [14]:
print(next(chars))

h


In [15]:
dict1 = {'key1':'value1', 'key2':'value2', 'key3':'value3'}
dict1_reverse = {value:key for key, value in dict1.items()}
print(dict1_reverse)

{'value1': 'key1', 'value2': 'key2', 'value3': 'key3'}


## Example 3 (Comprehension for prompt creation from file)

In [17]:
feedback_file_path = "feedbacks.txt"

In [18]:
filtered_feedbacks = []

with open(feedback_file_path, "r", encoding="utf-8") as file:
    for line in file:
        stripped_line = line.strip()
        words = stripped_line.split()
        if len(words)>5:
            filtered_feedbacks.append(stripped_line)

context = ""
for feedback in filtered_feedbacks:
    context += feedback + "\n"

print(context)

Fast shipping and excellent customer service. Highly recommend!
Loved the new features in the latest update.
Terrible experience. The item broke within a week of use.
Helped me solve my issue quickly.
Absolutely love it! Will definitely buy again.
Too expensive for the value offered.
Great product! The quality is amazing and totally worth the price.
The packaging was damaged, but the product works fine.
Not what I expected. The material feels cheap.



In [20]:
filtered_feedbacks = [
    line.strip()
    for line in open(feedback_file_path, "r", encoding="utf-8")
    if len(line.split())>5
]

context = "\n".join(filtered_feedbacks)

print(context)

Fast shipping and excellent customer service. Highly recommend!
Loved the new features in the latest update.
Terrible experience. The item broke within a week of use.
Helped me solve my issue quickly.
Absolutely love it! Will definitely buy again.
Too expensive for the value offered.
Great product! The quality is amazing and totally worth the price.
The packaging was damaged, but the product works fine.
Not what I expected. The material feels cheap.


# Built-in functions

### Example 1 (Map function)

In [22]:
words_list = ["hello", "world", "this", "is", "a", "test"]

upper_words = map(str.upper, words_list)

print(upper_words)

print(list(upper_words))

<map object at 0x0000018AE3129780>
['HELLO', 'WORLD', 'THIS', 'IS', 'A', 'TEST']


In [23]:
feature_values = [50, 200, 30, 150, 100]

def min_max_scaling(x, min_val, max_val):
    return (x-min_val)/(max_val-min_val)

normalized_values = list(map(lambda x: min_max_scaling(x, min(feature_values), max(feature_values)), feature_values))

print(normalized_values)

[0.11764705882352941, 1.0, 0.0, 0.7058823529411765, 0.4117647058823529]


### Example 2 (Filter)

In [25]:
words_list = ["hello", "world", "this", "is", "a", "test"]

big_words = filter(lambda word: len(word) > 4, words_list)

print(list(big_words))

['hello', 'world']


### Example 3 (Sorted)

In [26]:
predictions = [
    ("Cat", .85),
    ("Dog", .78),
    ("Bird", .92),
    ("Fish", .6)
]

sorted_predictions = sorted(predictions, key=lambda x: x[1], reverse=True)

print(sorted_predictions)

[('Bird', 0.92), ('Cat', 0.85), ('Dog', 0.78), ('Fish', 0.6)]


# Iterators & Generators

## Example 1 (Generators)

In [27]:
def feedback_generator(file_path, batch_size):
    batch = []
    with open(file_path, "r", encoding="utf-8") as file:
        for line in file:
            batch.append(line.strip())
            if len(batch) == batch_size:
                yield batch
                batch = []
        if batch:
            yield batch

file_path = "feedbacks.txt"
batch_size = 2

gen = feedback_generator(file_path, batch_size)
for batch in gen:
    print(batch)

['Great product, very useful.', 'Fast shipping and excellent customer service. Highly recommend!']
['Loved the new features in the latest update.', 'Bad customer service experience.']
['Terrible experience. The item broke within a week of use.', 'Helped me solve my issue quickly.']
['Absolutely love it! Will definitely buy again.', 'Too expensive for the value offered.']
['Great product! The quality is amazing and totally worth the price.', 'Okay.']
['Excellent support and fast responses.', 'The packaging was damaged, but the product works fine.']
['Not satisfied.', 'Not what I expected. The material feels cheap.']


In [29]:
def feedback_generator(file_path, batch_size):
    with open(file_path, "r", encoding="utf-8") as file:
        lines = (line.strip() for line in file)
        yield from (batch for batch in zip(*[lines]*batch_size))

        remainder = list(lines)
        if remainder:
            yield remainder

file_path = "feedbacks.txt"
batch_size = 2

gen = feedback_generator(file_path, batch_size)
for batch in gen:
    print(batch)

('Great product, very useful.', 'Fast shipping and excellent customer service. Highly recommend!')
('Loved the new features in the latest update.', 'Bad customer service experience.')
('Terrible experience. The item broke within a week of use.', 'Helped me solve my issue quickly.')
('Absolutely love it! Will definitely buy again.', 'Too expensive for the value offered.')
('Great product! The quality is amazing and totally worth the price.', 'Okay.')
('Excellent support and fast responses.', 'The packaging was damaged, but the product works fine.')
('Not satisfied.', 'Not what I expected. The material feels cheap.')


### Example 2 (Iterators)

In [32]:
class MyIterator:
    def __init__(self, start, end):
        self.current = start
        self.end = end
    def __iter__(self):
        return self
    def __next__(self):
        if self.current > self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value
    
my_iter = MyIterator(1, 5)
for num in my_iter:
    print(num)

1
2
3
4
5


In [33]:
import itertools

**Chain & Chain.from_iterable**

In [34]:
feedbacks_list = ["feedback1", "feedback2"]
filtered_feedbacks = (
    line.strip()
    for line in open(feedback_file_path, "r", encoding="utf-8")
)

merged = itertools.chain(feedbacks_list, filtered_feedbacks)

print(list(merged))

['feedback1', 'feedback2', 'Great product, very useful.', 'Fast shipping and excellent customer service. Highly recommend!', 'Loved the new features in the latest update.', 'Bad customer service experience.', 'Terrible experience. The item broke within a week of use.', 'Helped me solve my issue quickly.', 'Absolutely love it! Will definitely buy again.', 'Too expensive for the value offered.', 'Great product! The quality is amazing and totally worth the price.', 'Okay.', 'Excellent support and fast responses.', 'The packaging was damaged, but the product works fine.', 'Not satisfied.', 'Not what I expected. The material feels cheap.']


In [35]:
filtered_feedbacks = (
    line.strip()
    for line in open(feedback_file_path, "r", encoding="utf-8")
)

nested_list = [feedbacks_list, filtered_feedbacks]

flattened = itertools.chain.from_iterable(nested_list)

print(list(flattened))

['feedback1', 'feedback2', 'Great product, very useful.', 'Fast shipping and excellent customer service. Highly recommend!', 'Loved the new features in the latest update.', 'Bad customer service experience.', 'Terrible experience. The item broke within a week of use.', 'Helped me solve my issue quickly.', 'Absolutely love it! Will definitely buy again.', 'Too expensive for the value offered.', 'Great product! The quality is amazing and totally worth the price.', 'Okay.', 'Excellent support and fast responses.', 'The packaging was damaged, but the product works fine.', 'Not satisfied.', 'Not what I expected. The material feels cheap.']


**Groupby**

In [36]:
feedbacks = [
    ("Great service", "positive"),
    ("Average experience", "neutral"),
    ("not worth the price", "negative"),
    ("fast delivery", "positive")
]

for sentiment, group in itertools.groupby(feedbacks, key=lambda x: x[1]):
    print(f"{sentiment}:{[item[0] for item in group]}")

positive:['Great service']
neutral:['Average experience']
negative:['not worth the price']
positive:['fast delivery']


In [38]:
feedbacks = [
    ("Great service", "positive"),
    ("Average experience", "neutral"),
    ("not worth the price", "negative"),
    ("fast delivery", "positive")
]

sorted_feedbacks = sorted(feedbacks, key=lambda x: x[1])

for sentiment, group in itertools.groupby(sorted_feedbacks, key=lambda x: x[1]):
    print(f"{sentiment}:{[item[0] for item in group]}")

negative:['not worth the price']
neutral:['Average experience']
positive:['Great service', 'fast delivery']


**Cycle**

In [39]:
api_keys = [
    "API_KEY1",
    "API_KEY2",
    "API_KEY3"
]

api_key_pool = itertools.cycle(api_keys)

for i in range(10):
    api_key = next(api_key_pool)
    print(f"Request {i+1} using {api_key}")

Request 1 using API_KEY1
Request 2 using API_KEY2
Request 3 using API_KEY3
Request 4 using API_KEY1
Request 5 using API_KEY2
Request 6 using API_KEY3
Request 7 using API_KEY1
Request 8 using API_KEY2
Request 9 using API_KEY3
Request 10 using API_KEY1
