In [1]:
import numpy as np

In [2]:
x = np.eye(5)
x

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [3]:
x = np.eye(5, dtype=int)
x

array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]])

In [4]:
x.reshape(-1)

array([1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 1])

boolean index<br>
0 - False<br>
1 - True

In [5]:
a = np.array([1, 5, 3, 7, 9, 0.5, 4])

In [6]:
a < 20

array([ True,  True,  True,  True,  True,  True,  True])

In [7]:
a < 3

array([ True, False, False, False, False,  True, False])

In [8]:
a[a < 3]

array([1. , 0.5])

In [9]:
a = np.array([[10, 15, 20],
              [25, 30, 35]])

In [10]:
a[a > 20]

array([25, 30, 35])

In [11]:
a[a > 18]

array([20, 25, 30, 35])

In [12]:
a[a < 20] = 0
a

array([[ 0,  0, 20],
       [25, 30, 35]])

In [13]:
a = np.array([[1, 2, 3], [10, 20, 30]])
b = np.array([[3, 6, 8], [5, 6, 2], [5, 6, 9]])

In [14]:
a.shape

(2, 3)

In [15]:
b.shape

(3, 3)

In [16]:
a.dot(b)

array([[ 28,  36,  39],
       [280, 360, 390]])

In [17]:
np.matmul(a, b)

array([[ 28,  36,  39],
       [280, 360, 390]])

In [18]:
a @ b

array([[ 28,  36,  39],
       [280, 360, 390]])

In [19]:
data = np.array([[10, 20, 30],
                 [5, 15, 25]])

apply_along_axis

In [20]:
def range_of_row(x):
    return x.max() - x.min()

In [21]:
np.apply_along_axis(range_of_row, 0, data)

array([5, 5, 5])

In [22]:
data = np.array([[10, 23, 32],
                 [5, 15, 25]])

In [23]:
np.apply_along_axis(range_of_row, 0, data)

array([5, 8, 7])

In [24]:
np.apply_along_axis(range_of_row, 1, data)

array([22, 20])

In [25]:
data = np.array([[10, 23, 32],
                 [5, 15, 25]])

In [26]:
def custom(x):
    if 0 < x <= 10:
        return x ** 3
    elif 10 < x <= 15:
        return x ** 2
    elif 15 < x <= 20:
        return x ** 5
    else:
        return x ** 0.5

In [27]:
data.shape

(2, 3)

In [28]:
vectorized_custom = np.vectorize(custom)

In [29]:
vectorized_custom(data)

array([[1000,    4,    5],
       [ 125,  225,    5]])

In [30]:
arr = np.random.randint(0, 100, 10000000)
arr

array([ 5, 45, 99, ..., 77, 67, 73])

method 1: loop (slow)

In [31]:
def square_loop(x):
    return [i ** 2 for i in x]

method 2: vectorized (fast)

In [32]:
def square_vectorized(x):
    return x ** 2

In [33]:
%timeit square_loop(arr)
%timeit square_vectorized(arr)

1.14 s ± 75.5 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
11.3 ms ± 75.9 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [34]:
a = np.array([1, 2, 3, 4, 5])

In [35]:
b = a[1:4]
b

array([2, 3, 4])

In [36]:
b[0] = 322

In [37]:
a

array([  1, 322,   3,   4,   5])

In [38]:
c = a[1:4].copy()
c

array([322,   3,   4])

In [39]:
c[0] = 9211

In [40]:
a

array([  1, 322,   3,   4,   5])

splitting the array

In [41]:
a = np.array([11, 12, 17, 20, 32, 45, 67, 45, 89, 90, 11, 99])
a.size

12

In [42]:
np.split(a, 3)

[array([11, 12, 17, 20]), array([32, 45, 67, 45]), array([89, 90, 11, 99])]

In [43]:
b = np.array([[1, 2, 3, 4], [10, 20, 30, 40]])

In [44]:
np.split(b, 2, 0)

[array([[1, 2, 3, 4]]), array([[10, 20, 30, 40]])]

In [45]:
np.split(b, 2, 1)

[array([[ 1,  2],
        [10, 20]]),
 array([[ 3,  4],
        [30, 40]])]

In [46]:
np.split(b, 4, 1)

[array([[ 1],
        [10]]),
 array([[ 2],
        [20]]),
 array([[ 3],
        [30]]),
 array([[ 4],
        [40]])]

In [47]:
a

array([11, 12, 17, 20, 32, 45, 67, 45, 89, 90, 11, 99])

till 2, till 4, till7, remaining

In [48]:
np.split(a, [2, 4, 7])

[array([11, 12]),
 array([17, 20]),
 array([32, 45, 67]),
 array([45, 89, 90, 11, 99])]

In [49]:
np.split(a, [3, 5, 7, 9, 12])

[array([11, 12, 17]),
 array([20, 32]),
 array([45, 67]),
 array([45, 89]),
 array([90, 11, 99]),
 array([], dtype=int32)]

In [50]:
np.split(a, [3, 5, 7, 9, 11])

[array([11, 12, 17]),
 array([20, 32]),
 array([45, 67]),
 array([45, 89]),
 array([90, 11]),
 array([99])]

In [51]:
arr = np.arange(32).reshape(4, 8)
arr.shape

(4, 8)

In [52]:
arr

array([[ 0,  1,  2,  3,  4,  5,  6,  7],
       [ 8,  9, 10, 11, 12, 13, 14, 15],
       [16, 17, 18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29, 30, 31]])

In [53]:
np.split(arr, 4, 0)

[array([[0, 1, 2, 3, 4, 5, 6, 7]]),
 array([[ 8,  9, 10, 11, 12, 13, 14, 15]]),
 array([[16, 17, 18, 19, 20, 21, 22, 23]]),
 array([[24, 25, 26, 27, 28, 29, 30, 31]])]

In [54]:
np.split(arr, 4, 1)

[array([[ 0,  1],
        [ 8,  9],
        [16, 17],
        [24, 25]]),
 array([[ 2,  3],
        [10, 11],
        [18, 19],
        [26, 27]]),
 array([[ 4,  5],
        [12, 13],
        [20, 21],
        [28, 29]]),
 array([[ 6,  7],
        [14, 15],
        [22, 23],
        [30, 31]])]

In [55]:
np.savetxt("data/normalized_marks.csv", arr, delimiter=",", fmt="%.2f", header="One,Two,Three,Four", comments="")

# Pandas

In [56]:
!pip install pandas



In [57]:
import pandas as pd

In [58]:
marks = pd.Series([88, 75, 93, 91, 85], name="Marks")
marks

0    88
1    75
2    93
3    91
4    85
Name: Marks, dtype: int64

In [59]:
data = {
    "Name": ["subhendu", "amrendra", "sahoo"],
    "Marks": [88, 86, 93]
}

In [60]:
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Marks
0,subhendu,88
1,amrendra,86
2,sahoo,93


In [61]:
data = {
    "Name": ["Alice", "Bob", "Carol", "David", "Eva"],
    "Age": [19, 20, 19, 21, 20],
    "Gender": ["F", "M", "F", "M", "F"],
    "Marks_Math": [88, 75, 93, 70, 85],
    "Marks_Science": [92, 78, 88, 72, 90],
    "Marks_English": [85, 80, 90, 68, 87]
}

In [62]:
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Age,Gender,Marks_Math,Marks_Science,Marks_English
0,Alice,19,F,88,92,85
1,Bob,20,M,75,78,80
2,Carol,19,F,93,88,90
3,David,21,M,70,72,68
4,Eva,20,F,85,90,87


In [63]:
df.head()

Unnamed: 0,Name,Age,Gender,Marks_Math,Marks_Science,Marks_English
0,Alice,19,F,88,92,85
1,Bob,20,M,75,78,80
2,Carol,19,F,93,88,90
3,David,21,M,70,72,68
4,Eva,20,F,85,90,87


In [64]:
df = pd.read_csv("data/customers_updated.csv")

In [65]:
df.head()

Unnamed: 0,customer_id,name,sex,state,age,is_married,active_since,event_time,email
0,C10001,kara cooper,F,montana,76,True,2018-08-13 00:48:59,2024-05-02T13:43:28.295Z,kennethday@example.net
1,C10002,judy erickson,F,new york,76,True,2016-06-18 11:33:39,2024-05-02T13:43:28.296Z,smelendez@example.org
2,C10003,brian galvan,M,alaska,40,False,2017-01-23 19:25:14,2024-05-02T13:43:28.298Z,brianperry@example.net
3,C10004,karen norris,F,wisconsin,68,False,2018-06-15 09:07:32,2024-05-02T13:43:28.300Z,alyssamorgan@example.net
4,C10005,whitney taylor,F,ohio,62,True,2018-07-18 15:53:00,2024-05-02T13:43:28.301Z,cjenkins@example.org


In [66]:
df.tail()

Unnamed: 0,customer_id,name,sex,state,age,is_married,active_since,event_time,email
9995,C19996,jackie martin,F,delaware,88,False,2018-09-18 00:14:35,2024-05-02T13:43:34.852Z,qblair@example.net
9996,C19997,ashley davis,F,wisconsin,73,False,2016-12-13 03:39:29,2024-05-02T13:43:34.853Z,kimberlydiaz@example.net
9997,C19998,destiny gallagher,F,california,59,False,2017-04-24 21:35:35,2024-05-02T13:43:34.854Z,longjustin@example.net
9998,C19999,elizabeth bradley,F,hawaii,56,True,2019-11-14 21:46:34,2024-05-02T13:43:34.854Z,guerrerosandra@example.org
9999,C20000,jamie ho,F,missouri,38,False,2018-12-08 03:00:01,2024-05-02T13:43:34.855Z,chloe51@example.net


In [67]:
df.head(10)

Unnamed: 0,customer_id,name,sex,state,age,is_married,active_since,event_time,email
0,C10001,kara cooper,F,montana,76,True,2018-08-13 00:48:59,2024-05-02T13:43:28.295Z,kennethday@example.net
1,C10002,judy erickson,F,new york,76,True,2016-06-18 11:33:39,2024-05-02T13:43:28.296Z,smelendez@example.org
2,C10003,brian galvan,M,alaska,40,False,2017-01-23 19:25:14,2024-05-02T13:43:28.298Z,brianperry@example.net
3,C10004,karen norris,F,wisconsin,68,False,2018-06-15 09:07:32,2024-05-02T13:43:28.300Z,alyssamorgan@example.net
4,C10005,whitney taylor,F,ohio,62,True,2018-07-18 15:53:00,2024-05-02T13:43:28.301Z,cjenkins@example.org
5,C10006,brianna ruiz,F,washington,73,False,2016-02-10 16:33:24,2024-05-02T13:43:28.304Z,vluna@example.net
6,C10007,lisa johnson,F,hawaii,82,False,2018-04-12 22:46:45,2024-05-02T13:43:28.305Z,jeffrey28@example.org
7,C10008,joe garrett,M,new york,32,False,2018-01-25 13:18:05,2024-05-02T13:43:28.306Z,greenmichael@example.com
8,C10009,emily smith,F,maine,86,False,2018-12-03 07:53:40,2024-05-02T13:43:28.307Z,denisefoster@example.org
9,C10010,jeremiah flores,M,texas,33,True,2018-03-24 07:20:28,2024-05-02T13:43:28.309Z,wfox@example.org


In [68]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 9 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   customer_id   10000 non-null  object
 1   name          10000 non-null  object
 2   sex           10000 non-null  object
 3   state         10000 non-null  object
 4   age           10000 non-null  int64 
 5   is_married    10000 non-null  bool  
 6   active_since  10000 non-null  object
 7   event_time    10000 non-null  object
 8   email         10000 non-null  object
dtypes: bool(1), int64(1), object(7)
memory usage: 634.9+ KB
