# NUMPY

In [None]:
import numpy as np

# Creating Arrays

In [None]:
# 1D array
arr1=np.array([1,2,3])
# 2D array
arr2=np.array([[1,2],[4,3]])
# Array with 3 rows and 5 columns (3,5)
arr3=np.array([[1,8,5,9,5],[6,7,8,9,11],[10,2,3,4,15]])
# Array of zeros
zeros=np.zeros((2,3))
# Array of ones
ones=np.ones((3,3))
# Constant array
full=np.full((2,2),7)
# Identity matrix
eye=np.eye(3)
# Random floats
rand=np.random.rand(3,2)
# Evenly spaced numbers
lin=np.linspace(0,1,5)
# Range of numbers
range_arr=np.arange(0,10,2)

In [None]:
arr1

array([1, 2, 3])

In [None]:
arr2

array([[1, 2],
       [4, 3]])

In [None]:
arr3

array([[ 1,  8,  5,  9,  5],
       [ 6,  7,  8,  9, 11],
       [10,  2,  3,  4, 15]])

In [None]:
#  2D array (2,3) filled with zeros
zeros

array([[0., 0., 0.],
       [0., 0., 0.]])

In [None]:
# (3,3) array filled with ones
ones

array([[1., 1., 1.],
       [1., 1., 1.],
       [1., 1., 1.]])

In [None]:
# 2x2 array filled with constant values of 7
full

array([[7, 7],
       [7, 7]])

In [None]:
# 3x3 idenity matrix has 1's on the diagonal and 0's elsewhere
eye

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [None]:
# 3x2 array filled with random floats between 0 and 1
rand

array([[0.10806377, 0.17890281],
       [0.8858271 , 0.36536497],
       [0.21876935, 0.75249617]])

In [None]:
# (0,1,5) 5 evenly spaced numbers from 0 to 1
lin

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [None]:
# create numbers from 0 to 10 ( excluding 10)
range_arr

array([0, 2, 4, 6, 8])

#  Array Information

In [None]:
# shape of the array
arr2.shape

(2, 2)

In [None]:
# number od dimensions
arr2.ndim

2

In [None]:
# Total number of elements
# 2 rows x 2 columns = 4 elements in total
arr2.size

4

In [None]:
# data types of elements
# 64 bit int
arr2.dtype

dtype('int64')

In [None]:
# size of each element in bytes
# 8( for int64)
# each element in the array takes 8 bytes ( if int64)
arr2.itemsize

8

# Reshaping and Flattening

In [None]:
# change the shape
# arr3: (3,5)= 15 elements  (5,3)= 15 elements

arr3.reshape(5,3)

array([[ 1,  8,  5],
       [ 9,  5,  6],
       [ 7,  8,  9],
       [11, 10,  2],
       [ 3,  4, 15]])

In [None]:
# Flatten to 1D
# 2D array arr2 into a 1D array
arr2.ravel()

array([1, 2, 4, 3])

In [None]:
# Transpose the array
# swaps rows with columns
arr2.T

array([[1, 4],
       [2, 3]])

In [None]:
# add new dimension at axis
# add a new dimension at the beginning ( axis 0)
# arr2 shape : (2,2) --> new shape:(1,2,2) --> makes it a 3D array
np.expand_dims(arr2,axis=1)

array([[[1, 2]],

       [[4, 3]]])

# Indexing and slicing

In [None]:
# access specific element
# row 0, column >> 8
arr3[0,1]

np.int64(8)

In [None]:
# get entire column 1
# : meand all rows, 1 refers to column index 1
arr3[:,1]

array([8, 7, 2])

In [None]:
# Get entire row 1
# row index 1 , : means all columns
arr3[1,:]

array([ 6,  7,  8,  9, 11])

In [None]:
# slicing submatrix
# row: from index 0 to 2 ( 3 is excluded --> rows 0,1,2)
# columns: from index 0 ro1 ( 2 is excluded --> columns 0 and 1)
arr3[0:3,0:2]

array([[ 1,  8],
       [ 6,  7],
       [10,  2]])

# Array operations

In [None]:
# add scalar , adds 10 to each element of arr3
arr3 + 10

array([[11, 18, 15, 19, 15],
       [16, 17, 18, 19, 21],
       [20, 12, 13, 14, 25]])

In [None]:
# multiply by scalar , multiplies each element of arr3 by 2
arr3 * 2

array([[ 2, 16, 10, 18, 10],
       [12, 14, 16, 18, 22],
       [20,  4,  6,  8, 30]])

In [None]:
# element-wise addition
# both arr2 and arr4 are of shape (2,2), adds corresponding elements from both arrays
arr4=np.array([[5.4,6.9],[7.8,8]])
arr4 + arr2

array([[ 6.4,  8.9],
       [11.8, 11. ]])

In [None]:
# matrix dot product
# arr2: shape(2,2)
# arr4.T: shape (2,2) ( transpose of arr4, but same shape here)
np.dot(arr2,arr4.T)

array([[19.2, 23.8],
       [42.3, 55.2]])

How it's calculated:

First row of arr2: [1, 2]

dot with [5.4, 6.9] → 1 x5.4 + 2 x 6.9 = 5.4 + 13.8 = 19.2

dot with [7.8, 8] → 1x7.8 + 2x8 = 7.8 + 16 = 23.8

Second row of arr2: [4, 3]

dot with [5.4, 6.9] → 4 x5.4 + 3 x 6.9 = 21.6 + 20.7 = 42.3

dot with [7.8, 8] → 4x7.8 + 3x8 = 55.2

In [None]:
# element-wise product -- multiplies corresponding elements
np.multiply(arr2,arr4)

array([[ 5.4, 13.8],
       [31.2, 24. ]])

# Mathematical functions

In [None]:
# sum of all elements
# (1+2+3 = 6)
np.sum(arr1)

np.int64(6)

In [None]:
# mean(average)
# (1+2+3+4)/4 --> 2.5
np.mean(arr2)

np.float64(2.5)

In [None]:
# median value
np.median(arr3)

np.float64(7.0)

flattend array [1,2,3,4,5,5,6,7,8,8,9,9,10,11,15]
median = 7.0

In [None]:
# standard deviation
# ~ 3.59 ( measuews spread of values)
np.std(arr3)

np.float64(3.5938218591849473)


### Flatten it into a 1D array:

```
[1, 8, 5, 9, 5, 6, 7, 8, 9, 11, 10, 2, 3, 4, 15]
```

###  Step 1: Find the Mean

Sum of all values:

```
1 + 8 + 5 + 9 + 5 + 6 + 7 + 8 + 9 + 11 + 10 + 2 + 3 + 4 + 15 = 103
```

Number of values = 15

**Mean** $\mu$ = Total sum / Count

$$
\mu = \frac{103}{15} \approx 6.87
$$

---

### Step 2: Find Squared Differences from Mean


| Value | Difference from Mean | Square |
| ----- | -------------------- | ------ |
| 1     | 1 - 6.87 = -5.87     | 34.46  |
| 8     | 1.13                 | 1.28   |
| 5     | -1.87                | 3.50   |
| 9     | 2.13                 | 4.53   |
| 5     | -1.87                | 3.50   |
| 6     | -0.87                | 0.76   |
| 7     | 0.13                 | 0.02   |
| 8     | 1.13                 | 1.28   |
| 9     | 2.13                 | 4.53   |
| 11    | 4.13                 | 17.06  |
| 10    | 3.13                 | 9.80   |
| 2     | -4.87                | 23.72  |
| 3     | -3.87                | 14.97  |
| 4     | -2.87                | 8.23   |
| 15    | 8.13                 | 66.09  |

---

### Step 3: Take Mean of Squared Differences (Variance)

Sum of squares:

```
34.46 + 1.28 + 3.50 + 4.53 + 3.50 + 0.76 + 0.02 + 1.28 + 4.53 + 17.06 + 9.80 + 23.72 + 14.97 + 8.23 + 66.09 = 193.73
```

Divide by number of elements (15):

$$
\text{Variance} = \frac{193.73}{15} \approx 12.915
$$

---

### Step 4: Square Root of Variance = Standard Deviation

$$
\text{Standard Deviation} = \sqrt{12.915} \approx 3.59
$$

---

###  Final Answer:

```python
np.std(arr3) ≈ 3.59
```

In [None]:
# variance
np.var(arr3)

np.float64(12.915555555555553)



### Flatten it into a 1D array:

```
[1, 8, 5, 9, 5, 6, 7, 8, 9, 11, 10, 2, 3, 4, 15]
```

---

###  Step 1: Find the Mean

Sum of all values:

```
1 + 8 + 5 + 9 + 5 + 6 + 7 + 8 + 9 + 11 + 10 + 2 + 3 + 4 + 15 = 103
```

Number of values = 15

**Mean** $\mu$ = Total sum / Count

$$
\mu = \frac{103}{15} \approx 6.87
$$

---

###  Step 2: Find Squared Differences from Mean

Now, subtract the mean from each value and square it:

| Value | Difference from Mean | Square |
| ----- | -------------------- | ------ |
| 1     | 1 - 6.87 = -5.87     | 34.46  |
| 8     | 1.13                 | 1.28   |
| 5     | -1.87                | 3.50   |
| 9     | 2.13                 | 4.53   |
| 5     | -1.87                | 3.50   |
| 6     | -0.87                | 0.76   |
| 7     | 0.13                 | 0.02   |
| 8     | 1.13                 | 1.28   |
| 9     | 2.13                 | 4.53   |
| 11    | 4.13                 | 17.06  |
| 10    | 3.13                 | 9.80   |
| 2     | -4.87                | 23.72  |
| 3     | -3.87                | 14.97  |
| 4     | -2.87                | 8.23   |
| 15    | 8.13                 | 66.09  |

---

###  Step 3: Take Mean of Squared Differences (Variance)

Sum of squares:

```
34.46 + 1.28 + 3.50 + 4.53 + 3.50 + 0.76 + 0.02 + 1.28 + 4.53 + 17.06 + 9.80 + 23.72 + 14.97 + 8.23 + 66.09 = 193.73
```

Divide by number of elements (15):

$$
\text{Variance} = \frac{193.73}{15} \approx 12.915
$$

In [None]:
# minimum values
np.min(arr4)

np.float64(5.4)

In [None]:
# maximum values
np.max(arr3)

np.int64(15)

In [None]:
# index of minimum elements
# 0 --> 1 is the smallest , at index 0 in flattened view
np.argmin(arr3)

np.int64(0)

In [None]:
# index of maximum element
# 14 --> 15 is the largest, at the index 14 in flattend view
np.argmax(arr3)

np.int64(14)

In [None]:
# cumulative sum
np.cumsum(arr3)

array([  1,   9,  14,  23,  28,  34,  41,  49,  58,  69,  79,  81,  84,
        88, 103])

### Flattening (because no `axis` is given)
   NumPy first lays the 2-D array out as one long 1-D vector in C-style (row-major) order:

   ```
   [1, 8, 5, 9, 5,   6, 7, 8, 9, 11,   10, 2, 3, 4, 15]
   ```

### Running total.
   Starting at the first element, it keeps adding each new value to the total so far:
  ```python
   | Index | Value | Running total |
   | ----- | ----- | ------------- |
   | 0     | 1     | 1             |
   | 1     | 8     | 1 + 8   = 9   |
   | 2     | 5     | 9 + 5   = 14  |
   | 3     | 9     | 14 + 9  = 23  |
   | 4     | 5     | 23 + 5  = 28  |
   | 5     | 6     | 28 + 6  = 34  |
   | 6     | 7     | 34 + 7  = 41  |
   | 7     | 8     | 41 + 8  = 49  |
   | 8     | 9     | 49 + 9  = 58  |
   | 9     | 11    | 58 + 11 = 69  |
   | 10    | 10    | 69 + 10 = 79  |
   | 11    | 2     | 79 + 2  = 91  |
   | 12    | 3     | 91 + 3  = 94  |
   | 13    | 4     | 94 + 4  = 98  |
   | 14    | 15    | 98 + 15 = 113 |
   ```

### Return value
   The cumulative totals are stored in a new array of the same length:

   ```python
   array([  1,   9,  14,  23,  28,
           34,  41,  49,  58,  69,
           79,  91,  94,  98, 113])
   ```

---

###   Why and when you’d change `axis`

* **Row-wise running totals:** `np.cumsum(arr3, axis=1)`
  – keeps each row separate and accumulates across the columns.

* **Column-wise running totals:** `np.cumsum(arr3, axis=0)`
  – accumulates down each column.



In [None]:
# cumulative product
np.cumprod(arr2)

array([ 1,  2,  8, 24])


###  Flattened version:

```
[1, 8, 5, 9, 5, 6, 7, 8, 9, 11, 10, 2, 3, 4, 15]
```

---

### Step-by-step Cumulative Product:

| Index | Value | Running Product                 |
| ----- | ----- | ------------------------------- |
| 0     | 1     | 1                               |
| 1     | 8     | 1 × 8   = 8                     |
| 2     | 5     | 8 × 5   = 40                    |
| 3     | 9     | 40 × 9  = 360                   |
| 4     | 5     | 360 × 5 = 1800                  |
| 5     | 6     | 1800 × 6 = 10800                |
| 6     | 7     | 10800 × 7 = 75600               |
| 7     | 8     | 75600 × 8 = 604800              |
| 8     | 9     | 604800 × 9 = 5443200            |
| 9     | 11    | 5443200 × 11 = 59875200         |
| 10    | 10    | 59875200 × 10 = 598752000       |
| 11    | 2     | 598752000 × 2 = 1197504000      |
| 12    | 3     | 1197504000 × 3 = 3592512000     |
| 13    | 4     | 3592512000 × 4 = 14370048000    |
| 14    | 15    | 14370048000 × 15 = 215550720000 |

---

### Final Output:

```python
array([
         1,         8,        40,       360,      1800,
     10800,     75600,    604800,   5443200,  59875200,
 598752000, 1197504000, 3592512000,14370048000,215550720000
])
```


In [None]:
# round off to 0 decimal places
np.round(arr4,0)

array([[5., 7.],
       [8., 8.]])

In [None]:
# unique elements
np.unique(arr3)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 15])

# Logical and comparison

In [None]:
# element - wise comparison
# compares each element to 5 and returns a boolean array (True or False)
arr3>5

array([[False,  True, False,  True, False],
       [ True,  True,  True,  True,  True],
       [ True, False, False, False,  True]])

In [None]:
# conditional replacement
# if (element > 3) -->1, else --> 0
np.where(arr3>3,1,0)

array([[0, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 0, 0, 1, 1]])

In [None]:
# check if any element is > 4
# returns true if at least one element is greater than 4
np.any(arr3>4)

np.True_

In [None]:
# check if all elements > 0
# returns true if all elements are greater than 0
np.all(arr3> 0)

np.True_

# Copying and Broadcasting

In [None]:
# Deep copy , copy_arr is completely independent of arr3
copy_arr=arr3.copy()
copy_arr

array([[ 1,  8,  5,  9,  5],
       [ 6,  7,  8,  9, 11],
       [10,  2,  3,  4, 15]])

In [None]:
# broadcasting,
broadcasting= arr2 + np.array([1,2])
broadcasting

array([[2, 4],
       [5, 5]])

broadcasted = [[1+1, 2+2],
               [4+1, 3+2]]
           => [[2, 4],
               [5, 5]]


# Handling missing data

In [None]:
# check for NaN values, each element is true if it's NaN else false
np.isnan(arr3)

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False, False, False]])

In [None]:
# Replace NaN with 0
arr=np.nan_to_num(arr3)
arr

array([[ 1,  8,  5,  9,  5],
       [ 6,  7,  8,  9, 11],
       [10,  2,  3,  4, 15]])

# Saving and loading arrays

In [None]:
# save as binary .npy files
np.save('data.npy',arr)

In [None]:
# load binary .npy files
np.load('data.npy')

array([[ 1,  8,  5,  9,  5],
       [ 6,  7,  8,  9, 11],
       [10,  2,  3,  4, 15]])

In [None]:
# save as csv
np.savetxt('data.csv',arr,delimiter=',')

In [None]:
# load csv
np.loadtxt('data.csv',delimiter=',')

array([[ 1.,  8.,  5.,  9.,  5.],
       [ 6.,  7.,  8.,  9., 11.],
       [10.,  2.,  3.,  4., 15.]])

# Random madule

In [None]:
# set random seed - to fixed values ( 10 in this case)
# this ensures reproduciblity - every time you run your code, you will get the same random results
a=np.random.seed(10)
a

In [None]:
# generate normal distribution
# generate 100 random numbers from a normal distribution 0: mean ( center of the distribution) 1: SD
np.random.normal(0,1,100)

array([ 1.3315865 ,  0.71527897, -1.54540029, -0.00838385,  0.62133597,
       -0.72008556,  0.26551159,  0.10854853,  0.00429143, -0.17460021,
        0.43302619,  1.20303737, -0.96506567,  1.02827408,  0.22863013,
        0.44513761, -1.13660221,  0.13513688,  1.484537  , -1.07980489,
       -1.97772828, -1.7433723 ,  0.26607016,  2.38496733,  1.12369125,
        1.67262221,  0.09914922,  1.39799638, -0.27124799,  0.61320418,
       -0.26731719, -0.54930901,  0.1327083 , -0.47614201,  1.30847308,
        0.19501328,  0.40020999, -0.33763234,  1.25647226, -0.7319695 ,
        0.66023155, -0.35087189, -0.93943336, -0.48933722, -0.80459114,
       -0.21269764, -0.33914025,  0.31216994,  0.56515267, -0.14742026,
       -0.02590534,  0.2890942 , -0.53987907,  0.70816002,  0.84222474,
        0.2035808 ,  2.39470366,  0.91745894, -0.11227247, -0.36218045,
       -0.23218226, -0.5017289 ,  1.12878515, -0.69781003, -0.08112218,
       -0.52929608,  1.04618286, -1.41855603, -0.36249918, -0.12

In [None]:
# random row selection
np.random.choice(arr.shape[0],size=3,replace=False)

array([0, 1, 2])

In [None]:
# random integers
# random integers between low (1) and high(10)
np.random.randint(1, 10, size=5)

array([2, 5, 8, 4, 6])

In [None]:
np.random.randint(1, 10, size=(2,3))

array([[4, 9, 2],
       [7, 9, 3]])

In [None]:
# random permulation ( shuffling)
np.random.permutation(5)

array([1, 4, 3, 0, 2])

In [None]:
np.random.permutation([50,30,11])

array([50, 11, 30])

In [None]:
# random floats from uniform distribution
# random float values between low to high
np.random.uniform(5,10,size=5)

array([8.57124638, 9.4720552 , 8.86235978, 9.1635764 , 9.49237758])

# Matrix and linear algebra
## linalg stands for Linear Algebra
---

In [None]:
# matrix inverse
# calculates the inverse of a square matrix arr2
# A @ A⁻¹ = I (identity matrix), then inverse is valid.
np.linalg.inv(arr2)

array([[-0.6,  0.4],
       [ 0.8, -0.2]])

In [None]:
# determinant
np.linalg.det(arr2)

np.float64(-4.999999999999999)


* `det ≠ 0` → the matrix is **invertible**
* `det = 0` → the matrix is **singular** (not invertible)



##  Example with Your Matrix:

```python
arr2 = np.array([[1, 2],
                 [4, 3]])
```

### For a 2x2 matrix:

If

```
A = [[a, b],
     [c, d]]
```

Then the **determinant** is:

```
det(A) = (a * d) - (b * c)
```

###  Apply to `arr2`:

```
a = 1, b = 2
c = 4, d = 3

det(arr2) = (1 * 3) - (2 * 4)
           = 3 - 8
           = -5
```


```python
np.linalg.det(arr2) → -5.0
```



In [None]:
# Eigenvalues and eigenvectors
np.linalg.eig(arr2)

EigResult(eigenvalues=array([-1.,  5.]), eigenvectors=array([[-0.70710678, -0.4472136 ],
       [ 0.70710678, -0.89442719]]))


* **Eigenvalues** (λ)
* **Eigenvectors** (v)

from the matrix `arr2`.

It solves the equation:

```
A · v = λ · v
```

Where:

* `A` is your matrix
* `λ` are **eigenvalues**
* `v` are **eigenvectors**



```python
arr2 = np.array([
    [1, 2],
    [4, 3]
])
```


##  Output Explanation:

```python
eigenvalues = array([-1., 5.])
eigenvectors = array([
    [-0.70710678, -0.4472136 ],
    [ 0.70710678, -0.89442719]
])
```


###  1. Eigenvalues

```python
[-1.0, 5.0]
```

These are the scalars `λ` such that:

```
arr2 · v = λ · v
```

* `5.0` → dominant eigenvalue (larger)
* `-1.0` → secondary eigenvalue


###  2. Eigenvectors

Each column corresponds to an eigenvector for its matching eigenvalue.

So:

* First column → eigenvector for `-1.0`
* Second column → eigenvector for `5.0`

```python
Eigenvector for -1.0 = [-0.7071,  0.7071]
Eigenvector for  5.0 = [-0.4472, -0.8944]
```



In [None]:
# solve linear system
# arr2 × X = arr4 --> X= arr4/arr2
np.linalg.solve(arr2,arr4)

array([[-0.12, -0.94],
       [ 2.76,  3.92]])

# useful utility

In [None]:
# Limit values within a range
# limit (clamps) all values in arr to be between 0 and 10 , if values < 0 --> its set to 0, if a values is > 10 --> its sets to 10
np.clip(arr,0,10)

array([[ 1,  8,  5,  9,  5],
       [ 6,  7,  8,  9, 10],
       [10,  2,  3,  4, 10]])

In [None]:
# sort by columns
# axis=0 --> vertical ( down each column)
np.sort(arr3,axis=0)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  5,  9, 11],
       [10,  8,  8,  9, 15]])

In [None]:
# get indices that would sort the array
# returns the indices that would aodrt arr in ascending order
np.argsort(arr)

array([[0, 2, 4, 1, 3],
       [0, 1, 2, 3, 4],
       [1, 2, 3, 0, 4]])

In [None]:
# find the 30th percentile
# finds the value below which 30% of the data lies
np.percentile(arr,30)

np.float64(5.0)

# full numpy analysis

In [18]:
import pandas as pd

In [20]:
# load data
df=pd.read_csv('/content/market_data.csv')

In [21]:
# convert DataFrame to numpy array
data=df.to_numpy()

In [41]:
# rows and columns
data.shape

(992, 8)

In [22]:
# mean = average
mean=np.mean(data,axis=0)
mean

array([1.71327119e+05, 3.05930343e+01, 2.51139113e-01, 9.86787399e+01,
       4.94526210e-01, 1.45998286e+01, 1.47956956e+03, 1.59650428e+03])

In [23]:
# measures how spread out the values are from the mean
std=np.std(data,axis=0)
std

array([8.13568058e+04, 1.74842841e+01, 1.45274396e-01, 5.70885512e+01,
       2.86665650e-01, 8.71113948e+00, 8.84973244e+02, 9.27007192e+02])

In [24]:
# smallest values
min=np.min(data,axis=0)
min

array([1.992e+03, 1.900e-01, 0.000e+00, 1.300e-01, 0.000e+00, 1.400e-01,
       4.000e+00, 1.254e+01])

In [25]:
# largest values
max=np.max(data,axis=0)
max

array([3.93914e+05, 5.99600e+01, 5.00000e-01, 1.99910e+02, 1.00000e+00,
       2.99900e+01, 2.99700e+03, 3.19827e+03])

In [26]:
# correlation matrix
# transpose the matrix so that each row becomes a features/columns
# np.corrcoef() --> computes pearson crrelation cofficients between columns
correlation=np.corrcoef(data.T)
correlation

array([[ 1.00000000e+00,  5.77708002e-01,  1.09981939e-02,
         4.13330312e-01, -7.24346027e-02, -6.73372350e-01,
        -1.53619238e-02,  4.23850963e-02],
       [ 5.77708002e-01,  1.00000000e+00,  2.88301028e-02,
        -1.85335034e-02,  3.17467013e-02,  4.49066779e-02,
        -8.72849276e-02,  3.32677786e-02],
       [ 1.09981939e-02,  2.88301028e-02,  1.00000000e+00,
        -1.36513476e-02, -1.26165374e-02,  1.26036539e-02,
        -4.32776640e-03, -3.46000797e-02],
       [ 4.13330312e-01, -1.85335034e-02, -1.36513476e-02,
         1.00000000e+00, -4.50784360e-02, -1.35694204e-02,
        -9.19703810e-04,  1.23816682e-02],
       [-7.24346027e-02,  3.17467013e-02, -1.26165374e-02,
        -4.50784360e-02,  1.00000000e+00,  3.67558989e-02,
        -1.41255911e-03, -4.63319862e-03],
       [-6.73372350e-01,  4.49066779e-02,  1.26036539e-02,
        -1.35694204e-02,  3.67558989e-02,  1.00000000e+00,
        -5.50589312e-02, -2.29716893e-02],
       [-1.53619238e-02, -8.728492

In [27]:
# data → NumPy array of the numerical data.
# df.columns.get_loc('Price') → Gets the index (column position) of 'Price'.
# data_np[:, index] > 10 → Creates a boolean mask for rows where Price > 10.
# The result is all rows from data_np where the condition is true.
high_price_rows= data[data[:,df.columns.get_loc('Price')]>10]
high_price_rows

array([[2.40368e+05, 5.99000e+01, 4.60000e-01, ..., 1.25600e+01,
        1.06500e+03, 1.08160e+03],
       [8.26970e+04, 4.69400e+01, 2.70000e-01, ..., 2.89000e+01,
        1.40700e+03, 1.85527e+03],
       [2.71990e+05, 5.70800e+01, 1.70000e-01, ..., 1.56000e+01,
        1.72300e+03, 2.91629e+03],
       ...,
       [3.28940e+04, 2.27900e+01, 4.90000e-01, ..., 2.92300e+01,
        5.03000e+02, 1.02022e+03],
       [3.90910e+04, 1.38400e+01, 1.40000e-01, ..., 2.31300e+01,
        2.90200e+03, 1.20103e+03],
       [3.72900e+04, 7.81000e+00, 1.90000e-01, ..., 2.34000e+01,
        1.82000e+03, 6.53620e+02]])

In [28]:
# min - max normalization
# subract min and divide by range --> scales each feature to the range [0,1]
norm_data =(data - np.min(data,axis=0))/(np.max(data,axis=0)- np.min(data,axis=0))
norm_data

array([[0.60822306, 0.99899615, 0.92      , ..., 0.4160804 , 0.35449382,
        0.33557772],
       [0.5237879 , 0.11862138, 0.78      , ..., 0.17755444, 0.61075844,
        0.2004878 ],
       [0.43523967, 0.16095031, 0.82      , ..., 0.11390285, 0.19478784,
        0.33085666],
       ...,
       [0.07884732, 0.37811611, 0.98      , ..., 0.97453936, 0.16672235,
        0.31631055],
       [0.09465914, 0.22837544, 0.28      , ..., 0.77018425, 0.96825927,
        0.37306677],
       [0.09006384, 0.12748871, 0.38      , ..., 0.77922948, 0.60674908,
        0.20123488]])

In [29]:
#  Calculates the mean of each numeric column and rounds the result to 2 decimal places
np.round(df.mean(numeric_only=True),2)

Unnamed: 0,0
Sale,171327.12
InStrSpending,30.59
Discount,0.25
TVSpending,98.68
StockRate,0.49
Price,14.6
Radio,1479.57
OnlineAdsSpending,1596.5


In [30]:
# Returns the minimum value of each numeric column in the DataFrame, rounded to 2 decimal places
np.round(df.min(numeric_only=True),2)

Unnamed: 0,0
Sale,1992.0
InStrSpending,0.19
Discount,0.0
TVSpending,0.13
StockRate,0.0
Price,0.14
Radio,4.0
OnlineAdsSpending,12.54


In [31]:
# Returns the maximum value of each numeric column rounded to 2 decimal places
np.round(df.max(numeric_only=True),2)

Unnamed: 0,0
Sale,393914.0
InStrSpending,59.96
Discount,0.5
TVSpending,199.91
StockRate,1.0
Price,29.99
Radio,2997.0
OnlineAdsSpending,3198.27


In [32]:
# Sums each numeric column and rounds the result to 2 decimal places
np.round(df.sum(numeric_only=True),2)

Unnamed: 0,0
Sale,169956500.0
InStrSpending,30348.29
Discount,249.13
TVSpending,97889.31
StockRate,490.57
Price,14483.03
Radio,1467733.0
OnlineAdsSpending,1583732.0


In [33]:
# Calculates the standard deviation of all numeric columns and rounds the result to 2 decimal places
np.round(df.std(numeric_only=True),2)

Unnamed: 0,0
Sale,81397.84
InStrSpending,17.49
Discount,0.15
TVSpending,57.12
StockRate,0.29
Price,8.72
Radio,885.42
OnlineAdsSpending,927.47


In [34]:
# Calculates and rounds variance of all numeric columns to 2 decimal places
np.round(df.var(numeric_only=True),2)

Unnamed: 0,0
Sale,6625609000.0
InStrSpending,306.01
Discount,0.02
TVSpending,3262.39
StockRate,0.08
Price,75.96
Radio,783967.9
OnlineAdsSpending,860209.5


In [35]:
# Calculate cumulative sum of each column, take first 5 rows, and round values to 2 decimal places
np.round(df.cumsum().head(),2)

Unnamed: 0,Sale,InStrSpending,Discount,TVSpending,StockRate,Price,Radio,OnlineAdsSpending
0,240368,59.9,0.46,46.3,0.45,12.56,1065,1081.6
1,447644,67.18,0.85,213.21,0.5,18.0,2897,1732.84
2,620216,76.99,1.26,286.88,0.56,21.54,3484,2799.4
3,702913,123.93,1.53,320.83,1.44,50.44,4891,4654.67
4,844675,126.52,1.79,403.46,1.8,55.1,6948,6263.58


In [36]:
# Computes cumulative product row-wise for each column and rounds to 2 decimal places
np.round(df.cumprod().head(),2)

  return bound(*args, **kwds)


Unnamed: 0,Sale,InStrSpending,Discount,TVSpending,StockRate,Price,Radio,OnlineAdsSpending
0,240368,59.9,0.46,46.3,0.45,12.56,1065,1081.6
1,49822517568,436.07,0.18,7727.93,0.02,68.33,1951080,704381.2
2,8597971501744896,4277.87,0.07,569316.8,0.0,241.88,1145283960,751264800.0
3,-8396569594874848512,200803.05,0.02,19328310.0,0.0,6990.2,1611414531720,1393799000000.0
4,-1444064392037632512,520079.89,0.01,1597098000.0,0.0,32574.34,3314679691748040,2242497000000000.0


In [37]:
# Get 25th percentile (Q1) for each numeric column, rounded to 2 decimals
np.round(df.quantile(0.25,numeric_only=True),2)

Unnamed: 0,0.25
Sale,112479.25
InStrSpending,14.83
Discount,0.13
TVSpending,49.64
StockRate,0.25
Price,6.92
Radio,708.25
OnlineAdsSpending,786.33


In [38]:
# Returns the median of each numeric column rounded to 2 decimal places
np.round(df.quantile(0.50,numeric_only=True),2)

Unnamed: 0,0.5
Sale,170390.5
InStrSpending,31.38
Discount,0.25
TVSpending,97.51
StockRate,0.49
Price,14.82
Radio,1413.5
OnlineAdsSpending,1595.46


In [39]:
# Returns the number of unique values in each column of the DataFrame
df.nunique()

Unnamed: 0,0
Sale,990
InStrSpending,917
Discount,51
TVSpending,959
StockRate,101
Price,839
Radio,852
OnlineAdsSpending,991
