# Intro to SciPy and NumPy

## SciPy

### Get Scientific, Mathematical, and Engineering Constants in SciPy

In [2]:
from scipy import constants

In [3]:
#get full list of stored constants in SciPy
dir(constants)

['Avogadro',
 'Boltzmann',
 'Btu',
 'Btu_IT',
 'Btu_th',
 'G',
 'Julian_year',
 'N_A',
 'Planck',
 'R',
 'Rydberg',
 'Stefan_Boltzmann',
 'Wien',
 '__all__',
 '__builtins__',
 '__cached__',
 '__doc__',
 '__file__',
 '__loader__',
 '__name__',
 '__package__',
 '__path__',
 '__spec__',
 '_codata',
 '_constants',
 '_obsolete_constants',
 'acre',
 'alpha',
 'angstrom',
 'arcmin',
 'arcminute',
 'arcsec',
 'arcsecond',
 'astronomical_unit',
 'atm',
 'atmosphere',
 'atomic_mass',
 'atto',
 'au',
 'bar',
 'barrel',
 'bbl',
 'blob',
 'c',
 'calorie',
 'calorie_IT',
 'calorie_th',
 'carat',
 'centi',
 'codata',
 'constants',
 'convert_temperature',
 'day',
 'deci',
 'degree',
 'degree_Fahrenheit',
 'deka',
 'dyn',
 'dyne',
 'e',
 'eV',
 'electron_mass',
 'electron_volt',
 'elementary_charge',
 'epsilon_0',
 'erg',
 'exa',
 'exbi',
 'femto',
 'fermi',
 'find',
 'fine_structure',
 'fluid_ounce',
 'fluid_ounce_US',
 'fluid_ounce_imp',
 'foot',
 'g',
 'gallon',
 'gallon_US',
 'gallon_imp',
 'gas_co

In [4]:
constants.pi

3.141592653589793

In [5]:
constants.metric_ton

1000.0

In [6]:
x = constants.metric_ton
#50 metric tons
50 * x

50000.0

### Trigonometry

In [7]:
import numpy as np
np.sin(45*constants.degree)

0.7071067811865475

### Use Case 1: Statistical Calculations

In [8]:
# Use statistical functions from SciPy.
# `scipy.mean` was only a deprecated alias of `numpy.mean` (it triggers a
# DeprecationWarning on older SciPy releases and is not importable from newer
# ones), so import the NumPy function directly under the same name.
from numpy import mean
from scipy import stats

# Small sample reused by the statistics cells below
x = [1, 13, 44, 5, 12, 8, 4]

In [9]:
mean(x)

  mean(x)


12.428571428571429

In [10]:
#calculate geometric mean
stats.gmean(x)

7.293848713272408

In [11]:
#calculate skewness
stats.skew(x)

1.6809331995419865

In [12]:
# generate normal distribution
import numpy as np
from scipy import stats

# Draw 1000 values from a normal distribution with mean 0 and std 1
sample = stats.norm(0, 1).rvs(1000)

# Summarise the sample. The results get their own names so that they do not
# overwrite the `mean` function imported in the statistics cell above
# (rebinding `mean` to a float would break `mean(x)` if that cell is re-run).
sample_mean = np.mean(sample)
sample_std = np.std(sample)
print(sample_mean, sample_std)


0.001925490497505848 0.9975194167987361


### Use Case 2: Get the derivative of an equation

Evaluate the derivative of the following function at $x = 1$:
$$ 3x^2 + 9x - 5 $$

In [13]:
# `scipy.misc.derivative` is deprecated (it emits a DeprecationWarning) and is
# not available in newer SciPy releases, so a small local stand-in with the
# same name is used instead.
def drv(func, x0, dx=1.0):
    """Estimate the first derivative of ``func`` at ``x0``.

    Uses the central difference ``(func(x0 + dx) - func(x0 - dx)) / (2 * dx)``.
    For polynomials of degree 2 or lower (such as ``eqn``) this is exact up
    to floating-point rounding.

    Parameters
    ----------
    func : callable
        Function of one variable.
    x0 : float
        Point at which the derivative is evaluated.
    dx : float, optional
        Step used on each side of ``x0``.

    Returns
    -------
    float
        The estimated slope of ``func`` at ``x0``.
    """
    return (func(x0 + dx) - func(x0 - dx)) / (2 * dx)

#define the equation as a function
def eqn(x):
    """Return the value of 3x^2 + 9x - 5 at ``x``."""
    return 3 * x**2 + 9*x - 5

In [14]:
#calculate the derivative for value = 1
drv(eqn,1)

  drv(eqn,1)


15.0

## NumPy

### Basics

In [15]:
import numpy as np

In [16]:
#let's build our first array
arr = np.array([1,2,3])

In [17]:
type(arr)

numpy.ndarray

In [18]:
# Start from a plain Python list and build an ndarray from it
lst = [12,20,40]
arr = np.array(lst)

# Show the type before and after the conversion, one per line
print(type(lst), type(arr), sep='\n')

<class 'list'>
<class 'numpy.ndarray'>


### `arange` function

In [19]:
# for lists
list(range(20))

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]

In [20]:
# use arange function to create an array
# start, end
np.arange(0, 5)

array([0, 1, 2, 3, 4])

In [21]:
# generate an array using arange
np.arange(20)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [22]:
#using steps
# start, end, steps
np.arange(0, 5, 0.5)

array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

In [23]:
# Pitfall: a fractional step combined with dtype=int does NOT give 0, 0, 1, 1, ...
# As the recorded output below shows, every element comes out as 0, so avoid
# non-integer steps together with an integer dtype.
np.arange(0, 5, 0.5, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Speed Test `ndarray` vs `list`

In [24]:
import time
import numpy as np

# Define the size of the data
size = 10**6

# Build the same sequence 0..size-1 as a Python list and as a NumPy array
list_data = list(range(size))
numpy_array = np.arange(size)

# Double every element with an explicit Python loop (list).
# time.perf_counter() is a monotonic, high-resolution clock meant for measuring
# short durations; time.time() is wall-clock time and can jump or be too coarse.
start_time = time.perf_counter()
for i in range(size):
    list_data[i] *= 2
end_time = time.perf_counter()
list_time = end_time - start_time


# Double every element with one vectorized in-place operation (NumPy)
start_time = time.perf_counter()
numpy_array *= 2
end_time = time.perf_counter()
numpy_time = end_time - start_time

print(f"Time taken for list: {list_time} seconds")
print(f"Time taken for NumPy array: {numpy_time} seconds")


Time taken for list: 0.05391192436218262 seconds
Time taken for NumPy array: 0.0003597736358642578 seconds


In [25]:
# A 0-d array wraps a single value and has no dimensions (axes)
a0 = np.array(40)
# `ndim` is an attribute (not a function) that holds the number of dimensions
a0.ndim

0

In [26]:
# 1-d array
a1 = np.array([1,2,3])
a1.ndim

1

In [27]:
#2d array
a2 = np.array([[10,20,30],
             [50,60,70]])
print(a2)

[[10 20 30]
 [50 60 70]]


In [28]:
a2.ndim

2

In [29]:
#shape gives me (rows, cols)
a2.shape

(2, 3)

In [30]:
# this is a matrix with 2 rows and 3 cols
print('This is a matrix with', a2.shape[0], 'rows and', a2.shape[1], 'cols')

This is a matrix with 2 rows and 3 cols


In [31]:
a2.size

6

## Arithmetic Operations

In [32]:
arr1 = np.array([10,20,30]) 
arr2 = np.array([50,60,70]) 

In [33]:
arr1 + arr2

array([ 60,  80, 100])

In [34]:
arr1 * arr2

array([ 500, 1200, 2100])

In [35]:
arr1 / arr2

array([0.2       , 0.33333333, 0.42857143])

### Broadcasting

In [36]:
a = np.array([1.0, 2.0, 3.0])
b = 2.0
a * b

array([2., 4., 6.])

![br](https://numpy.org/doc/stable/_images/broadcasting_1.png)

In [37]:
a = np.array([[ 0.0,  0.0,  0.0],
              [10.0, 10.0, 10.0],
              [20.0, 20.0, 20.0],
              [30.0, 30.0, 30.0]])
              
b = np.array([1.0, 2.0, 3.0])
a + b

array([[ 1.,  2.,  3.],
       [11., 12., 13.],
       [21., 22., 23.],
       [31., 32., 33.]])

![br](https://numpy.org/doc/stable/_images/broadcasting_2.png)

In [38]:
# Broadcasting error: `a` has shape (4, 3) while `b` has shape (4,).
# The trailing dimensions (3 vs 4) differ and neither of them is 1, so the
# two shapes cannot be broadcast together and NumPy raises a ValueError.
a = np.array([[ 0.0,  0.0,  0.0],
              [10.0, 10.0, 10.0],
              [20.0, 20.0, 20.0],
              [30.0, 30.0, 30.0]])

b = np.array([1.0, 2.0, 3.0, 4])

# Catch the expected error and show its message, so that "Run All" can
# continue past this demonstration instead of stopping here.
try:
    a + b
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

![brd](https://numpy.org/doc/stable/_images/broadcasting_3.png)

### NumPy Functions
`reshape`, `arange`, `concatenate`, `add`, etc.

In [None]:
arr = np.arange(1,10)
arr

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
#specify (row,col)
arr.reshape(3,3)
#make sure you are specifying dimensions that work with the size of the array

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
# converting a 1d array to a 3d array
arr = np.arange(1,13)

# to reshape, specify size for z, row, col
arr_aft = arr.reshape(2,2,3)

print('Array before:\n',arr)
print('Array After:\n',arr_aft)

Array before:
 [ 1  2  3  4  5  6  7  8  9 10 11 12]
Array After:
 [[[ 1  2  3]
  [ 4  5  6]]

 [[ 7  8  9]
  [10 11 12]]]


In [None]:
#if you don't know the appropriate breakdown for reshape, use -1
arr_reshaped = arr.reshape(2,2,-1)
arr_reshaped

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

In [None]:
# convert back to a 1-D array: reshape(-1) flattens all dimensions into one
arr_reshaped.reshape(-1)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [None]:
arr1 = np.array([10,20,30])
arr2 = np.array([5,2,7])
#using mod function 
np.mod(arr1,arr2)

array([0, 0, 2])

In [None]:
10 % 3

1

In [None]:
#adding 2 arrays methods

#get sum of 2 arrays
arr1 + arr2

array([15, 22, 37])

In [None]:
#method 2
np.add(arr1,arr2)

array([15, 22, 37])

### For Text Arrays

In [None]:
a = np.array(['hello ', 'welcome '])
b = np.array(['learners', 'to the class'])
#to add the two arrays, you need to use char sub-module from numpy
np.char.add(a,b)

array(['hello learners', 'welcome to the class'], dtype='<U20')

In [None]:
fst_nm_col = np.array(['Mark ', 'Betty '])
lst_nm_col = np.array(['Willis', 'Conway'])
#to add the two arrays, you need to use char sub-module from numpy
np.char.add(fst_nm_col,lst_nm_col)

array(['Mark Willis', 'Betty Conway'], dtype='<U12')

In [None]:
fst_nm_col.dtype

dtype('<U6')

In [None]:
#using the concat to add gives a different output
#plus sign works only for lists and numpy numerical values
np.concatenate((fst_nm_col,lst_nm_col))

array(['Mark ', 'Betty ', 'Willis', 'Conway'], dtype='<U6')

In [None]:
#using concatenate without the axis argument: the default (axis=0) appends
#arr2 underneath arr1 as an extra row, as the output below shows
arr1 = np.array([[10,20]
                , [15,30]])
arr2 = np.array([[5,2]])
np.concatenate((arr1,arr2))

array([[10, 20],
       [15, 30],
       [ 5,  2]])

In [None]:
# Joining along axis=1 (side by side) requires the same number of rows in both
# arrays; arr1 has 2 rows and arr2 has 1, so NumPy raises a ValueError.
# Catch the expected error and print it so that "Run All" is not interrupted.
try:
    np.concatenate((arr1,arr2), axis=1)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: all the input array dimensions for the concatenation axis must match exactly, but along dimension 0, the array at index 0 has size 2 and the array at index 1 has size 1

In [None]:
arr1 = np.array([[10,20]
                , [15,30]])
arr2 = np.array([[5,2], [12,6]])
np.concatenate((arr1,arr2), axis=1)

array([[10, 20,  5,  2],
       [15, 30, 12,  6]])

Additional Char functions

In [None]:
arr1 = np.array(['hello - how are you?', 'hello and welcome'])
arr1

array(['hello - how are you?', 'hello and welcome'], dtype='<U20')

In [None]:
#replace every occurrence of 'hello' with 'hi'
np.char.replace(arr1,'hello', 'hi')

array(['hi - how are you?', 'hi and welcome'], dtype='<U17')

In [None]:
np.char.upper(arr1)

array(['HELLO - HOW ARE YOU?', 'HELLO AND WELCOME'], dtype='<U20')

### Run Statistical Functions on NumPy Arrays

In [None]:
arr = np.arange(1,13)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12])

In [None]:
#get the average of the array
np.mean(arr)

6.5

Applying `mean` to a 2-D array

In [None]:
a = np.array([[1,2,3],
              [8,4,9],
              [15,6,17]])
a

array([[ 1,  2,  3],
       [ 8,  4,  9],
       [15,  6, 17]])

In [None]:
np.mean(a)

7.222222222222222

In [None]:
#get the average for each column (axis=0 averages down the rows)
np.mean(a, axis=0)

array([8.        , 4.        , 9.66666667])

In [None]:
(1+8+15)/3

8.0

In [None]:
#get the average for each row
np.mean(a, axis=1)

array([ 2.        ,  7.        , 12.66666667])

In [None]:
#get the median
np.median(a)

6.0

In [None]:
np.std(a) # default is to calculate for a flattened array

5.328701692569689

## Filtering

In [None]:
array = np.array([1,2,3,4,5,6,7,8,9])

# Create a boolean mask: True wherever the element is greater than 4.
# (np.where(array > 4) would return a tuple of index arrays, not a mask.)
mask = array > 4

# Apply the mask to keep only the elements flagged True
filtered_array = array[mask]
print(filtered_array)

In [None]:
import numpy as np

# Create a NumPy array
array = np.array([[1, 2, 3], [9, 4, 6], [7, 4, 10]])

# Create a Boolean mask with one entry per row: True when the row contains a 4.
# Indexing with np.where(array == 4)[0] instead would repeat a row once for
# every 4 it contains; .any(axis=1) flags each matching row exactly once.
mask = (array == 4).any(axis=1)

# Select the rows of the array that correspond to the True values in the mask
filtered_array = array[mask]

print(filtered_array)


[[ 9  4  6]
 [ 7  4 10]]


In [None]:
import numpy as np

# Example 3x3 array whose last column is 3, 6, 10
array = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 10]])

# Row-wise Boolean mask: True where the value in the last column equals 10
mask = array[:, -1] == 10

# Keep only the rows flagged True by the mask
filtered_array = array[mask]

print(filtered_array)


[[ 7  8 10]]

In [None]:
mask

array([False, False,  True])

Additional use

In [41]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [42]:
np.where(x%2==0,'Even','Odd')

array(['Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even', 'Odd', 'Even',
       'Odd'], dtype='<U4')

### Applying a Custom Function in NumPy

In [30]:
# Define a custom function and run it over a NumPy array
def square(x):
    """Return ``x`` multiplied by itself, plus 2 (scalars or arrays)."""
    squared = x * x
    return squared + 2

array = np.array([1, 2, 3, 4, 5])
# A 1-D array has only axis 0, so `square` receives the whole array as a single
# slice and its vectorized arithmetic handles every element at once.
print(np.apply_along_axis(square, 0, array))

[ 3  6 11 18 27]


In [32]:
a

array([[ 1,  2,  3],
       [ 8,  4,  9],
       [15,  6, 17]])

In [31]:
np.apply_along_axis(square, 0, a)

array([[  3,   6,  11],
       [ 66,  18,  83],
       [227,  38, 291]])

### Indexing and Slicing in Arrays

In [93]:
b = np.array([5 ,2, 11, 6, 8])
            #0  1   2    3  4

In [94]:
b[0]

5

In [95]:
b[:4]

array([ 5,  2, 11,  6])

In [106]:
a = np.array([[1,2,3],
              [8,4,9],
              [15,6,17]])

In [None]:
# indices
# 0 1 2
# 1
# 2

![tag](https://www.oreilly.com/api/v2/epubs/9781449323592/files/httpatomoreillycomsourceoreillyimages2172112.png)

In [102]:
#[row, col]
a[1,1]

4

In [101]:
#if col num is not specified it gives full row
a[1]

array([8, 4, 9])

In [103]:
a[:2]

array([[1, 2, 3],
       [8, 4, 9]])

In [104]:
a[-1]

array([15,  6, 17])

In [107]:
c = np.array([[1,2,3,7],
              [8,4,9,11],
              [15,6,17,8]])
c

array([[ 1,  2,  3,  7],
       [ 8,  4,  9, 11],
       [15,  6, 17,  8]])

In [109]:
#ranges in rows and columns
c[1:,1:]

array([[ 4,  9, 11],
       [ 6, 17,  8]])

In [110]:
c[1:,2:]

array([[ 9, 11],
       [17,  8]])

In [112]:
# add 17 to 11 from the array 
c[2,2] + c[1,3]

28

In [113]:
#list() only unpacks the first axis: the result is a list of 1-D arrays (one
#per row), not a nested list of plain numbers - see the output below
list(c)

[array([1, 2, 3, 7]), array([ 8,  4,  9, 11]), array([15,  6, 17,  8])]

In [114]:
a[-1].tolist()

[15, 6, 17]

In [117]:
#indexing for 3d array: the first index selects the layer, then row, then col
z = np.array([[[5,6,7], [44,65,60]], #layer 0: z[0], index 0 along the first axis
                [[53,16,4], [4,7,10]] #layer 1: z[1], index 1 along the first axis
                ])
z

array([[[ 5,  6,  7],
        [44, 65, 60]],

       [[53, 16,  4],
        [ 4,  7, 10]]])

In [118]:
z.shape

(2, 2, 3)

In [119]:
z[0,1,1]

65

In [120]:
z[0] #gives the first layer of 2d array

array([[ 5,  6,  7],
       [44, 65, 60]])

### Example 1: Creating an Array with List Comprehension


In [27]:
# Create an array with squares of numbers from 0 to 9 using list comprehension
arr = np.array([x**2 for x in range(10)])
print(arr)

[ 0  1  4  9 16 25 36 49 64 81]


### Example 2: Filtering Elements in an Array

In [28]:
# Create a random array
arr = np.random.randint(0, 10, size=10)

# Use list comprehension to filter even elements
even_elements = [x for x in arr if x % 2 == 0]
print(even_elements)

[6, 2, 8]




### Example 3: Applying a Function to Each Element

```python
import numpy as np

# Create a random array
arr = np.random.randint(0, 10, size=10)

# Use list comprehension to apply a function to each element (e.g., square each element)
squared_elements = [x**2 for x in arr]
print(squared_elements)
```



### Example 4: Creating a 2D Array with List Comprehension

```python
import numpy as np

# Create a 2D array with squares of numbers from 0 to 3 using list comprehension
arr_2d = np.array([[x**2 for x in range(4)] for _ in range(3)])
print(arr_2d)
```

### Example 5: Using NumPy Functions within List Comprehension

```python
import numpy as np

# Create an array and use NumPy's sin function with list comprehension
arr = np.array([np.sin(x) for x in np.linspace(0, np.pi, 5)])
print(arr)
```

Remember that while list comprehensions can be convenient, NumPy often provides more efficient ways to perform array operations directly. Depending on the complexity of the operation, it's worth considering using NumPy functions whenever possible for performance optimization.