# NumPy Basics <hr style="border:10px solid #12345678">

Data Analyst: Ian Patrick M. Alvior <br> Department: Electrical Engineering

Install and update NumPy

In [2]:
#%pip install numpy --upgrade

In [4]:
pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [5]:
pip install --upgrade pip

Note: you may need to restart the kernel to use updated packages.


Install and update scipy

In [4]:
#%pip install scipy --upgrade

In [3]:
pip install scipy

Note: you may need to restart the kernel to use updated packages.


In [2]:
# imports and packages
import numpy as np
from scipy import stats

# 1D Array

In [7]:
# First 1-D example: a vector with three integer elements.
array_a = np.array([1, 2, 3])
array_a

array([1, 2, 3])

In [8]:
# Shape of the array: one axis holding three elements.
array_a.shape


(3,)

In [9]:
# Second 1-D example: another three-element integer vector.
array_b = np.array([4, 5, 6])
array_b

array([4, 5, 6])

# 2D Array

In [10]:
# 2-D example: a matrix with two rows and three columns.
my_array = np.array([
    [1, 2, 3],
    [4, 5, 6],
])
my_array

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
# Shape of the matrix as (rows, columns).
my_array.shape

(2, 3)

In [12]:
# Transpose: rows of my_array become the columns of t_array.
t_array = np.transpose(my_array)
t_array

array([[1, 4],
       [2, 5],
       [3, 6]])

In [13]:
# Shape of the transposed matrix as (rows, columns).
t_array.shape

(3, 2)

# Measures of Central Tendency

Fruit Price List

In [14]:
# Fruit price list: one price per fruit.
fruits = np.array([120, 60, 85, 150, 200])
fruits

array([120,  60,  85, 150, 200])

In [15]:
# Arithmetic mean of the fruit prices.
fruits_mean = fruits.mean()
fruits_mean

np.float64(123.0)

In [16]:
# median: the middle value once the prices are put in order
fruits_median = np.median(fruits)
fruits_median

np.float64(120.0)

In [17]:
# sort: np.sort returns a sorted copy, so fruits keeps its original order
fruits_sorted = np.sort(fruits)
fruits_sorted

array([ 60,  85, 120, 150, 200])

In [4]:
# mode: most frequent value.
# NOTE: every price in fruits occurs exactly once, so the result comes back
# with count=1; the value that stats.mode reports is then not a meaningful
# mode for this dataset.
fruits_mode = stats.mode(fruits)
fruits_mode

ModeResult(mode=np.int64(60), count=np.int64(1))

In [19]:
# Population standard deviation: ddof=0 divides by N (NumPy's default).
fruits_std = np.std(fruits, ddof=0)
fruits_std

np.float64(49.15282290977803)

Voltage Response

In [20]:
# Voltage response dataset: three rows of eight readings each.
# The last entry of the second row is missing and stored as NaN.
voltage = np.array([
    [1, 2, 3, 4, 5, 6, 7, 8],
    [12, 5, 9.2, 3.3, 24, 18.9, 15.4, np.nan],
    [2.5, 4.3, 6, 9, 11.2, 14.5, 17.8, 20],
])
voltage

array([[ 1. ,  2. ,  3. ,  4. ,  5. ,  6. ,  7. ,  8. ],
       [12. ,  5. ,  9.2,  3.3, 24. , 18.9, 15.4,  nan],
       [ 2.5,  4.3,  6. ,  9. , 11.2, 14.5, 17.8, 20. ]])

# Measures of Variability

Exam Performance

In [21]:
# Exam performance dataset: twelve grades.
grade = np.array([
    9.8, 9.4, 9.1, 8.9, 8.8, 8.5,
    8.2, 7.8, 7.4, 7, 6.7, 3.5,
])
grade

array([9.8, 9.4, 9.1, 8.9, 8.8, 8.5, 8.2, 7.8, 7.4, 7. , 6.7, 3.5])

Cake Price List

In [22]:
# Cake price list dataset: two rows with ten values each.
price = np.array([
    [3.3, 12, 5.3, 11.2, 10.2, 4.5, 6.2, 6.4, 8, 9.84],
    [120, 420, 163.3, 72.3, 157, 64.2, 99, 100, 125, 120.2],
])
price

array([[  3.3 ,  12.  ,   5.3 ,  11.2 ,  10.2 ,   4.5 ,   6.2 ,   6.4 ,
          8.  ,   9.84],
       [120.  , 420.  , 163.3 ,  72.3 , 157.  ,  64.2 ,  99.  , 100.  ,
        125.  , 120.2 ]])

Pooled Standard Deviation

In [23]:
# battery life dataset
# Mixing text labels and numbers in a single np.array coerces every element
# to a string (the array's dtype is a unicode string type), so the second row
# cannot be used for arithmetic as-is. The combined array is kept for display;
# battery_group and battery_life are typed views for the calculations.
battery = np.array([
    ['A','A','A','A','A','B','B','B','B','B','C','C','C','C','C'],
    [12.1,12.2,12.3,12.4,12.5,12.6,12.7,12.8,12.9,11,11.1,11.2,11.3,11.4,11.5]
])
battery_group = battery[0]               # group label of each measurement
battery_life = battery[1].astype(float)  # measurements converted back to floats
battery

array([['A', 'A', 'A', 'A', 'A', 'B', 'B', 'B', 'B', 'B', 'C', 'C', 'C',
        'C', 'C'],
       ['12.1', '12.2', '12.3', '12.4', '12.5', '12.6', '12.7', '12.8',
        '12.9', '11', '11.1', '11.2', '11.3', '11.4', '11.5']],
      dtype='<U32')

# Assignment

Jaguar Measures of Central Tendency and Variability

In [8]:
# Jaguar dataset: eleven measurements.
jaguar = np.array([
    10.6, 9.1, 9.3, 9.8, 10.5, 10.4,
    9.5, 11, 10.4, 3, 9.8,
])
jaguar

array([10.6,  9.1,  9.3,  9.8, 10.5, 10.4,  9.5, 11. , 10.4,  3. ,  9.8])

In [9]:
# Arithmetic mean of the jaguar measurements.
jaguar_mean = jaguar.mean()
jaguar_mean

np.float64(9.399999999999999)

In [23]:
# median: with 11 values this is the 6th value of the sorted data
jaguar_median = np.median(jaguar)
jaguar_median

np.float64(9.8)

In [10]:
# mode: most frequent value.
# stats.mode reports a single value even when several values tie for the
# highest count. In jaguar both 9.8 and 10.4 occur twice, so the data is
# bimodal; jaguar_modes collects every value that reaches the top count.
jaguar_mode = stats.mode(jaguar)
jaguar_values, jaguar_counts = np.unique(jaguar, return_counts=True)
jaguar_modes = jaguar_values[jaguar_counts == jaguar_counts.max()]
jaguar_mode

ModeResult(mode=np.float64(9.8), count=np.int64(2))

In [12]:
# Population standard deviation: ddof=0 divides by N (NumPy's default).
jaguar_std = np.std(jaguar, ddof=0)
jaguar_std

np.float64(2.101947149236112)

In [14]:
# sort: np.sort returns a sorted copy, so jaguar keeps its original order
jaguar_sorted = np.sort(jaguar)
jaguar_sorted

array([ 3. ,  9.1,  9.3,  9.5,  9.8,  9.8, 10.4, 10.4, 10.5, 10.6, 11. ])

Panther Measures of Central Tendency and Variability

In [13]:
# Panther dataset: ten measurements.
panther = np.array([
    10.1, 11, 9.1, 20, 9.2,
    10.8, 9.9, 9.2, 9.1, 9.1,
])
panther

array([10.1, 11. ,  9.1, 20. ,  9.2, 10.8,  9.9,  9.2,  9.1,  9.1])

In [17]:
# Arithmetic mean of the panther measurements.
panther_mean = panther.mean()
panther_mean

np.float64(10.75)

In [18]:
# median: with 10 values this is the average of the two middle values
# of the sorted data
panther_median = np.median(panther)
panther_median

np.float64(9.55)

In [19]:
# mode: most frequent value (9.1 occurs three times in panther)
panther_mode = stats.mode(panther)
panther_mode

ModeResult(mode=np.float64(9.1), count=np.int64(3))

In [21]:
# Population standard deviation: ddof=0 divides by N (NumPy's default).
panther_std = np.std(panther, ddof=0)
panther_std

np.float64(3.1582431825304393)

In [22]:
# sort: np.sort returns a sorted copy, so panther keeps its original order
panther_sorted = np.sort(panther)
panther_sorted

array([ 9.1,  9.1,  9.1,  9.2,  9.2,  9.9, 10.1, 10.8, 11. , 20. ])