In [1]:
import numpy as np

In [2]:
w1, w2, w3 = 0.3, 0.2, 0.5

In [3]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [4]:
kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3

In [5]:
print(f"The expected yield of apple in region kanto is {kanto_yield_apples} tons per hectare")

The expected yield of apple in region kanto is 56.8 tons per hectare


In [6]:
kanto = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]

In [7]:
weights = [w1, w2, w3]

In [8]:
kanto

[73, 67, 43]

In [9]:
weights

[0.3, 0.2, 0.5]

In [10]:
for region, weight in zip(kanto, weights):
    print("Region Values:", region, "Weight:", weight)

Region Values: 73 Weight: 0.3
Region Values: 67 Weight: 0.2
Region Values: 43 Weight: 0.5


In [11]:
def crop_yields(region, weights):
    result = 0
    for x, w in zip(region, weights):
        result += x * w
    return result

In [12]:
crop_yields(kanto, weights)

56.8

In [13]:
crop_yields(johto, weights)

76.9

In [14]:
crop_yields(sinnoh, weights)

57.699999999999996

In [15]:
kanto = np.array([73, 67, 43])

In [16]:
kanto

array([73, 67, 43])

In [17]:
weights = np.array([w1, w2, w3])

In [18]:
weights

array([0.3, 0.2, 0.5])

In [19]:
type(kanto)

numpy.ndarray

In [20]:
type(weights)

numpy.ndarray

Numpy Arrays also support Indexing Notation [i] like lists in Python

In [21]:
kanto[0]

73

In [22]:
weights[2]

0.5

# Operating on Numpy Arrays

In [23]:
help(np.dot)

Help on _ArrayFunctionDispatcher in module numpy:

dot(...)
    dot(a, b, out=None)

    Dot product of two arrays. Specifically,

    - If both `a` and `b` are 1-D arrays, it is inner product of vectors
      (without complex conjugation).

    - If both `a` and `b` are 2-D arrays, it is matrix multiplication,
      but using :func:`matmul` or ``a @ b`` is preferred.

    - If either `a` or `b` is 0-D (scalar), it is equivalent to
      :func:`multiply` and using ``numpy.multiply(a, b)`` or ``a * b`` is
      preferred.

    - If `a` is an N-D array and `b` is a 1-D array, it is a sum product over
      the last axis of `a` and `b`.

    - If `a` is an N-D array and `b` is an M-D array (where ``M>=2``), it is a
      sum product over the last axis of `a` and the second-to-last axis of
      `b`::

        dot(a, b)[i,j,k,m] = sum(a[i,j,:] * b[k,:,m])

    It uses an optimized BLAS library when possible (see `numpy.linalg`).

    Parameters
    ----------
    a : array_like
        Fir

In [24]:
np.dot(kanto, weights)

56.8

In [25]:
kanto

array([73, 67, 43])

In [26]:
weights

array([0.3, 0.2, 0.5])

In [27]:
kanto * weights

array([21.9, 13.4, 21.5])

In [28]:
(kanto * weights).sum()

56.8

In [29]:
# arr1 = np.array([1,2,3])
# arr2 = np.array([4,5,6])

In [31]:
# arr1 * arr2

In [34]:
# arr1.sum()

In [33]:
# arr2.sum()

# Benefits of Using Numpy Arrays
* Ease of Use
* Performance

In [35]:
# Lists in Python
arr1 = list(range(100000))
arr2 = list(range(100000, 200000))


# Numpy Arrays
arr1_np = np.array(arr1)
arr2_np = np.array(arr2)

In [36]:
%%time 
result = 0
for x1, x2 in zip(arr1, arr2):
    result += x1 * x2
print(result)

833323333350000
CPU times: total: 46.9 ms
Wall time: 69 ms


In [37]:
%%time
np.dot(arr1_np, arr2_np)

CPU times: total: 0 ns
Wall time: 0 ns


893678192

# Multi-Dimensional Numpy Arrays

In [38]:
# 2D Array (matrix)
climate_data = np.array([[73, 67, 43],
                         [91, 88, 64],
                         [87, 134, 58],
                         [102, 43, 37],
                         [69, 96, 70]])

In [39]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [40]:
climate_data.shape

(5, 3)

In [41]:
# 1D Array (vectir)
weights

array([0.3, 0.2, 0.5])

In [42]:
weights.shape

(3,)

In [43]:
# 3D Array
arr3 = np.array([
                 [[11, 12, 13],
                 [13, 14, 15]],
                 [[15, 16, 17],
                 [17, 18, 19.5]]])

In [44]:
arr3

array([[[11. , 12. , 13. ],
        [13. , 14. , 15. ]],

       [[15. , 16. , 17. ],
        [17. , 18. , 19.5]]])

In [45]:
arr3.shape

(2, 2, 3)

In [46]:
weights.dtype

dtype('float64')

In [47]:
climate_data.dtype

dtype('int32')

In [48]:
arr3.dtype

dtype('float64')

In [49]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [50]:
weights

array([0.3, 0.2, 0.5])

In [51]:
# matrix multiplication
np.matmul(climate_data, weights) 

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [52]:
climate_data @ weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

# Workign with CSV data files

In [53]:
climate_data = np.genfromtxt('temperature_rainfall_humidity_10000.txt', delimiter=',', skip_header=1)

In [54]:
climate_data

array([[2.081e+01, 4.000e-02, 4.605e+01],
       [6.437e+01, 9.020e+01, 3.013e+01],
       [3.518e+01, 5.480e+00, 1.821e+01],
       ...,
       [7.269e+01, 9.380e+00, 3.023e+01],
       [3.171e+01, 6.710e+01, 8.569e+01],
       [8.205e+01, 1.239e+01, 7.624e+01]])

In [55]:
climate_data.shape

(10000, 3)

In [56]:
weights = np.array([0.3, 0.2, 0.5])

In [57]:
yields = climate_data @ weights

In [58]:
yields

array([29.276, 52.416, 20.755, ..., 38.798, 65.778, 65.213])

In [59]:
yields.shape

(10000,)

In [60]:
climate_results = np.concatenate((climate_data, yields.reshape(10000, 1)), axis=1)

In [61]:
climate_results

array([[2.0810e+01, 4.0000e-02, 4.6050e+01, 2.9276e+01],
       [6.4370e+01, 9.0200e+01, 3.0130e+01, 5.2416e+01],
       [3.5180e+01, 5.4800e+00, 1.8210e+01, 2.0755e+01],
       ...,
       [7.2690e+01, 9.3800e+00, 3.0230e+01, 3.8798e+01],
       [3.1710e+01, 6.7100e+01, 8.5690e+01, 6.5778e+01],
       [8.2050e+01, 1.2390e+01, 7.6240e+01, 6.5213e+01]])

In [62]:
help(np.concatenate)

Help on _ArrayFunctionDispatcher in module numpy:

concatenate(...)
    concatenate((a1, a2, ...), axis=0, out=None, dtype=None, casting="same_kind")

    Join a sequence of arrays along an existing axis.

    Parameters
    ----------
    a1, a2, ... : sequence of array_like
        The arrays must have the same shape, except in the dimension
        corresponding to `axis` (the first, by default).
    axis : int, optional
        The axis along which the arrays will be joined.  If axis is None,
        arrays are flattened before use.  Default is 0.
    out : ndarray, optional
        If provided, the destination to place the result. The shape must be
        correct, matching that of what concatenate would have returned if no
        out argument were specified.
    dtype : str or dtype
        If provided, the destination array will have this dtype. Cannot be
        provided together with `out`.

        .. versionadded:: 1.20.0

    casting : {'no', 'equiv', 'safe', 'same_kind', 

In [63]:
help(np.reshape)

Help on _ArrayFunctionDispatcher in module numpy:

reshape(a, newshape, order='C')
    Gives a new shape to an array without changing its data.

    Parameters
    ----------
    a : array_like
        Array to be reshaped.
    newshape : int or tuple of ints
        The new shape should be compatible with the original shape. If
        an integer, then the result will be a 1-D array of that length.
        One shape dimension can be -1. In this case, the value is
        inferred from the length of the array and remaining dimensions.
    order : {'C', 'F', 'A'}, optional
        Read the elements of `a` using this index order, and place the
        elements into the reshaped array using this index order.  'C'
        means to read / write the elements using C-like index order,
        with the last axis index changing fastest, back to the first
        axis index changing slowest. 'F' means to read / write the
        elements using Fortran-like index order, with the first index
     

In [64]:
np.savetxt('climate_results.txt', 
           climate_results, 
          fmt='%.2f',
          header='temprature, rainfall, humidity, yeild_apples',
          comments=' ')

# Arithmetic Operations and Broadcasting

In [67]:
arr2 = np.array([[1,2,3,4],
                 [5,6,7,8],
                 [9,1,2,3]])

In [68]:
arr3 = np.array([[11,12,13,14],
                 [15,16,17,18],
                 [19,11,12,13]])

In [70]:
arr2 + arr3

array([[12, 14, 16, 18],
       [20, 22, 24, 26],
       [28, 12, 14, 16]])

In [69]:
# Adding a scalar
arr2 + 3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [71]:
arr3 - arr2

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [72]:
# Divsion by scalar
arr2 / 2

array([[0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. ],
       [4.5, 0.5, 1. , 1.5]])

In [73]:
arr2 % 4

array([[1, 2, 3, 0],
       [1, 2, 3, 0],
       [1, 1, 2, 3]], dtype=int32)

In [74]:
arr2 = np.array([[1,2,3,4],
                 [5,6,7,8],
                 [9,1,2,3]])

In [75]:
arr2.shape

(3, 4)

In [76]:
 arr4 = np.array([4,5,6,7])

In [77]:
arr4.shape

(4,)

In [78]:
# elements of array4 will be added to array2 elements row wise with all the rows of array2
arr2 + arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [79]:
arr5 = np.array([7,8])

In [82]:
arr5.shape

(2,)

In [85]:
# arr2 + arr5 # this will throw error 

In [86]:
arr1 = np.array([[1,2,3,4],[4,5,6,7]])
arr2 = np.array([[11,22,35,43],[44,54,66,78]])

In [87]:
arr1 == arr2

array([[False, False, False, False],
       [False, False, False, False]])

In [88]:
arr1 != arr2

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [89]:
arr1 > arr2

array([[False, False, False, False],
       [False, False, False, False]])

In [90]:
arr1 < arr2

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [91]:
arr1 >= arr2

array([[False, False, False, False],
       [False, False, False, False]])

In [92]:
arr1 <= arr2

array([[ True,  True,  True,  True],
       [ True,  True,  True,  True]])

In [94]:
# number of matching elements in the array
(arr1 == arr2).sum()  

8

In [98]:
arr3 = np.array([
            [[11,12,13,14],
             [15,16,17,18]],
    
            [[15,16,17,21],
             [34,45,56,78]],
    
            [[98,34,12,56],
             [17,18,19.5,3]]
                            ])

In [99]:
arr3.shape

(3, 2, 4)

In [100]:
# single element using indexing
arr3[1, 1, 2]

56.0

In [101]:
# subarray using ranges
arr3[0:, 0:1, :2]

array([[[11., 12.]],

       [[15., 16.]],

       [[98., 34.]]])

In [102]:
# mixing indexing and slicing
arr3[1:, 1, :3]

array([[34. , 45. , 56. ],
       [17. , 18. , 19.5]])

In [104]:
# Using fewer indices
arr3[0]

array([[11., 12., 13., 14.],
       [15., 16., 17., 18.]])

In [105]:
arr3[2]

array([[98. , 34. , 12. , 56. ],
       [17. , 18. , 19.5,  3. ]])

# Other ways of Creatign Numpy Arrays

In [107]:
np.zeros((3, 2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [108]:
np.ones((3, 2))

array([[1., 1.],
       [1., 1.],
       [1., 1.]])

In [109]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [111]:
# random vector
np.random.rand(5)

array([0.15151016, 0.85308965, 0.06429687, 0.20938892, 0.07092626])

In [113]:
np.random.randn(2, 3)

array([[-1.74293236,  0.64379856,  0.13921597],
       [-0.68832052,  1.07015625, -0.3689809 ]])

In [114]:
np.full([2, 3], 42)

array([[42, 42, 42],
       [42, 42, 42]])

In [116]:
# array with range(start, stop - 1, step)
np.arange(9, 90, 3)

array([ 9, 12, 15, 18, 21, 24, 27, 30, 33, 36, 39, 42, 45, 48, 51, 54, 57,
       60, 63, 66, 69, 72, 75, 78, 81, 84, 87])

In [118]:
np.arange(9, 90, 3).reshape(3,3,3)

array([[[ 9, 12, 15],
        [18, 21, 24],
        [27, 30, 33]],

       [[36, 39, 42],
        [45, 48, 51],
        [54, 57, 60]],

       [[63, 66, 69],
        [72, 75, 78],
        [81, 84, 87]]])

In [117]:
np.linspace(3, 27, 9)

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])

# Interacting with OS and FileSystem

In [3]:
import os

In [4]:
# get current working directory
os.getcwd()

'C:\\Users\\Sajal\\Documents\\Data Analysis with Python'

In [5]:
help(os.listdir)

Help on built-in function listdir in module nt:

listdir(path=None)
    Return a list containing the names of the files in the directory.

    path can be specified as either str, bytes, or a path-like object.  If path is bytes,
      the filenames returned will also be bytes; in all other circumstances
      the filenames returned will be str.
    If path is None, uses the path='.'.
    On some platforms, path may also be specified as an open file descriptor;\
      the file descriptor must refer to a directory.
      If this functionality is unavailable, using it raises NotImplementedError.

    The list is in arbitrary order.  It does not include the special
    entries '.' and '..' even if they are present in the directory.



In [6]:
# list directories  (relative path)
os.listdir('.')

['.ipynb_checkpoints',
 '01.Numertical Computing with Numpy.ipynb',
 'climate_results.txt',
 'temperature_rainfall_humidity_10000.csv',
 'temperature_rainfall_humidity_10000.txt']

In [13]:
type(os.listdir('.'))

list

In [1]:
# list directories  (relative path)
os.listdir('/Data Analysis with Python')

In [8]:
os.makedirs('.\\data', exist_ok = True)

In [9]:
os.listdir('.')

['.ipynb_checkpoints',
 '01.Numertical Computing with Numpy.ipynb',
 'climate_results.txt',
 'data',
 'temperature_rainfall_humidity_10000.csv',
 'temperature_rainfall_humidity_10000.txt']

In [10]:
'data' in os.listdir('.')

True

In [11]:
os.listdir('.\\data')

[]