In [None]:
w1, w2, w3 = 0.3, 0.2, 0.5

In [8]:
kanto_temp = 73
kanto_rainfall = 67
kanto_humidity = 43

In [9]:
kanto_yield_apples = kanto_temp * w1 + kanto_rainfall * w2 + kanto_humidity * w3
kanto_yield_apples

56.8

In [10]:
print("The expected yield of apples in Kanto region is {} tons per hectare.".format(kanto_yield_apples))

The expected yield of apples in Kanto region is 56.8 tons per hectare.


In [11]:
kanto = [73, 67, 43]
johto = [91, 88, 64]
hoenn = [87, 134, 58]
sinnoh = [102, 43, 37]
unova = [69, 96, 70]

In [12]:
weights = [w1, w2, w3]

In [13]:
def crop_yield(region, weights):
    """Return the weighted sum of a region's measurements.

    Args:
        region: Iterable of measurements for one region.
        weights: Iterable of weights, paired positionally with ``region``.

    Returns:
        The sum of ``measurement * weight`` over the paired items, or ``0``
        when there is nothing to pair. Pairing uses ``zip``, so it stops at
        the shorter of the two inputs.
    """
    total = 0
    # Accumulate left to right so floating-point results match a plain
    # running sum exactly.
    for measurement, weight in zip(region, weights):
        total += measurement * weight
    return total

In [14]:
crop_yield(kanto,weights)

56.8

In [15]:
crop_yield(johto,weights)

76.9

### <u>Going from Python lists to NumPy arrays:</u>

In [16]:
import numpy as np

In [17]:
kanto=np.array([73,67,43])

In [18]:
kanto

array([73, 67, 43])

In [19]:
weights=np.array([w1,w2,w3])

In [20]:
weights

array([0.3, 0.2, 0.5])

In [21]:
type(kanto)

numpy.ndarray

In [22]:
type(weights)

numpy.ndarray

In [23]:
weights[1]

0.2

In [24]:
kanto[0]

73

### <u>Operating on numpy arrays:</u>

In [25]:
np.dot(kanto,weights)

56.8

In [26]:
(kanto*weights).sum()

56.8

In [27]:
arr1=np.array([1,2,3])
arr2=np.array([4,5,6])

In [28]:
arr1*arr2

array([ 4, 10, 18])

In [29]:
(arr1*arr2).sum()

32

In [30]:
arr2.sum()

15

### <u>Benefit of Numpy Arrays:</u>

In [31]:
# Python lists
arr1 = list(range(1000000))
arr2 = list(range(1000000, 2000000))

# Numpy arrays
arr1_np = np.array(arr1)
arr2_np = np.array(arr2)

In [32]:
%%time
result = 0
for x1, x2 in zip(arr1, arr2):
    result += x1*x2
result

Wall time: 137 ms


833332333333500000

In [33]:
%%time
result_np = np.dot(arr1_np.astype(np.int64), arr2_np.astype(np.int64))
result_np

Wall time: 13.1 ms


833332333333500000

### <u>Multi-dimensional Numpy arrays</u>

In [34]:
climate_data = np.array([[73, 67, 43],
                         [91, 88, 64],
                         [87, 134, 58],
                         [102, 43, 37],
                         [69, 96, 70]])

In [35]:
climate_data

array([[ 73,  67,  43],
       [ 91,  88,  64],
       [ 87, 134,  58],
       [102,  43,  37],
       [ 69,  96,  70]])

In [36]:
climate_data.shape

(5, 3)

In [37]:
weights

array([0.3, 0.2, 0.5])

In [38]:
weights.shape

(3,)

In [39]:
# 3D array 
arr3 = np.array([
    [[11, 12, 13], 
     [13, 14, 15]], 
    [[15, 16, 17], 
     [17, 18, 19.5]]])

In [40]:
arr3.shape

(2, 2, 3)

In [41]:
arr3.dtype

dtype('float64')

In [42]:
climate_data.dtype

dtype('int32')

In [43]:
weights.dtype

dtype('float64')

In [44]:
np.matmul(climate_data,weights)

array([56.8, 76.9, 81.9, 57.7, 74.9])

In [45]:
climate_data@weights

array([56.8, 76.9, 81.9, 57.7, 74.9])

### <u>Working with CSV data files</u>

In [46]:
import urllib.request

In [47]:
urllib.request.urlretrieve('https://gist.github.com/BirajCoder/a4ffcb76fd6fb221d76ac2ee2b8584e9/raw/4054f90adfd361b7aa4255e99c2e874664094cea/climate.csv', 
    'climate.txt')

('climate.txt', <http.client.HTTPMessage at 0x257dacfab20>)

In [48]:
climate_data=np.genfromtxt('climate.txt',delimiter=',',skip_header=1)

In [49]:
climate_data

array([[25., 76., 99.],
       [39., 65., 70.],
       [59., 45., 77.],
       ...,
       [99., 62., 58.],
       [70., 71., 91.],
       [92., 39., 76.]])

In [50]:
climate_data.shape

(10000, 3)

In [51]:
weights = np.array([0.3, 0.2, 0.5])

In [52]:
yields=climate_data@weights

In [53]:
yields

array([72.2, 59.7, 65.2, ..., 71.1, 80.7, 73.4])

In [54]:
yields.shape

(10000,)

### <font color="red">Doubt</font>

In [61]:
# Append the yields as an extra column. reshape(-1, 1) lets NumPy infer the
# row count, so this keeps working if the dataset is not exactly 10000 rows.
climate_results=np.concatenate((climate_data,yields.reshape(-1,1)),axis=1)

<font color=red>----------</font>

In [58]:
climate_results

array([[25. , 76. , 99. , 72.2],
       [39. , 65. , 70. , 59.7],
       [59. , 45. , 77. , 65.2],
       ...,
       [99. , 62. , 58. , 71.1],
       [70. , 71. , 91. , 80.7],
       [92. , 39. , 76. , 73.4]])

In [62]:
np.savetxt('climate_results.txt',climate_results,fmt='%.2f',delimiter=',',header='temperature,rainfall,humidity,yield_apples',comments='')

### <u>Arithmetic operations</u>

In [64]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

In [68]:
arr3 = np.array([[11, 12, 13, 14], 
                 [15, 16, 17, 18], 
                 [19, 11, 12, 13]])

In [69]:
# Adding a scalar
arr2+3

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12,  4,  5,  6]])

In [70]:
# Element-wise subtraction
arr3-arr2

array([[10, 10, 10, 10],
       [10, 10, 10, 10],
       [10, 10, 10, 10]])

In [71]:
# Division by scalar
arr2/2

array([[0.5, 1. , 1.5, 2. ],
       [2.5, 3. , 3.5, 4. ],
       [4.5, 0.5, 1. , 1.5]])

In [72]:
#Element-wise multiplication
arr2*arr3

array([[ 11,  24,  39,  56],
       [ 75,  96, 119, 144],
       [171,  11,  24,  39]])

In [73]:
#Modulus with scalar
arr2%4

array([[1, 2, 3, 0],
       [1, 2, 3, 0],
       [1, 1, 2, 3]], dtype=int32)

### <u>Array Broadcasting</u>

Numpy arrays also support broadcasting, allowing arithmetic operations between two arrays with different numbers of dimensions but compatible shapes.

In [74]:
arr2 = np.array([[1, 2, 3, 4], 
                 [5, 6, 7, 8], 
                 [9, 1, 2, 3]])

In [75]:
arr2.shape

(3, 4)

In [76]:
arr4 = np.array([4, 5, 6, 7])

In [77]:
arr4.shape

(4,)

In [78]:
arr2+arr4

array([[ 5,  7,  9, 11],
       [ 9, 11, 13, 15],
       [13,  6,  8, 10]])

In [79]:
arr5=np.array([7,8])

In [80]:
arr5.shape

(2,)

In [81]:
arr2+arr5

ValueError: operands could not be broadcast together with shapes (3,4) (2,) 

### <u>Array Comparison</u>

In [82]:
arr1 = np.array([[1, 2, 3], [3, 4, 5]])
arr2 = np.array([[2, 2, 3], [1, 2, 5]])

In [83]:
# Element-wise comparison takes place
arr1==arr2

array([[False,  True,  True],
       [False, False,  True]])

In [85]:
arr1!=arr2

array([[ True, False, False],
       [ True,  True, False]])

In [86]:
arr1>=arr2

array([[False,  True,  True],
       [ True,  True,  True]])

In [87]:
arr1<arr2

array([[ True, False, False],
       [False, False, False]])

In [88]:
(arr1==arr2).sum()  # we can count no. of equal elements like this

3

### <u>Array Indexing and slicing</u>

In [89]:
arr3 = np.array([
    [[11, 12, 13, 14], 
     [13, 14, 15, 19]], 
    
    [[15, 16, 17, 21], 
     [63, 92, 36, 18]], 
    
    [[98, 32, 81, 23],      
     [17, 18, 19.5, 43]]])

In [90]:
arr3.shape

(3, 2, 4)

In [91]:
#single element
arr3[1,1,2]

36.0

In [94]:
#subarray using ranges
arr3[0:,1:,:2]

array([[[13., 14.]],

       [[63., 92.]],

       [[17., 18.]]])

In [95]:
#mixing indices and ranges
arr3[1:,1,3]

array([18., 43.])

In [96]:
arr3[1:,1,:3]

array([[63. , 92. , 36. ],
       [17. , 18. , 19.5]])

In [97]:
#using fewer indices
arr3[1]

array([[15., 16., 17., 21.],
       [63., 92., 36., 18.]])

### <u>Other ways of creating Numpy Arrays</u>

In [99]:
#All zeros
np.zeros((3,2))

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [100]:
#All ones
np.ones((2,2,3))

array([[[1., 1., 1.],
        [1., 1., 1.]],

       [[1., 1., 1.],
        [1., 1., 1.]]])

In [101]:
#Identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [102]:
#random vector
np.random.rand(5)

array([0.50590697, 0.96878201, 0.09335207, 0.20550196, 0.05705521])

In [103]:
#random matrix
np.random.randn(2,3)

array([[-1.04809262, -1.22721069,  0.6908481 ],
       [ 0.77279874,  1.09524804, -0.20811387]])

In [104]:
#fixed value
np.full([2,3],42)

array([[42, 42, 42],
       [42, 42, 42]])

In [105]:
np.arange(10,90,3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88])

In [106]:
# equally spaced numbers in a range
np.linspace(3,27,9)   # here we want 9 equally spaced numbers from 3 to 27 (both endpoints included); any other number of elements can be requested too

array([ 3.,  6.,  9., 12., 15., 18., 21., 24., 27.])

In [107]:
np.linspace(2,25,3)

array([ 2. , 13.5, 25. ])

# <u>Reading from and writing to files using Python</u>

In [108]:
import os

In [109]:
os.getcwd()

'C:\\Users\\himan\\Data Analysis'

In [110]:
help(os.listdir)

Help on built-in function listdir in module nt:

listdir(path=None)
    Return a list containing the names of the files in the directory.
    
    path can be specified as either str, bytes, or a path-like object.  If path is bytes,
      the filenames returned will also be bytes; in all other circumstances
      the filenames returned will be str.
    If path is None, uses the path='.'.
    On some platforms, path may also be specified as an open file descriptor;\
      the file descriptor must refer to a directory.
      If this functionality is unavailable, using it raises NotImplementedError.
    
    The list is in arbitrary order.  It does not include the special
    entries '.' and '..' even if they are present in the directory.



In [112]:
#relative path
os.listdir('.')

['.ipynb_checkpoints', 'climate.txt', 'climate_results.txt', 'numpy.ipynb']

In [113]:
#absolute path
os.listdir('/Users')

['All Users', 'Default', 'Default User', 'desktop.ini', 'himan', 'Public']

In [114]:
os.makedirs('./data',exist_ok=True)

In [115]:
os.listdir('.')

['.ipynb_checkpoints',
 'climate.txt',
 'climate_results.txt',
 'data',
 'numpy.ipynb']

In [116]:
'data' in os.listdir('.')

True

In [117]:
os.listdir('./data')

[]

In [118]:
url1 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans1.txt'
url2 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans2.txt'
url3 = 'https://gist.githubusercontent.com/aakashns/257f6e6c8719c17d0e498ea287d1a386/raw/7def9ef4234ddf0bc82f855ad67dac8b971852ef/loans3.txt'

In [119]:
from urllib.request import urlretrieve

In [120]:
urlretrieve(url1,'./data/loans1.txt')

('./data/loans1.txt', <http.client.HTTPMessage at 0x257dace9610>)

In [121]:
urlretrieve(url2, './data/loans2.txt')

('./data/loans2.txt', <http.client.HTTPMessage at 0x257db209040>)

In [122]:
urlretrieve(url3, './data/loans3.txt')

('./data/loans3.txt', <http.client.HTTPMessage at 0x257db4dd520>)

In [123]:
os.listdir('./data')

['loans1.txt', 'loans2.txt', 'loans3.txt']

In [124]:
file1=open('./data/loans1.txt',mode='r')

In [125]:
file1_contents=file1.read()

In [126]:
file1_contents

'amount,duration,rate,down_payment\n100000,36,0.08,20000\n200000,12,0.1,\n628400,120,0.12,100000\n4637400,240,0.06,\n42900,90,0.07,8900\n916000,16,0.13,\n45230,48,0.08,4300\n991360,99,0.08,\n423000,27,0.09,47200'

In [127]:
print(file1_contents)

amount,duration,rate,down_payment
100000,36,0.08,20000
200000,12,0.1,
628400,120,0.12,100000
4637400,240,0.06,
42900,90,0.07,8900
916000,16,0.13,
45230,48,0.08,4300
991360,99,0.08,
423000,27,0.09,47200


In [129]:
file1.close()

In [130]:
file1.read()

ValueError: I/O operation on closed file.

### <u>Closing files automatically using `with`</u>

In [131]:
with open('./data/loans2.txt') as file2:
    file2_contents=file2.read()
    print(file2_contents)

amount,duration,rate,down_payment
828400,120,0.11,100000
4633400,240,0.06,
42900,90,0.08,8900
983000,16,0.14,
15230,48,0.07,4300


In [132]:
file2.read()

ValueError: I/O operation on closed file.

### <u>Reading a file line by line</u>

In [133]:
with open('./data/loans3.txt','r') as file3: 
    file3_lines=file3.readlines()

In [134]:
file3_lines

['amount,duration,rate,down_payment\n',
 '45230,48,0.07,4300\n',
 '883000,16,0.14,\n',
 '100000,12,0.1,\n',
 '728400,120,0.12,100000\n',
 '3637400,240,0.06,\n',
 '82900,90,0.07,8900\n',
 '316000,16,0.13,\n',
 '15230,48,0.08,4300\n',
 '991360,99,0.08,\n',
 '323000,27,0.09,4720010000,36,0.08,20000\n',
 '528400,120,0.11,100000\n',
 '8633400,240,0.06,\n',
 '12900,90,0.08,8900']

### <u>Processing data from files</u>

In [137]:
def parse_headers(header_line):
    """Split a comma-separated header line into a list of column names.

    Leading and trailing whitespace (including the trailing newline) is
    removed from the line before splitting; individual names are not
    trimmed further.
    """
    trimmed_line = header_line.strip()
    column_names = trimmed_line.split(',')
    return column_names

In [139]:
file3_lines[0]

'amount,duration,rate,down_payment\n'

In [140]:
headers=parse_headers(file3_lines[0])

In [141]:
headers

['amount', 'duration', 'rate', 'down_payment']

In [147]:
def parse_values(data_line):
    """Convert one comma-separated data line into a list of values.

    The line is stripped of surrounding whitespace and split on commas.
    Each field is converted as follows:

    * an empty field becomes ``0.0``;
    * a field accepted by ``float()`` becomes that float;
    * any other field is kept unchanged as a string.
    """
    def convert(field):
        # An empty field is recorded as 0.0.
        if not field:
            return 0.0
        try:
            return float(field)
        except ValueError:
            # Not numeric: keep the raw text rather than failing the row.
            return field

    return [convert(field) for field in data_line.strip().split(',')]

In [148]:
file3_lines[1]

'45230,48,0.07,4300\n'

In [149]:
parse_values(file3_lines[1])

[45230.0, 48.0, 0.07, 4300.0]

In [150]:
file3_lines[2]

'883000,16,0.14,\n'

In [151]:
parse_values(file3_lines[2])

[883000.0, 16.0, 0.14, 0.0]