In [1]:
!pip install numpy pandas matplotlib

Defaulting to user installation because normal site-packages is not writeable



# Why Use NumPy?
In Python, lists serve the purpose of arrays, but they are slow to process.

NumPy aims to provide an array object that is up to 50x faster than traditional Python lists.

The array object in NumPy is called `ndarray`; it comes with many supporting functions that make working with arrays very easy.

Arrays are used very frequently in data science, where speed and resources are very important.

## Pre-requisites:
Python installed, and a basic knowledge of Python.

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [3]:
# A simple 1-D NumPy array, built from a plain Python list.
values = [1, 2, 3, 4, 5]
arr = np.array(values)
arr

array([1, 2, 3, 4, 5])

In [4]:
# np.array returns an object of type numpy.ndarray.
type(arr)

numpy.ndarray

In [13]:
# np.array also accepts a tuple; the result is the same 1-D array.
values = (1, 2, 3, 4, 5)
arr = np.array(values)
# ndim is the number of dimensions (axes) of the array.
arr.ndim

1

In [6]:
# Show the array that was built from the tuple.
arr

array([1, 2, 3, 4, 5])

In [7]:
# A bare scalar (no list around it) becomes a 0-D array.
scalar = 42
arr = np.array(scalar)
arr

array(42)

# 2-d array numpy

In [15]:
# A list of two equal-length rows becomes a 2-D array.
rows = [
    [1, 2, 3],
    [4, 5, 6],
]
arr = np.array(rows)

In [16]:
# Display the array together with its number of dimensions, as one tuple.
(arr, arr.ndim)

(array([[1, 2, 3],
        [4, 5, 6]]),
 2)

In [17]:
# Two identical 2 x 3 blocks stacked together form a 3-D array.
block = [[1, 2, 3], [4, 5, 6]]
arr = np.array([block, block])

In [18]:
# Show the 3-D array: two blocks, each with two rows of three values.
arr

array([[[1, 2, 3],
        [4, 5, 6]],

       [[1, 2, 3],
        [4, 5, 6]]])

In [19]:
# Three levels of nesting in the input give three dimensions.
arr.ndim

3

In [20]:
# A single index selects along the first axis: here, the second 2 x 3 block.
arr[1]

array([[1, 2, 3],
       [4, 5, 6]])

In [21]:
# One index per axis picks a single element: block 0, row 0, column 0.
arr[0,0,0]

1

In [22]:
# Two indices leave the last axis free: the whole of row 0 in block 0.
arr[0,0]

array([1, 2, 3])

In [24]:
arr = np.array([[1, 2, 3, 4, 5],
                [6, 7, 8, 9, 10]])

# Negative indices count from the end: [1, -1] is the last item of the second row.
last_in_second_row = arr[1, -1]
print('Last element from 2nd dim: ', last_in_second_row)

Last element from 2nd dim:  10


## Slicing arrays
Slicing in Python means taking the elements from one given index up to (but not including) another given index.

We pass a slice instead of a single index, like this: `[start:end]`.

We can also define the step, like this: `[start:end:step]`.

In [27]:
values = [1, 2, 3, 4, 5, 6, 7]
arr = np.array(values)

# start=3, end=6 (excluded), step=2 -> the elements at indices 3 and 5.
arr[3:6:2]

array([4, 6])

In [28]:
# Leaving out the end index runs the slice to the end of the array.
print(arr[4:]) 

[5 6 7]


In [29]:
# Negative indices count from the end: third-from-last up to, but excluding, the last.
print(arr[-3:-1]) 

[5 6]


In [30]:
# Every second element, starting at index 1 and stopping before index 5.
print(arr[1:5:2]) 

[2 4]


In [32]:
arr = np.array([[1, 2, 3, 4, 5],
                [6, 7, 8, 9, 10]])

# Row 1, columns 1 to 3 (the end index 4 is excluded).
print(arr[1, 1:4])

[7 8 9]


In [33]:
# Column 2 taken from rows 0 and 1.
print(arr[0:2, 2]) 

[3 8]


In [34]:
# Slicing both axes (rows 0-1, columns 1-3) returns a 2-D sub-array.
print(arr[0:2, 1:4]) 

[[2 3 4]
 [7 8 9]]


In [38]:
# copy() gives `x` its own data, so the later write to `arr` does not reach it.
arr = np.array([1, 2, 3, 4, 5])
x = arr.copy()

arr[0] = 42

# The original shows the change; the copy still holds the old values.
print(arr, x, sep="\n")

[42  2  3  4  5]
[1 2 3 4 5]


In [40]:
# view() shares the underlying data: a write through either name shows up in both.
arr = np.array([1, 2, 3, 4, 5])
x = arr.view()

arr[0] = 42   # change made through the original ...
x[2] = 26     # ... and another one made through the view

# Both lines print the same values.
print(arr, x, sep="\n")

[42  2 26  4  5]
[42  2 26  4  5]


# shape

In [42]:
arr = np.array([[1, 2, 3, 4],
                [5, 6, 7, 8]])

# shape is a tuple with one entry per axis: 2 rows, 4 columns.
print(arr.shape)

(2, 4)


# Reshaping numpy arrays

In [43]:
values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
arr = np.array(values)

# 12 elements fit exactly into 4 rows of 3.
newarr = arr.reshape((4, 3))
newarr

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [44]:
values = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
arr = np.array(values)

# The same 12 elements arranged as 2 blocks of 3 rows x 2 columns.
newarr = arr.reshape((2, 3, 2))
newarr

array([[[ 1,  2],
        [ 3,  4],
        [ 5,  6]],

       [[ 7,  8],
        [ 9, 10],
        [11, 12]]])

In [45]:
arr = np.array([1, 2, 3, 4, 5, 6, 7, 8])

# reshape only works when the element counts match: 8 values cannot fill 3 x 3 = 9 slots.
# The error is the point of this cell, so it is caught and printed; that way the
# notebook still survives "Restart & Run All".
try:
    newarr = arr.reshape(3, 3)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: cannot reshape array of size 8 into shape (3,3)

In [56]:
values = [1, 2, 3, 4, 5, 6, 7, 8]
arr = np.array(values)

# -1 marks one "unknown" dimension; NumPy works it out from the element count.
newarr = arr.reshape((2, 2, -1))

In [57]:
# The -1 dimension was worked out as 2, giving shape (2, 2, 2).
newarr

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [58]:

arr = np.array([[1, 2, 3],
                [4, 5, 6]])

# reshape(-1) flattens an array of any shape down to 1-D.
newarr = arr.reshape((-1,))


In [59]:
# The 2 x 3 array, flattened to six elements in row order.
newarr

array([1, 2, 3, 4, 5, 6])

# Iteration

In [60]:

arr = np.array([1, 2, 3])

# Iterating over a 1-D array yields its elements one at a time.
for x in arr:
    print(x)

1
2
3


In [61]:

arr = np.array([[1, 2, 3],
                [4, 5, 6]])

# Iterating over a 2-D array yields whole rows, not single numbers.
for x in arr:
    print(x)

[1 2 3]
[4 5 6]


In [62]:
# To reach the individual numbers, loop over the rows and then over each row's elements.
for x in arr:
    for y in x:
        print(y)

1
2
3
4
5
6


In [63]:
arr = np.array([
    [[1, 2], [3, 4]],
    [[5, 6], [7, 8]],
])

# np.nditer visits every element in turn, so no nested loops are needed
# however many dimensions the array has.
for x in np.nditer(arr):
    print(x)

1
2
3
4
5
6
7
8


In [72]:
# Keep 2 rows and let NumPy work out the row length (-1): the 8 elements become 2 x 4.
newarr = arr.reshape((2,-1))

In [73]:
# Show the reshaped 2 x 4 array.
newarr

array([[1, 2, 3, 4],
       [5, 6, 7, 8]])

# CONCAT

In [74]:
# Join two 1-D arrays end to end.
arr1 = np.array([1, 2, 3])
arr2 = np.array([4, 5, 6])

parts = (arr1, arr2)
arr = np.concatenate(parts)


In [75]:
# The result holds arr1's elements followed by arr2's.
arr

array([1, 2, 3, 4, 5, 6])

In [90]:

# The same join with two shorter arrays.
arr1 = np.array([1, 2])
arr2 = np.array([3, 4])

parts = (arr1, arr2)
arr = np.concatenate(parts)

In [91]:
# The result holds arr1's elements followed by arr2's.
arr

array([1, 2, 3, 4])

In [92]:
arr = np.array([3, 2, 0, 1])

# np.sort returns a sorted copy; `arr` itself keeps its original order.
sorted_arr = np.sort(arr)
print(sorted_arr)

[0 1 2 3]


In [93]:

arr = np.array(['banana', 'cherry', 'apple'])

# Strings are sorted alphabetically.
sorted_arr = np.sort(arr)
print(sorted_arr)

['apple' 'banana' 'cherry']


In [94]:
arr = np.array([True, False, True])

# Booleans sort with False before True.
sorted_arr = np.sort(arr)
print(sorted_arr)

[False  True  True]


In [95]:

arr = np.array([[3, 2, 4],
                [5, 0, 1]])

# On a 2-D array each row is sorted independently.
sorted_arr = np.sort(arr)
print(sorted_arr)

[[2 3 4]
 [0 1 5]]


# Random

In [97]:
from numpy import random

# Seed NumPy's global generator so that this cell and the random cells after it
# produce the same numbers on every "Restart & Run All".
random.seed(42)

# Five random integers from 0 up to (but not including) 100.
x = random.randint(100, size=5)

print(x)

[61 17  0 55 56]


In [99]:
# A 2 x 2 array of random integers from 5 (inclusive) up to 45 (exclusive).
np.random.randint(5,45,size=(2,2))

array([[28,  8],
       [43, 38]])

In [100]:
# Five random floats from the half-open interval [0, 1).
np.random.rand(5)

array([0.32953093, 0.71482222, 0.1688338 , 0.39707273, 0.14726963])

In [102]:
# A 5 x 5 array of random floats from [0, 1).
np.random.random(size=(5,5))

array([[0.00506386, 0.3616521 , 0.0552562 , 0.7384598 , 0.32950467],
       [0.25039153, 0.8533    , 0.57446615, 0.58390255, 0.64109373],
       [0.59453228, 0.12555525, 0.11096398, 0.87192633, 0.13840437],
       [0.26967402, 0.21818357, 0.61091112, 0.92962446, 0.83784764],
       [0.58815646, 0.55521391, 0.09111093, 0.27855426, 0.85958044]])

In [105]:
# Pick two values at random from the list (sampling is with replacement by default,
# so the same value can come up twice).
np.random.choice([5,6,9,8,7,12,11],size=(2))

array([7, 6])

In [106]:
# With plain Python lists, + concatenates; it does not add element by element.
x = [1, 2, 3, 4]
y = [4, 5, 6, 7]

x + y

[1, 2, 3, 4, 4, 5, 6, 7]

In [107]:
# np.add treats the two lists as arrays and adds them element by element.
np.add(x,y)

array([ 5,  7,  9, 11])

# Pandas

In [108]:
import pandas as pd

In [120]:
# SERIES
# A Series is a one-dimensional, labelled column of values.
# Without an explicit index the labels are simply 0, 1, 2, ...
a = [1, 7, 2]
myvar = pd.Series(data=a)

In [110]:
# Show the Series: index labels on the left, values on the right, dtype underneath.
myvar

0    1
1    7
2    2
dtype: int64

In [111]:
 print(myvar[0]) 

1


In [112]:
a = [1, 7, 2]
labels = ["x", "y", "z"]

# Custom labels replace the default 0, 1, 2 index.
myvar = pd.Series(a, index=labels)
myvar

x    1
y    7
z    2
dtype: int64

In [113]:
# Building a Series from a dict uses the keys as index labels.
calories = {
    "day1": 420,
    "day2": 380,
    "day3": 390,
}
myvar = pd.Series(data=calories)

In [114]:
# The dict keys have become the index labels.
myvar

day1    420
day2    380
day3    390
dtype: int64

In [115]:

calories = {
    "day1": 420,
    "day2": 380,
    "day3": 390,
}

# Passing `index` with a dict keeps only the listed keys.
wanted_days = ["day1", "day2"]
myvar = pd.Series(calories, index=wanted_days)


In [116]:
# Only the keys listed in `index` were kept; "day3" is gone.
myvar

day1    420
day2    380
dtype: int64

In [119]:
# Look up a single value by its index label.
myvar['day1']

420

# Data Frame

In [126]:

# Here every value is a scalar, so an explicit index is passed to define the single row.
data = {"calories": 45, "duration": 50}
row_labels = [1]

myvar = pd.DataFrame(data, index=row_labels)

In [127]:
# Show the one-row DataFrame (its row label is 1).
myvar

Unnamed: 0,calories,duration
1,45,50


In [128]:

# Each dict key becomes a column; each list supplies that column's values row by row.
data = {
    "calories": [420, 380, 390],
    "duration": [50, 40, 45],
}
myvar = pd.DataFrame(data=data)

In [129]:
# Show the DataFrame: one column per dict key, rows labelled 0 to 2.
myvar

Unnamed: 0,calories,duration
0,420,50
1,380,40
2,390,45


<img src="https://storage.googleapis.com/lds-media/images/series-and-dataframe.width-1200.png" alt="Series and DataFrame">