# NumPy
NumPy, short for Numerical Python, is one of the most important foundational packages for numerical computing in Python.

In [1]:
import numpy as np

# Build the same one-million-integer sequence twice: once as an ndarray, once as a plain list.
my_arr = np.arange(1_000_000)
print(my_arr)  # NumPy abbreviates long arrays with "..." when printing

my_list = list(range(1_000_000))
print(my_list[1:10])  # elements at positions 1 through 9 (the slice end is exclusive)

[     0      1      2 ... 999997 999998 999999]
[1, 2, 3, 4, 5, 6, 7, 8, 9]


In [2]:
%timeit my_arr2=my_arr*2

813 µs ± 39.7 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [3]:
%timeit my_list2=[x*2 for x in my_list]

21.7 ms ± 567 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [4]:
# One of the key features of NumPy is its N-dimensional array object, or ndarray,
# which is a fast, flexible container for large datasets in Python.

data = np.array([
    [1.5, 0.1, 3],
    [0, -3, 6.5],
])  # 2-D array: 2 rows x 3 columns

data

array([[ 1.5,  0.1,  3. ],
       [ 0. , -3. ,  6.5]])

In [5]:
data.shape  # tuple with the length of each axis: (n_elements,) for a 1-D array, (n_rows, n_columns) for a 2-D array

(2, 3)

In [6]:
data.ndim  # number of dimensions (axes) of the array

2

In [7]:
# The size attribute is an integer giving the total number of elements in the array
# (pandas DataFrames expose an attribute of the same name).
# For a 1-D array, it equals the length of the array.
# For a 2-D array, it equals the number of rows multiplied by the number of columns.

data.size

6

In [8]:
data*10

array([[ 15.,   1.,  30.],
       [  0., -30.,  65.]])

In [9]:
data+data

array([[ 3. ,  0.2,  6. ],
       [ 0. , -6. , 13. ]])

In [11]:
data.dtype

dtype('float64')

In [12]:
data1 = [6, 7.5, 8.0, 1]  # a plain Python list mixing ints and floats

arr1 = np.array(data1)  # NumPy picks one common dtype for all elements (float64 here)

print(arr1)

[6.  7.5 8.  1. ]


In [13]:
arr1.ndim

1

In [15]:
arr1.size

4

In [16]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [18]:
np.zeros((3,4))

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [19]:
np.arange(15)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [22]:
arr1=np.array([1,2,3],dtype=np.float64)

arr2=np.array([1,2,3],dtype=np.int32)

print(arr1.dtype)

print(arr2.dtype)

float64
int32


In [23]:
arr = np.array([1, 2, 3, 4, 5], dtype=np.int32)
print(arr.dtype)

# astype returns a new array whose elements are converted to the requested dtype
float_arr = arr.astype(np.float64)
print(float_arr.dtype)
print(float_arr)

int32
float64
[1. 2. 3. 4. 5.]


In [24]:
arr=np.array([[1.,2.,3.],[4.,5.,6.]])

arr

array([[1., 2., 3.],
       [4., 5., 6.]])

In [25]:
arr*arr

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [26]:
arr-arr

array([[0., 0., 0.],
       [0., 0., 0.]])

In [27]:
1/arr

array([[1.        , 0.5       , 0.33333333],
       [0.25      , 0.2       , 0.16666667]])

In [28]:
arr**2

array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

In [29]:
arr2=np.array([[0.,4.,1.],[7.,2.,12.]])

arr2

array([[ 0.,  4.,  1.],
       [ 7.,  2., 12.]])

In [30]:
arr2>arr

array([[False,  True, False],
       [ True, False,  True]])

In [31]:
arr=np.arange(10)  # array holding the integers 0 through 9 (the stop value 10 is excluded)
arr

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [32]:
arr.dtype  # data type of the array's elements

dtype('int64')

In [38]:
arr[9] #indexing

np.int64(9)

In [39]:
arr[5:8]  # slicing: elements at positions 5, 6 and 7 (the end index is excluded)

array([5, 6, 7])

In [40]:
arr[5:8]= [0, 1, 2]
arr

array([0, 1, 2, 3, 4, 0, 1, 2, 8, 9])

In [41]:
arr[5:8]=12
arr

array([ 0,  1,  2,  3,  4, 12, 12, 12,  8,  9])

In [42]:
arr_slice=arr[5:8]

arr_slice

array([12, 12, 12])

In [43]:
# arr_slice was created with arr[5:8], which is a view on arr rather than a copy,
# so this assignment also changes arr[6] in the original array.
arr_slice[1]=12345
arr_slice

array([   12, 12345,    12])

In [44]:
arr2d=np.array([[1,2,3],[4,5,6],[7,8,9]])

print(arr2d)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [45]:
arr2d.ndim

2

In [46]:
arr2d[2]

array([7, 8, 9])

In [48]:
arr2d[2][1]

np.int64(8)

In [49]:
arr2d[2,1]  # same element as arr2d[2][1], selected with a single comma-separated index (row 2, column 1)

np.int64(8)

In [50]:
arr2d[:2]

array([[1, 2, 3],
       [4, 5, 6]])

In [51]:
arr2d[:2,1:]  # first two rows, columns from index 1 onward

array([[2, 3],
       [5, 6]])

In [52]:
arr=np.arange(15).reshape((3,5))  #reshape method is used to change the shape of an array without changing its data.
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [53]:
arr=np.arange(15).reshape((5,3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [54]:
arr.T #.T attribute is used to transpose an array. Transposing an array means swapping its rows and columns. 

array([[ 0,  3,  6,  9, 12],
       [ 1,  4,  7, 10, 13],
       [ 2,  5,  8, 11, 14]])

In [55]:
array_3d = np.array([[[2, 1, 3], [2, 3, 4], [2, 4, 5], [9, 8, 7]]])  # 3-D array of shape (1, 4, 3): one block of 4 rows x 3 columns
array_3d

array([[[2, 1, 3],
        [2, 3, 4],
        [2, 4, 5],
        [9, 8, 7]]])

In [56]:
array_3d.ndim

3

In [57]:
array_3d.T  # with more than two dimensions, .T reverses the order of the axes: shape (1, 4, 3) becomes (3, 4, 1)

array([[[2],
        [2],
        [2],
        [9]],

       [[1],
        [3],
        [4],
        [8]],

       [[3],
        [4],
        [5],
        [7]]])

In [65]:
arr=np.array([[0,1,0],[1,2,-2],[6,3,2],[-1,0,-1],[1,0,1]])
arr

array([[ 0,  1,  0],
       [ 1,  2, -2],
       [ 6,  3,  2],
       [-1,  0, -1],
       [ 1,  0,  1]])

In [66]:
# Matrix product of the transpose (3 x 5) with the array itself (5 x 3); the result is 3 x 3
arr.T @ arr

array([[39, 20, 12],
       [20, 14,  2],
       [12,  2, 10]])

# Pandas 

* Pandas contains data structures and data manipulation tools designed to make data cleaning and analysis fast and convenient in Python.
* Its two main data structures are the Series and the DataFrame.
* A Series is a one-dimensional, array-like object containing a sequence of values together with an associated index of labels.

In [67]:
import pandas as pd

In [68]:
obj= pd.Series([4,7,-5,3])
obj

0    4
1    7
2   -5
3    3
dtype: int64

In [69]:
# Supply explicit string labels instead of the default 0..n-1 integer index
obj2 = pd.Series([4, 7, -5, 3], index=["d", "b", "a", "c"])
obj2

d    4
b    7
a   -5
c    3
dtype: int64

In [70]:
obj2["a"]  # label-based indexing: the value stored under index label "a"

np.int64(-5)

In [71]:
obj2[["c","a","d"]]

c    3
a   -5
d    4
dtype: int64

In [72]:
# Index labels do not have to be unique: "a" is used for two different values here
obj2 = pd.Series([4, 7, -5, 3, 5], index=["d", "b", "a", "a", "c"])
obj2

d    4
b    7
a   -5
a    3
c    5
dtype: int64

In [73]:
obj2[obj2>0]  # boolean filtering: keep only the entries whose value is positive

d    4
b    7
a    3
c    5
dtype: int64

In [74]:
obj2*2

d     8
b    14
a   -10
a     6
c    10
dtype: int64

In [75]:
import numpy as np

np.exp(obj2)  # element-wise exponential (e**x); the result is a float64 Series that keeps obj2's index labels

d      54.598150
b    1096.633158
a       0.006738
a      20.085537
c     148.413159
dtype: float64

In [77]:
sdata = {
    "Ohio": 35000,
    "Texas": 71000,
    "Oregon": 16000,
    "Utah": 5000,
}

# Building a Series from a dict: the keys become the index labels
obj3 = pd.Series(sdata)

obj3

Ohio      35000
Texas     71000
Oregon    16000
Utah       5000
dtype: int64

In [78]:
obj3.to_dict()  #converts a Series (or DataFrame) to a dictionary

{'Ohio': 35000, 'Texas': 71000, 'Oregon': 16000, 'Utah': 5000}

# DataFrame
* A DataFrame represents a rectangular table of data and contains an ordered, named collection of columns, each of which can have a different value type.
* A DataFrame has both a row index and a column index.

In [79]:
# Column-oriented input: each key becomes a column, each equal-length list holds that column's values
data = {
    "states": ["Ohio", "Ohio", "Ohio", "Nevada", "Nevada", "Nevada"],
    "year": [2000, 2001, 2002, 2001, 2002, 2003],
    "pop": [1.5, 1.7, 3.6, 2.4, 2.9, 3.2],
}

frame = pd.DataFrame(data)

In [80]:
frame

Unnamed: 0,states,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [81]:
frame.head()  # returns the first 5 rows of the DataFrame (5 is the default row count)

Unnamed: 0,states,year,pop
0,Ohio,2000,1.5
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9


In [82]:
frame.tail()  # returns the last 5 rows of the DataFrame (5 is the default row count)

Unnamed: 0,states,year,pop
1,Ohio,2001,1.7
2,Ohio,2002,3.6
3,Nevada,2001,2.4
4,Nevada,2002,2.9
5,Nevada,2003,3.2


In [83]:
pd.DataFrame(data, columns=["year","states","pop"])

Unnamed: 0,year,states,pop
0,2000,Ohio,1.5
1,2001,Ohio,1.7
2,2002,Ohio,3.6
3,2001,Nevada,2.4
4,2002,Nevada,2.9
5,2003,Nevada,3.2


In [84]:
# columns= fixes the column order; "debt" is not a key of data, so that column is created with missing values (NaN)
frame2=pd.DataFrame(data, columns=["year","states","pop","debt"])
frame2

Unnamed: 0,year,states,pop,debt
0,2000,Ohio,1.5,
1,2001,Ohio,1.7,
2,2002,Ohio,3.6,
3,2001,Nevada,2.4,
4,2002,Nevada,2.9,
5,2003,Nevada,3.2,


In [85]:
frame2.columns #tells names of columns in dataset

Index(['year', 'states', 'pop', 'debt'], dtype='object')

In [86]:
frame2["states"]

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: states, dtype: object

In [87]:
frame2.states  # attribute-style access; returns the same column as frame2["states"]

0      Ohio
1      Ohio
2      Ohio
3    Nevada
4    Nevada
5    Nevada
Name: states, dtype: object

In [88]:
frame2[["states","year"]]

Unnamed: 0,states,year
0,Ohio,2000
1,Ohio,2001
2,Ohio,2002
3,Nevada,2001
4,Nevada,2002
5,Nevada,2003


In [89]:
frame2.loc[1]  # the loc attribute selects rows and columns by label; here, the row whose index label is 1

year      2001
states    Ohio
pop        1.7
debt       NaN
Name: 1, dtype: object

In [90]:
frame2.iloc[2]  # the iloc attribute selects rows and columns by integer position; here, the third row

year      2002
states    Ohio
pop        3.6
debt       NaN
Name: 2, dtype: object

In [91]:
frame2.loc[2]  # same row as frame2.iloc[2]: with the default integer index, labels and positions coincide

year      2002
states    Ohio
pop        3.6
debt       NaN
Name: 2, dtype: object

In [92]:
frame2["debt"]=[16.5, 12.3, 23.4, 34.2, 40.9, 54.8]
frame2

Unnamed: 0,year,states,pop,debt
0,2000,Ohio,1.5,16.5
1,2001,Ohio,1.7,12.3
2,2002,Ohio,3.6,23.4
3,2001,Nevada,2.4,34.2
4,2002,Nevada,2.9,40.9
5,2003,Nevada,3.2,54.8


In [93]:
frame2["debt"]=np.arange(6.)  # overwrite the column with the floats 0.0 through 5.0, one value per row
frame2

Unnamed: 0,year,states,pop,debt
0,2000,Ohio,1.5,0.0
1,2001,Ohio,1.7,1.0
2,2002,Ohio,3.6,2.0
3,2001,Nevada,2.4,3.0
4,2002,Nevada,2.9,4.0
5,2003,Nevada,3.2,5.0


In [94]:
# Boolean column: True for rows whose state is "Ohio", False otherwise
frame2["eastern"] = (frame2["states"] == "Ohio")
frame2

Unnamed: 0,year,states,pop,debt,eastern
0,2000,Ohio,1.5,0.0,True
1,2001,Ohio,1.7,1.0,True
2,2002,Ohio,3.6,2.0,True
3,2001,Nevada,2.4,3.0,False
4,2002,Nevada,2.9,4.0,False
5,2003,Nevada,3.2,5.0,False


In [95]:
del frame2["eastern"]  # removes the "eastern" column from frame2 in place

In [96]:
frame2.columns

Index(['year', 'states', 'pop', 'debt'], dtype='object')