## NumPy Array and Vectorization

##### Convention for importing numpy

In [1]:
import numpy as np

In [2]:
arr = [6, 7, 8, 9]
print (type(arr))

<class 'list'>


In [3]:
print (arr)

[6, 7, 8, 9]


In [4]:
print (type(arr))

<class 'list'>


In [5]:
a = np.array(arr)

In [6]:
print (a)

[6 7 8 9]


In [7]:
print (type(a))

<class 'numpy.ndarray'>


In [8]:
print (a.shape)

(4,)


In [9]:
print (a.dtype)

int32


In [10]:
# get the dimension of a with ndim

In [11]:
print (a.ndim)

1


In [12]:
b = np.array([[1, 2, 3], [4, 5, 6]])

In [13]:
print (b)

[[1 2 3]
 [4 5 6]]


In [14]:
print (b.ndim)

2


In [15]:
b.shape

(2, 3)

In [16]:
print (type(b))

<class 'numpy.ndarray'>


#####  Inbuilt function that can be used to initialize numpy

In [17]:
# a 2x3 array with random values
np.random.random((2,3))

array([[0.19142583, 0.0120061 , 0.99067431],
       [0.42173515, 0.5791633 , 0.70370629]])

In [18]:
# a 2x3 array of zeros
np.zeros((2,3))

array([[0., 0., 0.],
       [0., 0., 0.]])

In [19]:
# a 2x3 array of ones
np.ones((2,3))

array([[1., 1., 1.],
       [1., 1., 1.]])

In [20]:
# a 3x3 identity matrix
np.identity(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

##### Intra-operability of arrays and scalars

In [21]:
c = np.array([[9.0, 8.0, 7.0], [1.0, 2.0, 3.0]])
d = np.array([[4.0, 5.0, 6.0], [9.0, 8.0, 7.0]])

In [22]:
c + d

array([[13., 13., 13.],
       [10., 10., 10.]])

In [23]:
c * d

array([[36., 40., 42.],
       [ 9., 16., 21.]])

In [24]:
5 / d

array([[1.25      , 1.        , 0.83333333],
       [0.55555556, 0.625     , 0.71428571]])

In [25]:
c ** 2

array([[81., 64., 49.],
       [ 1.,  4.,  9.]])

##### Indexing with arrays and using arrays for data processing

In [26]:
# The elements in the example arrays above can be accessed by indexing like lists in Python such that:

In [27]:
a[0]

6

In [28]:
a[3]

9

In [29]:
b[0, 0]

1

In [30]:
b[1, 2]

6

In [31]:
c[0, 1]

8.0

In [32]:
# Elements in arrays  can also be retrieved by slicing rows and columns or a combination of indexing and slicing.

In [33]:
d[1, 0:2]

array([9., 8.])

In [34]:
e = np.array([[10, 11, 12], [13, 14, 15], [16, 17, 18], [19, 20, 21]])

In [35]:
print (e)

[[10 11 12]
 [13 14 15]
 [16 17 18]
 [19 20 21]]


In [36]:
# Slicing

In [37]:
e[:3, :2]

array([[10, 11],
       [13, 14],
       [16, 17]])

In [38]:
# There are other advanced methods of indexing which are shown below.
# Integer indexing
e[[2, 0, 3, 1], [2, 1, 0, 2]]

array([18, 11, 19, 15])

In [39]:
# boolean indexing meeting a specified condition
e[e>15]

array([16, 17, 18, 19, 20, 21])

## Pandas - So Much More Than A Cute Animal

##### Convention for importing Pandas

In [40]:
import pandas as pd
import numpy as np

In [41]:
days = pd.Series(['Monday', 'Tuesday', 'Wednesday'])
print(days)

0       Monday
1      Tuesday
2    Wednesday
dtype: object


In [42]:
# Creating series with a numpy array
days_list = np.array(['Monday', 'Tuesday', 'Wednesday'])
numpy_days = pd.Series(days_list)
print(numpy_days)

0       Monday
1      Tuesday
2    Wednesday
dtype: object


In [43]:
# Using strings as index
days = pd.Series(['Monday', 'Tuesday', 'Wednesday'], index = ['a', 'b', 'c'])

# Create series from a dictionary
days1 = pd.Series({'a':'Monday', 'b':'Tuesday', 'c':'Wednesday'})
print(days)
print(days1)

a       Monday
b      Tuesday
c    Wednesday
dtype: object
a       Monday
b      Tuesday
c    Wednesday
dtype: object


In [44]:
# Accessing series
days[0]

'Monday'

In [45]:
days[1:]

b      Tuesday
c    Wednesday
dtype: object

In [46]:
days['c']

'Wednesday'

In [47]:
print(pd.DataFrame())

Empty DataFrame
Columns: []
Index: []


In [48]:
# Create a dataframe from a dictionary
df_dict = {
    'Country':["Ghana", 'Kenya', 'Nigeria', 'Togo'], 
    'Capital':["Accra", 'Nairobi', 'Abuja', 'Lome'], 
    'Population':[10000, 8500, 35000, 12000], 
    'Age': [60, 70, 80,75]
          }

df = pd.DataFrame(df_dict, index = [2, 4, 6, 8])

df

Unnamed: 0,Country,Capital,Population,Age
2,Ghana,Accra,10000,60
4,Kenya,Nairobi,8500,70
6,Nigeria,Abuja,35000,80
8,Togo,Lome,12000,75


In [49]:
df_list = [
    ["Ghana", 'Accra', 10000, 60], 
    ["Kenya", 'Nairobi', 8500, 70], 
    ['Nigeria', 'Abuja', 35000, 80 ], 
    ['Togo', 'Lome', 12000,75]
]

df1 = pd.DataFrame(df_list, columns = ['Country', 'Capital', 'Population', 'Age'], index = [2, 4, 6, 8] )

df1

Unnamed: 0,Country,Capital,Population,Age
2,Ghana,Accra,10000,60
4,Kenya,Nairobi,8500,70
6,Nigeria,Abuja,35000,80
8,Togo,Lome,12000,75


In [50]:
# select the row in the index 3
df.iloc[3]

Country        Togo
Capital        Lome
Population    12000
Age              75
Name: 8, dtype: object

In [51]:
# select the row with index label 6
df.loc[6]

Country       Nigeria
Capital         Abuja
Population      35000
Age                80
Name: 6, dtype: object

In [52]:
# Select the capital column
df['Capital']

2      Accra
4    Nairobi
6      Abuja
8       Lome
Name: Capital, dtype: object

In [53]:
df.at[6, 'Country']

'Nigeria'

In [54]:
df.iat[2, 0]

'Nigeria'

In [55]:
df["Population"].sum()

65500

In [56]:
df.mean()

  df.mean()


Population    16375.00
Age              71.25
dtype: float64

In [57]:
df.describe()

Unnamed: 0,Population,Age
count,4.0,4.0
mean,16375.0,71.25
std,12499.166639,8.539126
min,8500.0,60.0
25%,9625.0,67.5
50%,11000.0,72.5
75%,17750.0,76.25
max,35000.0,80.0


In [58]:
df_dict2 =  { 
        'Name': ['James', 'Yemen', 'Caro', np.nan], 
        'Profession': ['Researcher', 'Artist', 'Doctor', 'Writer'], 
        'Experience': [12, np.nan, 10, 8], 
        'Height': [np.nan, 175, 180, 150]
            }
new_df = pd.DataFrame(df_dict2)
new_df

Unnamed: 0,Name,Profession,Experience,Height
0,James,Researcher,12.0,
1,Yemen,Artist,,175.0
2,Caro,Doctor,10.0,180.0
3,,Writer,8.0,150.0


In [59]:
new_df.isnull()

Unnamed: 0,Name,Profession,Experience,Height
0,False,False,False,True
1,False,False,True,False
2,False,False,False,False
3,True,False,False,False


In [60]:
new_df.dropna()

Unnamed: 0,Name,Profession,Experience,Height
2,Caro,Doctor,10.0,180.0
