### Vectors, matrices and multidimensional arrays

In [1]:
import numpy as np
import pandas

In [4]:
# Odd integers starting at 1 and stopping before 15 (step of 2)
a = np.arange(start=1, stop=15, step=2)
print(a)

[ 1  3  5  7  9 11 13]


In [5]:
# Element-wise square root of `a`; kept in `b` because a later cell adds it back to `a`.
b = np.sqrt(a)

In [6]:
# Element-wise exponential (e**x) of every entry in `a`; shown as the cell output
np.exp(a)

array([2.71828183e+00, 2.00855369e+01, 1.48413159e+02, 1.09663316e+03,
       8.10308393e+03, 5.98741417e+04, 4.42413392e+05])

In [7]:
# Element-wise sum of the two arrays (operator form of np.add)
a + b

array([ 2.        ,  4.73205081,  7.23606798,  9.64575131, 12.        ,
       14.31662479, 16.60555128])

### Conditional Clause and Boolean Operation

In [8]:
# Two equal-length value arrays and a boolean mask that chooses between them
x = np.array([100, 400, 500, 600])  # taken where the mask is True
y = np.array([10, 15, 20, 25])  # taken where the mask is False

condition = np.array([True, True, False, False])

In [9]:
# Plain-Python selection: walk x, condition and y in lockstep and keep the
# x item where the mask is True, otherwise the y item.
z = [
    x_val if keep_x else y_val
    for x_val, keep_x, y_val in zip(x, condition, y)
]

In [10]:
# Show the list built by the comprehension-based selection
print(z)

[100, 400, 20, 25]


In [11]:
# Vectorised form of the same selection:
# np.where(condition, value-if-True, value-if-False) takes from x where
# `condition` is True and from y elsewhere.
z2 = np.where(condition, x, y)

In [12]:
# Show the array returned by np.where
print(z2)

[100 400  20  25]


In [13]:
# Built-in reduction: total of all elements of z2 (shown as the cell output)
z2.sum()

545

In [14]:
# 2x2 matrix used for the axis-wise sums and statistics in the next cells
n = np.array([[1, 2],
              [3, 4]])
print(n)

[[1 2]
 [3 4]]


In [15]:
# Sum down the rows (axis 0), giving one total per column
print(n.sum(axis=0))

[4 6]


In [16]:
# Sum across the columns (axis 1), giving one total per row
print(n.sum(axis=1))

[3 7]


In [17]:
# Mean, standard deviation and variance over every element of n, one per line
print(n.mean(), n.std(), n.var(), sep="\n")

2.5
1.118033988749895
1.25


In [18]:
# Boolean array with mixed values, used to demonstrate the any()/all() reductions
condition2 = np.array([True, False, True])

In [19]:
# .any() is the logical OR across the elements (alternative, not run here):
# condition2.any()
condition2.all() # logical AND across the elements: True only if every entry is True

False

In [20]:
# String array containing repeated labels, used to demonstrate np.unique
array2 = np.array(['solid', 'solid', 'liquid', 'solid', 'liquid'])

In [21]:
# Distinct values found in array2
print(np.unique(array2))

['liquid' 'solid']


In [22]:
# Input for the sorting example: integers in no particular order
unsorted = np.array([1, 2, 8, 10, 7, 9, 4])

In [23]:
# Sort the values and keep the result under a new name; it is printed in the next cell
sorted_array = np.sort(unsorted)

In [24]:
# Show the sorted values
print(sorted_array)

[ 1  2  4  7  8  9 10]


## 2. Data Manipulation with Pandas

In [25]:
import pandas as pd
from pandas import Series, DataFrame

In [26]:
# Series built from a plain list; no index is given, so pandas numbers the rows
obj1 = Series([5, 10, 15, 20])

In [27]:
# Show the Series: index on the left, values on the right, dtype at the bottom
print(obj1)

0     5
1    10
2    15
3    20
dtype: int64


In [28]:
# Build a Series directly from a NumPy array of strings
data_array = np.array(list('abc'))
s = Series(data_array)

In [29]:
# Same data, but with explicit integer labels instead of the default index
s = Series(data=data_array, index=[100, 101, 102])
print(s)

100    a
101    b
102    c
dtype: object


In [30]:
# Label -> value mapping; the dict keys become the Series index, in insertion order
revenue = Series({'ola': 20, 'uber': 80, 'grab': 40, 'goje': 35})
print(revenue)
# Further things to try:
# print(revenue['ola'])          # look up a single label
# print(revenue[revenue>=34])    # boolean-mask filtering

ola     20
uber    80
grab    40
goje    35
dtype: int64


In [31]:
# Convert the Series to a plain dict (index label -> value) and confirm the resulting type
revenue_dict = revenue.to_dict()
print(revenue_dict)
print(type(revenue_dict))

{'ola': 20, 'uber': 80, 'grab': 40, 'goje': 35}
<class 'dict'>


In [32]:
# Re-label `revenue` against a new index. None of these labels exist in
# `revenue`, so every entry of revenue2 is missing and the mask is all True.
index2 = ['musa', 'saleh', 'isa', 'modu', 'tanko']
revenue2 = Series(revenue, index=index2)
# print(revenue2)
revenue2.isnull()

musa     True
saleh    True
isa      True
modu     True
tanko    True
dtype: bool

In [33]:
# Addition aligns on index labels; the two Series share no label with a value
# on both sides, so every entry of the result is NaN.
print(revenue.add(revenue2))

goje    NaN
grab    NaN
isa     NaN
modu    NaN
musa    NaN
ola     NaN
saleh   NaN
tanko   NaN
uber    NaN
dtype: float64


### DATAFRAME

In [34]:
# Two Series sharing labels a-c; only series_b has label 'd'
series_a = Series({'a': 100, 'b': 200, 'c': 300})
series_b = Series({'a': 300, 'b': 400, 'c': 500, 'd': 600})

In [35]:
# Label-aligned sum: 'd' exists only in series_b, so its result is NaN
print(series_a.add(series_b))

a    400.0
b    600.0
c    800.0
d      NaN
dtype: float64


In [65]:
# 3x3 frame holding 0..8, with named columns and named rows
df1 = DataFrame(
    data=np.arange(9).reshape(3, 3),
    index=['Rank', 'Name', 'Industry'],
    columns=['a', 'b', 'c'],
)

In [66]:
# Show the whole frame (row labels on the left, column labels on top)
print(df1)

          a  b  c
Rank      0  1  2
Name      3  4  5
Industry  6  7  8


In [67]:
# head(1): only the first row of the frame
print(df1.head(1))

      a  b  c
Rank  0  1  2


In [68]:
# tail(1): only the last row of the frame
print(df1.tail(1))

          a  b  c
Industry  6  7  8


In [70]:
# Add a new column 'd' to df1 from a NumPy array: one value per row, in row order.
array1 = np.array([9,10,11])

# Assigning to a column name that does not exist yet creates the column (modifies df1 in place).
df1['d'] = array1

print(df1)

          a  b  c   d
Rank      0  1  2   9
Name      3  4  5  10
Industry  6  7  8  11


In [80]:
# Deletion example, left commented out so it does not run:
# `del df1['d']` would remove column 'd' from df1 in place.
# del df1['d']
# print(df1)

In [82]:
# Re-create the two example Series: labels a-c are shared, 'd' is only in series_b
series_a = Series(data=[100, 200, 300], index=list('abc'))
series_b = Series(data=[300, 400, 500, 600], index=list('abcd'))

In [84]:
# Sum of the two Series, aligned on index labels: 'd' has no partner in
# series_a, so its entry in the result is NaN.
print(series_a + series_b)

a    400.0
b    600.0
c    800.0
d      NaN
dtype: float64


In [89]:
# Two frames whose row and column labels only partly overlap:
# df1 is 2x2 (car/bike x a/b), df2 is 3x3 (car/bike/cycle x a/b/c).
df1 = DataFrame(
    np.arange(4).reshape(2, 2),
    index=['car', 'bike'],
    columns=['a', 'b'],
)
df2 = DataFrame(
    np.arange(9).reshape(3, 3),
    index=['car', 'bike', 'cycle'],
    columns=['a', 'b', 'c'],
)

In [91]:
# Plain addition aligns on both row and column labels; any cell present in
# only one of the frames comes out as NaN.
print(df1.add(df2))

         a    b   c
bike   5.0  7.0 NaN
car    0.0  2.0 NaN
cycle  NaN  NaN NaN


In [93]:
# Label-aligned sum that treats a cell missing from one frame as 0 instead of
# producing NaN. The result is bound to a new name rather than back to df1:
# `df1 = df1.add(df2, ...)` adds df2 again each time the cell is re-run, so the
# printed totals would depend on how many times the cell had been executed.
df_total = df1.add(df2, fill_value=0)
print(df_total)

         a    b    c
bike   5.0  7.0  5.0
car    0.0  2.0  2.0
cycle  6.0  7.0  8.0
