In [None]:
import numpy as np
import pandas as pd

# NumPy

### 0-dimensional array

In [18]:
# A bare scalar (no enclosing list) produces an array with zero axes.
scalar_value = 42
array0d = np.array(scalar_value)
print(array0d)

42


### 1-dimensional array

In [19]:
# A flat list of numbers produces an array with a single axis.
values = [1, 2, 3, 4, 5]
array1d = np.array(values)
print(array1d)

[1 2 3 4 5]


### 2-dimensional array

In [20]:
# A list of equal-length lists produces a two-axis array: one inner list per row.
array2d = np.array(
    [
        [1, 2, 3],  # row 0
        [4, 5, 6],  # row 1
    ]
)
print(array2d)

[[1 2 3]
 [4 5 6]]


### Create an array filled with random values

In [23]:
# Draw a 2x2 array of floats from the half-open interval [0, 1).
# A locally seeded generator is used so the cell prints the same values on
# every run; the unseeded global generator gave different numbers each time.
e = np.random.default_rng(seed=0).random((2, 2))
print(e)

[[0.53286234 0.86012026]
 [0.24465862 0.96207117]]


### Vectorised operations

In [24]:
# np.sqrt is applied to every element at once, so no explicit Python loop
# is needed to take the square root of each entry.
rng = np.arange(10)
roots = np.sqrt(rng)
print(rng)
print(roots)

[0 1 2 3 4 5 6 7 8 9]
[0.         1.         1.41421356 1.73205081 2.         2.23606798
 2.44948974 2.64575131 2.82842712 3.        ]


### Extracting the dimensions of a NumPy array

In [25]:
# .shape reports the length of each axis as a tuple; for this two-axis
# array that is (number of rows, number of columns).
a = np.array(
    [
        [1.0, 2.0, 4.0],
        [-1.0, 2.0, -5.0],
    ]
)
print(a, a.shape, sep="\n")

[[ 1.  2.  4.]
 [-1.  2. -5.]]
(2, 3)


### Sums and sample means

In [26]:
# With no axis argument the reduction runs over every element.
# axis=0 reduces down the rows (one result per column);
# axis=1 reduces across the columns (one result per row).
for result in (a, a.sum(), a.sum(axis=0), a.mean(), a.mean(axis=1)):
    print(result)

[[ 1.  2.  4.]
 [-1.  2. -5.]]
3.0
[ 0.  4. -1.]
0.5
[ 2.33333333 -1.33333333]


### Transposing matrices

In [27]:
a = np.array([[1.0, 2.0, 4.0], [-1.0, 2.0, -5.0]])
b = a.T  # b is the transpose of a: rows and columns are swapped
print(b.shape, b, sep="\n")

(3, 2)
[[ 1. -1.]
 [ 2.  2.]
 [ 4. -5.]]


### Subsetting

In [29]:
# A locally seeded generator is used so the cell prints the same numbers on
# every run; the unseeded global generator gave different output each time.
arr = np.random.default_rng(seed=0).random((3, 3))
print(arr)

# Boolean-mask assignment: every entry below 0.5 is overwritten with 0 in place.
arr[arr < 0.5] = 0
print(arr)

# Comparisons produce boolean arrays; & and | combine them element-wise, and
# np.sum counts the True entries.
# NOTE(review): no value can be both < 0 and > 1, so the first count is
# always 0 -- confirm whether a satisfiable condition was intended.
print(np.sum((arr < 0) & (arr > 1)))
print(np.sum((arr < 0) | (arr > 0.7)))

[[0.6923     0.0374496  0.83547132]
 [0.55687161 0.29336483 0.18116835]
 [0.46204864 0.85967797 0.29305775]]
[[0.6923     0.         0.83547132]
 [0.55687161 0.         0.        ]
 [0.         0.85967797 0.        ]]
0
2


# Pandas

### Setting up a pandas data frame

In [None]:
# Structured array holding two zero-initialised records with three named
# fields: a 32-bit integer, a 32-bit float and a 10-byte string.
# 'S10' replaces the former 'a10' spelling: the 'a' type code is a deprecated
# alias (as of NumPy 2.0) for the same fixed-width bytes dtype.
data = np.zeros((2,), dtype=[('A', 'i4'), ('B', 'f4'), ('C', 'S10')])
# Each field becomes a column; the index labels the two records.
pd.DataFrame(data, index=['first', 'second'])

### Setting up another pandas data frame

In [30]:
# Each dictionary key becomes a column label and each list supplies that
# column's values; no index is given, so rows are labelled 0..3.
d = {
    'one': [1.0, 2.0, 3.0, 4.0],
    'two': [4.0, 3.0, 2.0, 1.0],
}
df = pd.DataFrame(data=d)
df

Unnamed: 0,one,two
0,1.0,4.0
1,2.0,3.0
2,3.0,2.0
3,4.0,1.0


### Getting descriptive statistics of a pandas data frame

In [31]:
# Per-column summary: count, mean, standard deviation, min, quartiles and max.
summary = df.describe()
summary

Unnamed: 0,one,two
count,4.0,4.0
mean,2.5,2.5
std,1.290994,1.290994
min,1.0,1.0
25%,1.75,1.75
50%,2.5,2.5
75%,3.25,3.25
max,4.0,4.0


### Subsetting a pandas data frame

In [32]:
d = {
    'one': [1.0, 2.0, 3.0, 4.0],
    'two': [4.0, 3.0, 2.0, 1.0],
}
df = pd.DataFrame(data=d)
# Indexing the frame with a column label returns that column as a Series.
for label in ('one', 'two'):
    print(df[label])

0    1.0
1    2.0
2    3.0
3    4.0
Name: one, dtype: float64
0    4.0
1    3.0
2    2.0
3    1.0
Name: two, dtype: float64
