# Numpy and Pandas

In [None]:
# %pip install numpy pandas

In [1]:
import numpy as np
import pandas as pd

In [2]:
# Numpy Dimensions / Arrays 

In [4]:
# Build a plain Python list holding 1..8 and the equivalent NumPy array,
# then print both so their text representations can be compared.
list1 = list(range(1, 9))
np_list = np.array(list1)

print(list1, np_list, sep="\n")

[1, 2, 3, 4, 5, 6, 7, 8]
[1 2 3 4 5 6 7 8]


In [5]:
print(type(list1))
print(type(np_list))

<class 'list'>
<class 'numpy.ndarray'>


In [6]:
id(np_list)

2530451238128

### 0-D Array

In [7]:
# Wrapping a single scalar in np.array yields a zero-dimensional ndarray.
age = 40
arr0d = np.array(age)

# Show the value followed by its type on one line.
print(f"{arr0d!s} {type(arr0d)!s}")

40 <class 'numpy.ndarray'>


### 1-D Array

In [8]:
# A flat list of numbers converts to a one-dimensional ndarray.
list2 = list(range(2, 9, 2))
arr1d = np.array(list2)

# Show the array followed by its type on one line.
print(f"{arr1d!s} {type(arr1d)!s}")

[2 4 6 8] <class 'numpy.ndarray'>


In [9]:
print(arr1d.shape)

(4,)


### 2-D Array

In [11]:
# Nested lists of equal length become the rows of a two-dimensional ndarray.
list3 = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
]
arr2d = np.array(list3)

# Print the array, its type and its shape, each separated by a 20-dash rule.
print(arr2d, type(arr2d), arr2d.shape, sep="\n" + "-" * 20 + "\n")

[[1 2 3 4]
 [5 6 7 8]]
--------------------
<class 'numpy.ndarray'>
--------------------
(2, 4)


### Creating random numbers

In [16]:
# Draw a single random integer from 1 to 10; the upper bound (11) is exclusive.
np.random.randint(1,11)

9

In [17]:
# Draw 10 samples from the standard normal distribution (mean 0, variance 1).
np.random.randn(10)

array([-0.2904159 ,  1.0263217 ,  0.85323976,  0.32671889, -0.3442422 ,
        0.29640186, -1.59358685, -0.37429695,  0.67970533, -0.0543861 ])

### create a range of numbers

In [18]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [19]:
np.arange(5)

array([0, 1, 2, 3, 4])

In [21]:
# np.arange takes optional arguments for the starting point and
# the step size (also called the increment).
# Here: start at 1, stop before 20, step by 2.

nos = np.arange(1,20,2)

print(nos)

[ 1  3  5  7  9 11 13 15 17 19]


In [22]:
# is this a 1-D or 2-D array?

np.linspace(1,11,20)

array([ 1.        ,  1.52631579,  2.05263158,  2.57894737,  3.10526316,
        3.63157895,  4.15789474,  4.68421053,  5.21052632,  5.73684211,
        6.26315789,  6.78947368,  7.31578947,  7.84210526,  8.36842105,
        8.89473684,  9.42105263,  9.94736842, 10.47368421, 11.        ])

# Arithmetic operations on numpy arrays

In [23]:
# Two integer arrays of equal length (11 elements each), so that the
# element-wise arithmetic shown next pairs up values position by position.
arr = np.arange(11)           # 0, 1, 2, ..., 10
arr2 = np.arange(0, 21, 2)    # 0, 2, 4, ..., 20

In [24]:
print(arr,'\n',arr2)

[ 0  1  2  3  4  5  6  7  8  9 10] 
 [ 0  2  4  6  8 10 12 14 16 18 20]


In [25]:
print(arr + arr2)

[ 0  3  6  9 12 15 18 21 24 27 30]


In [26]:
print(arr2 - arr)

[ 0  1  2  3  4  5  6  7  8  9 10]


In [27]:
print(arr * arr2)

[  0   2   8  18  32  50  72  98 128 162 200]


# PANDAS

#### Series and Dataframes

In [28]:
# A Series is equivalent to a single column
# A DataFrame is equivalent to a table (columns and rows)

In [29]:
# Employee names; used as Series values and as DataFrame row/column labels.
emps = np.array(
    ["Joy", "Lola", "Nike", "Tola"]
)

In [30]:
s=pd.Series(emps)

In [31]:
s

0     Joy
1    Lola
2    Nike
3    Tola
dtype: object

In [32]:
print(type(s))

<class 'pandas.core.series.Series'>


In [33]:
# Dataframes

In [37]:
# Four random integers from 1 to 9 (the upper bound 10 is exclusive).
# Note: `calls` is reassigned to a 4x4 array in the next cell, and that
# later value is the one the DataFrame examples are built from.
calls = np.random.randint(1,10,4)
calls

array([4, 7, 6, 6])

In [41]:
# 4x4 grid of random call counts: scale uniform samples from [0, 1) up to
# [0, 20) and round each one up, giving whole-number floats no larger than 20.
uniform_scaled = 20 * np.random.rand(4, 4)
calls = np.ceil(uniform_scaled)
del uniform_scaled  # keep the notebook namespace unchanged
calls

array([[ 5., 17., 10.,  2.],
       [ 4., 14., 11.,  3.],
       [15., 11., 20., 13.],
       [17.,  8., 13., 12.]])

In [42]:
# With no labels supplied, pandas numbers both the rows and the columns from 0.
df = pd.DataFrame(calls)

In [43]:
df

Unnamed: 0,0,1,2,3
0,5.0,17.0,10.0,2.0
1,4.0,14.0,11.0,3.0
2,15.0,11.0,20.0,13.0
3,17.0,8.0,13.0,12.0


In [44]:
# Use the employee names as column labels; the rows keep the default 0..3 index.
df = pd.DataFrame(calls, columns=emps)

In [45]:
df

Unnamed: 0,Joy,Lola,Nike,Tola
0,5.0,17.0,10.0,2.0
1,4.0,14.0,11.0,3.0
2,15.0,11.0,20.0,13.0
3,17.0,8.0,13.0,12.0


In [46]:
# Use the employee names as row labels instead; the columns fall back to 0..3.
df = pd.DataFrame(calls, index=emps)

In [47]:
df

Unnamed: 0,0,1,2,3
Joy,5.0,17.0,10.0,2.0
Lola,4.0,14.0,11.0,3.0
Nike,15.0,11.0,20.0,13.0
Tola,17.0,8.0,13.0,12.0


In [48]:
### create a date range with pandas

In [49]:
# Four consecutive calendar days starting on 19 June 2023
# (date_range steps one day at a time unless told otherwise).
dates = pd.date_range(start="20230619", periods=4)
dates

DatetimeIndex(['2023-06-19', '2023-06-20', '2023-06-21', '2023-06-22'], dtype='datetime64[ns]', freq='D')

In [50]:
# Label the 4x4 call counts on both axes:
# one row per date and one column per employee.
df = pd.DataFrame(data=calls, columns=emps, index=dates)
df

Unnamed: 0,Joy,Lola,Nike,Tola
2023-06-19,5.0,17.0,10.0,2.0
2023-06-20,4.0,14.0,11.0,3.0
2023-06-21,15.0,11.0,20.0,13.0
2023-06-22,17.0,8.0,13.0,12.0


### Opening and reading files with pandas 

In [51]:
# note the file location/filepath
# note the file type/ extension

In [52]:
# Relative path: resolved against the notebook's current working directory.
path = "datasets/zoo.csv"
# read_csv splits on commas by default; the result is displayed here but not stored.
pd.read_csv(path)

Unnamed: 0,animal,uniq_id,water_need
0,elephant,1001,500
1,elephant,1002,600
2,elephant,1003,550
3,tiger,1004,300
4,tiger,1005,320
5,tiger,1006,330
6,tiger,1007,290
7,tiger,1008,310
8,zebra,1009,200
9,zebra,1010,220


In [54]:
# This file is read with ";" as the field separator instead of the default comma.
pd.read_csv("datasets/zoo_eats.csv", delimiter=";")

Unnamed: 0,animal,food
0,elephant,vegetables
1,tiger,meat
2,kangaroo,vegetables
3,zebra,vegetables
4,giraffe,vegetables


In [55]:
# Keep both tables in variables so that later cells can reuse them.
path = "datasets/zoo.csv"
zoo = pd.read_csv(path)

# `sep` is the parameter that `delimiter` is an alias for.
zoo_eats = pd.read_csv("datasets/zoo_eats.csv", sep=";")

In [57]:
zoo_eats

Unnamed: 0,animal,food
0,elephant,vegetables
1,tiger,meat
2,kangaroo,vegetables
3,zebra,vegetables
4,giraffe,vegetables


In [58]:
# first 5 rows
zoo.head()

Unnamed: 0,animal,uniq_id,water_need
0,elephant,1001,500
1,elephant,1002,600
2,elephant,1003,550
3,tiger,1004,300
4,tiger,1005,320


In [59]:
zoo_eats

Unnamed: 0,animal,food
0,elephant,vegetables
1,tiger,meat
2,kangaroo,vegetables
3,zebra,vegetables
4,giraffe,vegetables


In [61]:
# Combine both tables with an inner join on the shared "animal" column.
# Animals that appear in only one of the two tables are left out of the result.
zoo_full = pd.merge(zoo, zoo_eats, how="inner", on="animal")
zoo_full

Unnamed: 0,animal,uniq_id,water_need,food
0,elephant,1001,500,vegetables
1,elephant,1002,600,vegetables
2,elephant,1003,550,vegetables
3,tiger,1004,300,meat
4,tiger,1005,320,meat
5,tiger,1006,330,meat
6,tiger,1007,290,meat
7,tiger,1008,310,meat
8,zebra,1009,200,vegetables
9,zebra,1010,220,vegetables


In [62]:
# Save the merged table to a comma-separated file.
# index=False leaves the row index out of the file. `index` is documented as a
# bool, so pass False explicitly rather than relying on None being falsy.
zoo_full.to_csv("datasets/zoo_merged.csv",
                sep=",", index=False)

In [None]:
# TODO: read the saved file back, e.g. pd.read_csv("datasets/zoo_merged.csv")