### Adendo

In [13]:
import pandas as pd
obj = pd.Series([4, 12, -5, 3, 5])

In [14]:
print(obj.values)
print(obj.index)

[ 4 12 -5  3  5]
RangeIndex(start=0, stop=5, step=1)


In [18]:
obj.index = ['a', 'b', 'c', 'd', 'e']
obj.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [27]:
print(pd.isnull(obj).head(2))
print()
print(pd.notnull(obj).head(2))

a    False
b    False
dtype: bool

a    True
b    True
dtype: bool


In [34]:
obj.name = 'numbers'
obj.index.name = "index"
obj.head(2)

index
a     4
b    12
Name: numbers, dtype: int64

https://www.machinelearningplus.com/python/101-pandas-exercises-python/

### Import pandas and print the version

In [63]:
import pandas as pd
pd.__version__

'0.25.1'

### Create a pandas series from each of the items below: a list, numpy and a dictionary

In [64]:
import numpy as np
mylist = list('abcedfghijklmnopqrstuvwxyz')
myarr = np.arange(26)
mydict = dict(zip(mylist, myarr))

In [80]:
s1 = pd.Series(mylist)
s2 = pd.Series(myarr)
s3 = pd.Series(mydict)

print(s3.head()) # .head(n=5) mostra as primeiras n "x" linhas

a    0
b    1
c    2
e    3
d    4
dtype: int64


### Convert the series s3 into a dataframe with its index as another column on the dataframe.

In [124]:
pd.DataFrame(s3.head(3))

Unnamed: 0,0
a,0
b,1
c,2


In [123]:
# solution
df = s3.to_frame().reset_index()
print(df.head(3))

  index  0
0     a  0
1     b  1
2     c  2


### Combine ser1 and ser2 to form a dataframe.

In [87]:
ser1 = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))
ser2 = pd.Series(np.arange(26))

In [122]:
pd.DataFrame({"col1":ser1,"col2":ser2}).head(3)

Unnamed: 0,col1,col2
0,a,0
1,b,1
2,c,2


In [121]:
# other solution
df = pd.concat([ser1, ser2], axis=1)
df.head(3)

Unnamed: 0,0,1
0,a,0
1,b,1
2,c,2


### Give a name to the series ser calling it ‘alphabets’.

In [112]:
ser = pd.Series(list('abcedfghijklmnopqrstuvwxyz'))

In [118]:
pd.DataFrame({"alfabeto":ser}).head(3)

Unnamed: 0,alfabeto
0,a
1,b
2,c


In [119]:
# solution
ser.name = 'alphabets'
ser.head(3)

0    a
1    b
2    c
Name: alphabets, dtype: object

In [120]:
# Obs:
pd.DataFrame(ser).head(3)

Unnamed: 0,alphabets
0,a
1,b
2,c


### From ser1 remove items present in ser2.

In [130]:
ser1 = pd.Series([1, 2, 3, 4, 5])
ser2 = pd.Series([4, 5, 6, 7, 8])

In [138]:
ser1[~ser1.isin(ser2)] # neste caso é subtração de conjuntos / sem "~" torna-se a intersseção

0    1
1    2
2    3
dtype: int64

### Get all items of ser1 and ser2 not common to both.

In [143]:
ser1[~ser1.isin(ser2)].append(ser2)

0    1
1    2
2    3
0    4
1    5
2    6
3    7
4    8
dtype: int64

In [144]:
# solution
import numpy as np
ser_u = pd.Series(np.union1d(ser1, ser2))  # union
ser_i = pd.Series(np.intersect1d(ser1, ser2))  # intersect
ser_u[~ser_u.isin(ser_i)]

0    1
1    2
2    3
5    6
6    7
7    8
dtype: int64

### Compute the minimum, median, and maximum of ser.

In [145]:
ser = pd.Series(np.random.normal(10, 5, 25))

In [147]:
ser.min()

0.7767006856653005

In [148]:
ser.mean()

10.519794817453635

In [149]:
ser.max()

17.39489148445464

### Calculte the frequency counts of each unique value ser.

In [150]:
ser = pd.Series(np.take(list('abcdefgh'), np.random.randint(8, size=30)))

In [157]:
ser.nunique() # conta quantos valores são únicos / .unique() mostra quais estes valores

8

In [158]:
# solution
ser.value_counts()

f    7
e    7
c    3
a    3
b    3
h    3
g    2
d    2
dtype: int64

### From ser, keep the top 2 most frequent items as it is and replace everything else as ‘Other’.

In [176]:
np.random.RandomState(100)
ser = pd.Series(np.random.randint(1, 5, [12]))

In [177]:
ser.value_counts().index[:2] # index dos dois máximos

Int64Index([3, 1], dtype='int64')

In [178]:
# solution
print("Top 2 Freq:", ser.value_counts())
ser[~ser.isin(ser.value_counts().index[:2])] = 'Other'
ser

Top 2 Freq: 3    5
1    3
4    2
2    2
dtype: int64


0         1
1         1
2     Other
3         3
4         1
5         3
6         3
7     Other
8         3
9         3
10    Other
11    Other
dtype: object