In [110]:
import pandas as pd
import numpy as np
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

In [111]:
def print_serie_attr(serie, max_print = 10):
    print(f"Index:\n\t{serie.index[:max_print]}")
    print(f"DType:\n\t{serie.dtype}")
    print(f"Values:\n\t{serie.values[:max_print]}")
    print(f"Name:\n\t{serie.name}")

In [112]:
ice_cream = ["Strawberry”, “Vanilla”, “Chocolate"]
s0 = pd.Series(ice_cream)
print_serie_attr(s0)

Index:
	RangeIndex(start=0, stop=1, step=1)
DType:
	object
Values:
	['Strawberry”, “Vanilla”, “Chocolate']
Name:
	None


In [113]:
my_list = [10,20,30, np.nan] # [10,20,30, pd.NA] With this it does RAISE AN ERROR!!!
s1 = pd.Series(my_list)
print_serie_attr(s1)
s1.sum(min_count=4)
s1.prod()
s1.mean(skipna = False) # It will return a nan
pd.isna(s1)

Index:
	RangeIndex(start=0, stop=4, step=1)
DType:
	float64
Values:
	[10. 20. 30. nan]
Name:
	None


nan

6000.0

nan

0    False
1    False
2    False
3     True
dtype: bool

In [114]:
labels = ['b', 'a', 'c']
my_list = [True,False,False]
s2 = pd.Series(data=my_list, index=labels)
print_serie_attr(s2)
s2.sum()
s2.prod()
s2.mean()

Index:
	Index(['b', 'a', 'c'], dtype='object')
DType:
	bool
Values:
	[ True False False]
Name:
	None


1

0

0.3333333333333333

In [115]:
my_dict = {'a':'Z','b':244,'c':'X'}
s3 = pd.Series(my_dict)
print_serie_attr(s3)

Index:
	Index(['a', 'b', 'c'], dtype='object')
DType:
	object
Values:
	['Z' 244 'X']
Name:
	None


In [116]:
my_list = [10,20,30,40]
labels = ['a','b','c','a']
s4 = pd.Series(my_list, index = labels)
type(s4['b'])
s4.iloc[[2]]

numpy.int64

c    30
dtype: int64

In [117]:
pokemon = pd.read_csv("original_datasets/pokemon.csv", usecols = ["Pokemon"], squeeze = True)
print_serie_attr(pokemon)
# This only has one column called "Stock Price", squeeze is still needed to cast to serie
google = pd.read_csv("original_datasets/google_stock_price.csv",squeeze = True)
print_serie_attr(google)

Index:
	RangeIndex(start=0, stop=10, step=1)
DType:
	object
Values:
	['Bulbasaur' 'Ivysaur' 'Venusaur' 'Charmander' 'Charmeleon' 'Charizard'
 'Squirtle' 'Wartortle' 'Blastoise' 'Caterpie']
Name:
	Pokemon
Index:
	RangeIndex(start=0, stop=10, step=1)
DType:
	float64
Values:
	[50.12 54.1  54.65 52.38 52.95 53.9  53.02 50.95 51.13 50.07]
Name:
	Stock Price


In [118]:
# s.head() and s.tail() creates a new serie object with the correspondent index portion 
a = google.tail()
a.index

RangeIndex(start=3007, stop=3012, step=1)

In [119]:
# Python Built-In Fucntions
len(google)
type(google)
list(s4) # Keeps the order, s4 have two equal index values
b = dict(s4)
b['a'].index
max(s4)
min(s1)

3012

pandas.core.series.Series

[10, 20, 30, 40]

Index(['a', 'a'], dtype='object')

40

10.0

In [120]:
google_sorted_by_value = google.sort_values(ignore_index=False).head() # Use the original index

In [121]:
google_sorted_by_value.head()
google_sorted_by_value.index
#google_sorted_by_value[1] #=> This will raise a KeyError excption because the index is IndexInt64 but there is no 1
google_sorted_by_value.index.to_frame()
google_sorted_by_value.index.to_numpy()

11    49.95
9     50.07
0     50.12
10    50.70
12    50.74
Name: Stock Price, dtype: float64

Int64Index([11, 9, 0, 10, 12], dtype='int64')

Unnamed: 0,0
11,11
9,9
0,0
10,10
12,12


array([11,  9,  0, 10, 12])

In [122]:
c = google.sort_values(ignore_index=True).head() # Now the index will be again a RangeIndex
c

0    49.95
1    50.07
2    50.12
3    50.70
4    50.74
Name: Stock Price, dtype: float64

In [123]:
c.index.sort_values(return_indexer=True) # tuple wihte sorted index and the numpy.ndarray indexer

(Int64Index([0, 1, 2, 3, 4], dtype='int64'), array([0, 1, 2, 3, 4]))

In [124]:
cc = c.index.value_counts(bins = 3)
cc.index

IntervalIndex([(2.667, 4.0], (-0.005, 1.333], (1.333, 2.667]],
              closed='right',
              dtype='interval[float64]')

In [125]:
c.index.to_frame()
c.index.where(cond = (True,False,True,True,False), other = ["aa","bb","cc","dd","ee"])

Unnamed: 0,0
0,0
1,1
2,2
3,3
4,4


Index(['0', 'bb', '2', '3', 'ee'], dtype='object')

In [126]:
s2
# This returns just 'Default' when an array-like object is given with some values not present
s2.get(key = ["a","b","d","e"],default = "Default") 

b     True
a    False
c    False
dtype: bool

'Default'

In [127]:
a = iter(s2)
next(a)
next(a)

True

False

In [128]:
def f_test(serie):
    if isinstance(serie, pd.Series):
        return("Serie")
    else:
        return(type(serie))
    
s1.transform(f_test)

0    <class 'float'>
1    <class 'float'>
2    <class 'float'>
3    <class 'float'>
dtype: object

In [129]:
pd.Series(data = [1,20,20,20,30,np.nan]).mode(dropna=True)

0    20.0
dtype: float64

## Binary Operations

In [163]:
a = pd.Series([1, 1, 1, 3.0], index=['a', 'b', 'c', 'd'])
b = pd.Series([1, np.nan, 1, np.nan], index=['a', 'b', 'd', 'e'])
a + b
a.add(b, fill_value = 0)
acat = a.astype(pd.CategoricalDtype(categories = [1,3.0]))
type(acat.values)

a    2.0
b    NaN
c    NaN
d    4.0
e    NaN
dtype: float64

a    2.0
b    1.0
c    1.0
d    4.0
e    NaN
dtype: float64

pandas.core.arrays.categorical.Categorical

In [131]:
a + [10,10,10,10]

a    11.0
b    11.0
c    11.0
d    13.0
dtype: float64

In [132]:
c = pd.Series([1, 20, 300, 40000], index = ['a', 'b', 'c', 'd'])
b = pd.Series([2, 20, 300, 40000], index = ['c', 'b', 'a', 'd'])
c[[1]]
citer = c.__iter__()

b    20
dtype: int64

In [133]:
next(citer)

1

In [143]:
c.loc[b > 100]
c[b > 100]
b.between(200,300)

a        1
d    40000
dtype: int64

a        1
d    40000
dtype: int64

c    False
b    False
a     True
d    False
dtype: bool

### Categorical Values

In [160]:
c_df = pd.DataFrame(data=c, columns=["Col_0"])
b_truncated = b.iloc[:-1] # This will raise exception if used to filter c_df
c_df
c_df[b > 100]

Unnamed: 0,Col_0
a,True
b,False
c,True
d,False


  c_df[b > 100]


Unnamed: 0,Col_0
a,True
d,False


In [162]:
c = pd.Series([True, False, True, pd], index=['a', 'b', np.nan, 'd'])
d = pd.Series([True, np.nan, True, np.nan], index=['a', 'b', 'd', 'e'])


In [48]:
c.fillna(value={'a':100,'d':150})

a       True
b      False
NaN     True
d        150
dtype: object

In [157]:
c = pd.Series([True, False, True, False], index=['a', 'b', 'c', 'd'])
d = pd.Series([True, np.nan, True, True], index=['c', 'd', 'e', 'f'])
AA = c.align(d,join="right")
AA[0]
AA[1]

c     True
d    False
e      NaN
f      NaN
dtype: object

c    True
d     NaN
e    True
f    True
dtype: object

## unique(), nunique() methods and unique attribute

In [169]:
s = pd.Series([1, 300, 300, 40000], index = ['a', 'b', 'c', 'a'])
s.unique()
type(s.unique)

array([    1,   300, 40000])

method

In [166]:
s.nunique()

3