In [1]:
import pandas as pd
import numpy as np

# Basic Series

In [8]:
# Build a Series from a 1-D NumPy array; pandas attaches a default 0..n-1 index.
array = np.arange(0, 5)
series = pd.Series(data=array)

In [35]:
pd.Series(np.arange(6), name="test")

0    0
1    1
2    2
3    3
4    4
5    5
Name: test, dtype: int32

In [23]:
series.values

array([0, 1, 2, 3, 4])

In [10]:
series.values.mean()

2.0

In [11]:
series.mean()

2.0

In [12]:
series.index

RangeIndex(start=0, stop=5, step=1)

In [13]:
series

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [16]:
# Relabel the rows 1..5 (instead of the default 0..4) and display the result.
series.index = list(range(1, 6))
series

1    0
2    1
3    2
4    3
5    4
dtype: int32

In [24]:
series.name = "special series"
series

1    0
2    1
3    2
4    3
5    4
Name: special series, dtype: int32

# Data Types Conversion

In [25]:
series

1    0
2    1
3    2
4    3
5    4
Name: special series, dtype: int32

In [26]:
series.astype('float')

1    0.0
2    1.0
3    2.0
4    3.0
5    4.0
Name: special series, dtype: float64

In [27]:
series.astype('bool')

1    False
2     True
3     True
4     True
5     True
Name: special series, dtype: bool

In [28]:
series.astype('object')

1    0
2    1
3    2
4    3
5    4
Name: special series, dtype: object

In [29]:
series.astype('string')

1    0
2    1
3    2
4    3
5    4
Name: special series, dtype: string

# Indexing

In [37]:
# Five integers with the default integer index, used for the indexing examples.
my_series = pd.Series(data=range(5))
my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [39]:
my_series[3]

3

In [40]:
my_series[1::3]

1    1
4    4
dtype: int64

In [42]:
# Same values, now addressed by string labels instead of integer positions.
my_series = pd.Series(
    data=range(5),
    index=['Day 0', 'Day 1', 'Day 2', 'Day 3', 'Day 4'],
)
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [44]:
my_series['Day 1']

1

In [46]:
my_series['Day 1':'Day 3']

Day 1    1
Day 2    2
Day 3    3
dtype: int64

# .iloc and .loc Indexers

In [47]:
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [76]:
# .iloc selects by integer position; a list of positions returns those rows.
my_series.iloc[[1,2]]
# .iloc does not accept labels, so the following would raise a TypeError:
# my_series.iloc['Day 0']

Day 1    1
Day 2    2
dtype: int64

In [65]:
my_series.iloc[:3]

Day 0    0
Day 1    1
Day 2    2
dtype: int64

In [69]:
my_series.loc[['Day 0', 'Day 1']]

Day 0    0
Day 1    1
dtype: int64

In [74]:
my_series.loc['Day 0':'Day 2']

Day 0    0
Day 1    1
Day 2    2
dtype: int64

In [77]:
my_series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [80]:
# After resetting to a 0..4 integer index, .loc still slices by label and
# includes the end label, so :2 returns the rows labelled 0, 1 and 2.
my_series.reset_index(drop=True).loc[:2]

0    0
1    1
2    2
dtype: int64

In [82]:
# Index labels do not have to be unique: 'Day 0' labels the first two rows.
new_series = pd.Series(
    data=range(5),
    index=['Day 0', 'Day 0', 'Day 2', 'Day 3', 'Day 4'],
)
new_series

Day 0    0
Day 0    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [83]:
new_series.loc['Day 0']

Day 0    0
Day 0    1
dtype: int64

In [89]:
# .loc['Day 0'] returns a two-row Series because the label is duplicated.
# Take the first match explicitly by position with .iloc: a plain [0] on a
# string-labelled Series relies on a deprecated positional fallback.
new_series.loc['Day 0'].iloc[0]

0

In [84]:
new_series.reset_index()

Unnamed: 0,index,0
0,Day 0,0
1,Day 0,1
2,Day 2,2
3,Day 3,3
4,Day 4,4


In [87]:
new_series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [88]:
# First two rows by position (the end position is excluded).
new_series.iloc[:2]

Day 0    0
Day 0    1
dtype: int64

In [90]:
new_series

Day 0    0
Day 0    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

# Filtering And Sorting Series

In [101]:
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [104]:
my_series.loc[(my_series != 2)]

Day 0    0
Day 1    1
Day 3    3
Day 4    4
dtype: int64

In [105]:
my_series.loc[~(my_series != 2)]

Day 2    2
dtype: int64

In [110]:
my_series.loc[~my_series.isin([1,2])]

Day 0    0
Day 3    3
Day 4    4
dtype: int64

In [111]:
my_series.loc[(my_series > 2)]

Day 3    3
Day 4    4
dtype: int64

In [112]:
my_series.loc[~(my_series > 2)]

Day 0    0
Day 1    1
Day 2    2
dtype: int64

In [114]:
my_series.loc[my_series.gt(2)]

Day 3    3
Day 4    4
dtype: int64

In [115]:
my_series.loc[~my_series.gt(2)]

Day 0    0
Day 1    1
Day 2    2
dtype: int64

In [118]:
my_series.loc[my_series.ge(2)]

Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [116]:
my_series.loc[my_series.lt(2)]

Day 0    0
Day 1    1
dtype: int64

In [119]:
my_series.loc[~my_series.lt(2)]

Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [120]:
my_series.loc[my_series.le(2)]

Day 0    0
Day 1    1
Day 2    2
dtype: int64

In [123]:
# Combine two conditions element-wise: value is 1 or 2 AND value is at least 2.
mask = my_series.isin([1, 2]) & my_series.ge(2)
my_series.loc[mask]

Day 2    2
dtype: int64

### Sorting

In [124]:
my_series

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [127]:
my_series.sort_values(ascending=False)

Day 4    4
Day 3    3
Day 2    2
Day 1    1
Day 0    0
dtype: int64

In [128]:
my_series.sort_index()

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

# Numeric Series operations

In [132]:
# A Series with one missing value; the NaN forces a float64 dtype.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
my_series1 = pd.Series([1, np.nan, 2, 3, 4], index=['Day 0', 'Day 1', 'Day 2', 'Day 3', 'Day 4'])
my_series1

Day 0    1.0
Day 1    NaN
Day 2    2.0
Day 3    3.0
Day 4    4.0
dtype: float64

In [133]:
my_series1 + 1

Day 0    2.0
Day 1    NaN
Day 2    3.0
Day 3    4.0
Day 4    5.0
dtype: float64

In [136]:
# Unlike `my_series1 + 1`, fill_value=0 substitutes 0 for the missing entry
# before adding, so 'Day 1' becomes 1.0 instead of staying NaN.
my_series1.add(1, fill_value=0)

Day 0    2.0
Day 1    1.0
Day 2    3.0
Day 3    4.0
Day 4    5.0
dtype: float64

In [141]:
# Fill the missing entry with 0 while adding 1, so no NaN is left when the
# result is cast to an integer dtype.
my_series2 = my_series1.add(1, fill_value=0).astype('int')
my_series2

Day 0    2
Day 1    1
Day 2    3
Day 3    4
Day 4    5
dtype: int32

In [143]:
my_series2 / 2 

Day 0    1.0
Day 1    0.5
Day 2    1.5
Day 3    2.0
Day 4    2.5
dtype: float64

In [144]:
my_series2 // 2 

Day 0    1
Day 1    0
Day 2    1
Day 3    2
Day 4    2
dtype: int32

In [145]:
my_series + my_series2

Day 0    2
Day 1    2
Day 2    5
Day 3    7
Day 4    9
dtype: int64

In [146]:
my_series.add(my_series2)

Day 0    2
Day 1    2
Day 2    5
Day 3    7
Day 4    9
dtype: int64

In [147]:
(my_series + my_series2) / 2

Day 0    1.0
Day 1    1.0
Day 2    2.5
Day 3    3.5
Day 4    4.5
dtype: float64

# Text Series Operations

In [157]:
# Text values used to demonstrate the vectorised .str accessor.
string_series = pd.Series(
    data=['Day 0', 'Day 1', 'Day 2', 'Day 3', 'Day 4'],
)
string_series

0    Day 0
1    Day 1
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [150]:
string_series.str.upper()

0    DAY 0
1    DAY 1
2    DAY 2
3    DAY 3
4    DAY 4
dtype: object

In [153]:
string_series.str.upper().str.contains('DAY 1')

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [158]:
string_series.str.strip()

0    Day 0
1    Day 1
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [167]:
# Take the last character of each string with .str[-1], then cast it to an integer dtype.
string_series.str[-1].astype('int')

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [171]:
string_series.str[1:3]

0    ay
1    ay
2    ay
3    ay
4    ay
dtype: object

In [173]:
string_series.str.split(' ', expand=True)

Unnamed: 0,0,1
0,Day,0
1,Day,1
2,Day,2
3,Day,3
4,Day,4


# Numeric Aggregations

In [178]:
# Load the transactions table and pull out its 'transactions' column.
# Selecting a single column already yields a Series, so it does not need to be
# wrapped in pd.Series(...) again.
transactions = pd.read_csv('transactions.csv')
transactions_series = transactions['transactions']
# Preview the first five rows only.
transactions_series.iloc[:5]

0     770
1    2111
2    2358
3    3487
4    1922
Name: transactions, dtype: int64

In [179]:
transactions_series.count()

83488

In [180]:
transactions_series.sum()

141478945

In [181]:
transactions_series.mean()

1694.6021583940208

In [184]:
# Median (0.5 quantile) of the first five values. interpolation='nearest'
# returns an observed value instead of interpolating between two neighbours.
transactions_series.iloc[:5].quantile([.5], interpolation='nearest')

0.5    2111
Name: transactions, dtype: int64

In [187]:
transactions_series.unique()

array([ 770, 2111, 2358, ..., 4553, 4400, 4392], dtype=int64)

In [189]:
new_series

Day 0    0
Day 0    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [192]:
# Text values with a repeated entry ('Day 0') to demonstrate unique() and value_counts().
string_series1 = pd.Series(
    data=['Day 0', 'Day 0', 'Day 2', 'Day 3', 'Day 4'],
)
string_series1

0    Day 0
1    Day 0
2    Day 2
3    Day 3
4    Day 4
dtype: object

In [193]:
string_series1.unique()

array(['Day 0', 'Day 2', 'Day 3', 'Day 4'], dtype=object)

In [195]:
# normalize=True reports each value's share of the rows instead of its raw count.
string_series1.value_counts(normalize=True)

Day 0    0.4
Day 2    0.2
Day 3    0.2
Day 4    0.2
Name: proportion, dtype: float64

# Work with Missing Data 

In [205]:
# An all-missing Series is stored as float64. NaN has no integer representation,
# so missing_data.astype('int') would raise instead of converting.
# Use np.nan: the np.NaN alias was removed in NumPy 2.0.
missing_data = pd.Series([np.nan] * 5)
missing_data

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [202]:
# NaN values are skipped by default, so the sum of an all-NaN Series is 0.0 rather than NaN.
missing_data.sum()

0.0

In [203]:
missing_data.isna().sum()

5

In [206]:
# isna() yields a boolean mask; casting it to int turns True/False into 1/0.
missing_data.isna().astype('int')

0    1
1    1
2    1
3    1
4    1
dtype: int32