In [1]:
import pandas as pd
import numpy as np

In [2]:
# Create the integers 0..4 as a NumPy array, then view them as a Series.
array = np.arange(0, 5)

pd.Series(data=array)

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [7]:
# Series of 0..4 that carries the name 'test'.
series = pd.Series(data=np.arange(0, 5), name='test')

In [8]:
# Display the named Series built in the previous cell.
series

0    0
1    1
2    2
3    3
4    4
Name: test, dtype: int64

In [9]:
# .values exposes the underlying NumPy array, so this calls the array's own
# mean(); the result shown is a NumPy float scalar.
series.values.mean()

np.float64(2.0)

In [12]:
# Replace the index labels in place; the list must have one label per element.
# Note this mutates `series`, so earlier displays of it no longer match its state.
series.index = [10, 20, 30, 40, 50]
series

10    0
20    1
30    2
40    3
50    4
Name: test, dtype: int64

In [13]:
# Rename the Series in place; the new name appears in the repr footer.
series.name = 'special series'
series

10    0
20    1
30    2
40    3
50    4
Name: special series, dtype: int64

In [15]:
# Element type of the Series (int64 here, as the output shows).
series.dtype

dtype('int64')

### Conversion

In [19]:
# The first line is the uncast baseline; the next three cast a fresh 0..4 Series
# with astype(). None of the results are assigned, and only the final expression
# (the cast to object) is rendered as the cell output.
pd.Series(range(5))
pd.Series(range(5)).astype(float)
pd.Series(range(5)).astype(bool)
pd.Series(range(5)).astype(object)

0    0
1    1
2    2
3    3
4    4
dtype: object

### Index

In [22]:
# Five consecutive integers with the default positional index.
my_series = pd.Series(data=range(0, 5))
my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [24]:
# Look up the element labelled 3 (result discarded), then slice the first four
# elements; only the slice, being the last expression, is displayed.
my_series[3]
my_series[:4]

0    0
1    1
2    2
3    3
dtype: int64

In [25]:
# Pair each day label with its value; the dict's key order becomes the index order.
my_series = pd.Series(dict(zip(['day1','day2','day3','day4','day5'], range(5))))
my_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [28]:
# Slicing by label includes the end label ('day3'); this result is not displayed.
my_series['day1':'day3']
# Step slice: every second element, which is what the cell renders.
my_series[::2]

day1    0
day3    2
day5    4
dtype: int64

### iloc Method

In [29]:
# Rebuild the day-labelled Series so this section starts from a known state.
my_series = pd.Series(
    index=['day1','day2','day3','day4','day5'],
    data=range(5),
)
my_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [34]:
# Positional slice: positions 1 through 3 (the end position, 4, is excluded).
my_series.iloc[1:4]

day2    1
day3    2
day4    3
dtype: int64

### loc method

In [None]:
# Same day-labelled Series again, built from an explicit list of values.
my_series = pd.Series(
    list(range(5)),
    index=['day1','day2','day3','day4','day5'],
)
my_series

In [38]:
# Label-based slice: unlike iloc, the end label ('day4') is included.
my_series.loc['day1':'day4']

day1    0
day2    1
day3    2
day4    3
dtype: int64

In [39]:
# Returns a Series with the labels replaced by a default 0..n-1 index;
# drop=True discards the old labels. my_series itself is not reassigned here.
my_series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

### Duplicate index values

In [43]:
# The label 'day0' is deliberately repeated to show how duplicate labels behave.
my_series = pd.Series(
    data=range(5),
    index=['day0','day0','day3','day4','day5'],
)
my_series

day0    0
day0    1
day3    2
day4    3
day5    4
dtype: int64

In [47]:
# 'day0' appears twice in the index, so this lookup returns both matching
# rows as a Series rather than a single scalar.
my_series['day0']

day0    0
day0    1
dtype: int64

In [49]:
# Replacing the labels with a default 0..n-1 index removes the duplicate labels.
# The result is only displayed; my_series is not reassigned.
my_series.reset_index(drop=True)

0    0
1    1
2    2
3    3
4    4
dtype: int64

### Filtering

In [50]:
# Fresh day-labelled Series used by the filtering examples that follow.
my_series = pd.Series(
    data=list(range(5)),
    index=['day1','day2','day3','day4','day5'],
)
my_series

day1    0
day2    1
day3    2
day4    3
day5    4
dtype: int64

In [61]:
# Boolean-mask filtering. Every expression is evaluated, but only the last one
# (values greater than 2) is rendered as the cell output.
# Plain [] and .loc accept the same boolean mask.
my_series[my_series == 2]
my_series.loc[my_series == 2]
my_series.loc[my_series != 2]
# ~ negates the mask, so this selects the same rows as `== 2`.
my_series.loc[~(my_series != 2)]
# isin() tests membership in a list of values.
my_series.loc[my_series.isin([2,3])]
my_series[my_series > 2]

day4    3
day5    4
dtype: int64

### Sorting

In [62]:
# Labels 'day 0'..'day 4' paired with the values 0..4, in that order.
my_series = pd.Series(dict(zip(['day 0','day 1','day 2','day 3','day 4'], range(5))))
my_series

day 0    0
day 1    1
day 2    2
day 3    3
day 4    4
dtype: int64

In [65]:
# sort_values() returns a sorted copy; the ascending result is discarded and
# only the descending one (last expression) is displayed.
my_series.sort_values(ascending=True)
my_series.sort_values(ascending=False)

day 4    4
day 3    3
day 2    2
day 1    1
day 0    0
dtype: int64

### Arithmetic

In [70]:
# Values containing one missing entry (which makes the Series float64),
# under an index that repeats the 'day0' label.
my_series = pd.Series(
    data=[1, np.nan, 2, 3, 4],
    index=['day0','day0','day3','day4','day5'],
)
my_series

day0    1.0
day0    NaN
day3    2.0
day4    3.0
day5    4.0
dtype: float64

In [74]:
# Plain addition propagates the missing value (NaN + 1 is NaN); result discarded.
my_series + 1
# add() with fill_value=0 substitutes 0 for the missing entry before adding,
# so the NaN row becomes 1.0 in the output.
my_series2 = my_series.add(1, fill_value=0)
my_series2

day0    2.0
day0    1.0
day3    3.0
day4    4.0
day5    5.0
dtype: float64

In [77]:
# Element-wise arithmetic; only the final sum is displayed. The NaN still
# present in my_series propagates into the second row of that sum.
my_series2 / 2
my_series2 * 2
my_series2 + my_series

day0    3.0
day0    NaN
day3    5.0
day4    7.0
day5    9.0
dtype: float64

### String Methods

In [79]:
# String-valued Series used by the .str accessor examples.
my_series = pd.Series(
    data=['day 0','day 1','day 2','day 3','day 4'],
)
my_series

0    day 0
1    day 1
2    day 2
3    day 3
4    day 4
dtype: object

In [82]:
# Substring tests via the .str accessor. Only the last result is displayed,
# and it is True only for the 'day 1' element.
my_series.str.contains('1')
my_series.str.contains('day')
my_series.str.contains('day 1')

0    False
1     True
2    False
3    False
4    False
dtype: bool

In [88]:
# upper() makes every value uppercase, so the lowercase pattern 'day' no longer matches.
my_series.str.upper().str.contains('day')
# strip('day') removes leading/trailing 'd', 'a', 'y' characters (a character
# set, not a prefix); the remainder is then cast to int.
my_series.str.strip('day').astype(int)
# Take the last character of each string and cast it to int.
my_series.str[-1].astype(int)
# First three characters of each string; this is the rendered output.
my_series.str[:3]

0    day
1    day
2    day
3    day
4    day
dtype: object

In [93]:
# Split each string on a single space; expand=True returns the pieces as
# separate columns of a DataFrame instead of a Series of lists.
my_series.str.split(' ', expand=True)

Unnamed: 0,0,1
0,day,0
1,day,1
2,day,2
3,day,3
4,day,4


### Aggregation

In [99]:
# Load the transactions table from the CSV one directory above the notebook.
transactions = pd.read_csv('../transactions.csv')

# Pull the 'transactions' column out as a Series. This reads from the frame
# bound just above, so the cell does not depend on a name left over from an
# earlier kernel session and runs cleanly after Restart & Run All.
transactions_series = pd.Series(transactions['transactions'])

In [101]:
# Preview the first five rows rather than dumping the whole Series.
transactions_series.head()

0     770
1    2111
2    2358
3    3487
4    1922
Name: transactions, dtype: int64

In [106]:
# Summary statistics; only the final expression is displayed.
transactions_series.count()
transactions_series.sum()
transactions_series.mean()
# Passing a list returns a Series keyed by quantile; interpolation='nearest'
# picks an actual data value rather than interpolating between two.
transactions_series.quantile([.5], interpolation='nearest')

0.5    1393
Name: transactions, dtype: int64

### Categorical Aggregation

In [108]:
# String Series in which 'day 0' occurs twice, for the counting examples below.
my_series = pd.Series(
    data=['day 0','day 0','day 2','day 3','day 4'],
)
my_series

0    day 0
1    day 0
2    day 2
3    day 3
4    day 4
dtype: object

In [109]:
# Number of distinct values (4 here, because 'day 0' appears twice).
my_series.nunique()

4

In [110]:
# The distinct values themselves, returned as an array.
my_series.unique()

array(['day 0', 'day 2', 'day 3', 'day 4'], dtype=object)

In [112]:
# Occurrences per distinct value (not displayed).
my_series.value_counts()
# normalize=True returns each value's share of the total instead of a raw count.
my_series.value_counts(normalize=True)

day 0    0.4
day 2    0.2
day 3    0.2
day 4    0.2
Name: proportion, dtype: float64

### Missing data

In [118]:
# A Series made of five missing values.
my_series = pd.Series([np.nan for _ in range(5)])
my_series

0   NaN
1   NaN
2   NaN
3   NaN
4   NaN
dtype: float64

In [121]:
# Element-wise missing-value mask (not displayed).
my_series.isna()
# Summing the boolean mask counts the missing entries.
my_series.isna().sum()

np.int64(5)

In [122]:
# Build the Series as float64 up front. An int64 Series cannot hold a missing
# value, so assigning one into it depends on an implicit int64 -> float64
# upcast, which pandas has deprecated for setitem-style assignment.
my_series = pd.Series(range(5), dtype='float64')
# .iloc makes it explicit that the first two *positions* are blanked out.
my_series.iloc[:2] = np.nan
my_series

0    NaN
1    NaN
2    2.0
3    3.0
4    4.0
dtype: float64

In [125]:
# dropna=False keeps the missing values and counts them as their own row.
my_series.value_counts(dropna=False)

NaN    2
2.0    1
3.0    1
4.0    1
Name: count, dtype: int64

In [126]:
# mean() skips the missing entries (giving 3.0 here), and fillna() returns a
# copy with that value in place of each NaN; my_series is left unchanged.
my_series.fillna(my_series.mean())

0    3.0
1    3.0
2    2.0
3    3.0
4    4.0
dtype: float64

In [127]:
# Returns a copy without the missing rows; the surviving rows keep their
# original index labels (2, 3, 4).
my_series.dropna()

2    2.0
3    3.0
4    4.0
dtype: float64

### Apply Method

In [132]:
# String Series reused by the apply() and where() examples.
my_series = pd.Series(
    data=['day 0','day 0','day 2','day 3','day 4'],
)
my_series

0    day 0
1    day 0
2    day 2
3    day 3
4    day 4
dtype: object

In [133]:
# apply() calls the lambda once per element; here it returns each string's
# last character, so the result still holds strings, not integers.
my_series.apply(lambda x: x[-1])

0    0
1    0
2    2
3    3
4    4
dtype: object

### Pandas where vs Numpy where

In [137]:
# Series.where keeps a value where the condition is True and substitutes the
# second argument elsewhere. First pass: rows without '2' become 'nope'.
# Second pass: rows that do contain '2' become 'found it'.
(my_series.where(
    my_series.str.contains('2'), 'nope').where(
    ~my_series.str.contains('2'), 'found it'))

0        nope
1        nope
2    found it
3        nope
4        nope
dtype: object

In [139]:
# np.where(cond, a, b) picks a where cond is True and b otherwise, all in one
# call; the result is wrapped in a new Series with a default index.
pd.Series(np.where(my_series.str.contains('2'), 'found it', 'nope'))

0        nope
1        nope
2    found it
3        nope
4        nope
dtype: object