# Pandas Series Play

In [1]:
# import libraries needed
import numpy as np
import pandas as pd

In [2]:
# Build a NumPy array holding the five integers 0 through 4
array = np.arange(5)
# Wrap the array in a pandas Series; pandas assigns a default 0..4 index
pd.Series(array)


0    0
1    1
2    2
3    3
4    4
dtype: int32

In [3]:
# Same Series as in the previous cell, built in one expression without the intermediate variable
pd.Series(np.arange(5))

0    0
1    1
2    2
3    3
4    4
dtype: int32

In [4]:
# The optional `name` argument labels the Series; it is shown in the repr footer
pd.Series(np.arange(5), name="Test Array")

0    0
1    1
2    2
3    3
4    4
Name: Test Array, dtype: int32

In [5]:
# Bind the Series to a variable so its attributes and methods can be inspected below
series = pd.Series(np.arange(5), name="Test Array")

In [6]:
# .values exposes the Series data as a NumPy array
series.values

array([0, 1, 2, 3, 4])

In [7]:
series.values.mean()  # NumPy's mean, called on the underlying array

2.0

In [8]:
series.mean()  # pandas' mean, called on the Series itself

2.0

In [9]:
# Assigning to .index replaces the labels on the existing Series object in place;
# the list supplies one label per element (five here)
series.index = [10,20,30,40,50]
series

10    0
20    1
30    2
40    3
50    4
Name: Test Array, dtype: int32

In [10]:
# Assigning to .name renames the existing Series object in place.
# NOTE(review): the new name starts with a space, which is visible in the repr
# ("Name:  Series rename") — confirm that is intended.
series.name=" Series rename"
series

10    0
20    1
30    2
40    3
50    4
Name:  Series rename, dtype: int32

## .astype

In [11]:
# Baseline integer Series for the .astype examples (displayed only, not stored in a variable)
pd.Series(np.arange(5), name="Type Test")

0    0
1    1
2    2
3    3
4    4
Name: Type Test, dtype: int32

In [12]:
# .astype('float') returns a new Series with the values converted to float64
pd.Series(np.arange(5), name="Test Array").astype('float')

0    0.0
1    1.0
2    2.0
3    3.0
4    4.0
Name: Test Array, dtype: float64

In [13]:
# .astype('bool'): 0 becomes False, every non-zero value becomes True
pd.Series(np.arange(5), name="Test Array").astype('bool')

0    False
1     True
2     True
3     True
4     True
Name: Test Array, dtype: bool

In [14]:
# .astype('object'): the values print the same, but the dtype becomes the generic `object`
pd.Series(np.arange(5), name="Test Array").astype('object')

0    0
1    1
2    2
3    3
4    4
Name: Test Array, dtype: object

In [15]:
# .astype('int') on a freshly built integer Series: the values are unchanged.
# (Each of these cells starts from a new integer Series, so nothing is converted "back".)
pd.Series(np.arange(5), name="Test Array").astype('int')

0    0
1    1
2    2
3    3
4    4
Name: Test Array, dtype: int32

## The index

In [16]:
# Product names become the index labels; units sold are the values.
items = ["coffee", "bananas", "tea", "coconut", "sugar"]
sales = [0, 5, 155, 0, 518]

sales_series = pd.Series(
    data=sales,
    index=items,
    name="Sales",
)
sales_series

coffee       0
bananas      5
tea        155
coconut      0
sugar      518
Name: Sales, dtype: int64

In [17]:
# Look up a single value by its index label
sales_series["tea"]

155

In [18]:
# A Series built without an explicit index gets the default integer labels 0..4
my_series = pd.Series(range(5))

my_series

0    0
1    1
2    2
3    3
4    4
dtype: int64

In [19]:
# With the default 0..4 index, label 3 is also the element at position 3
my_series[3]

3

In [20]:
# start:stop:step slice — from position 1 to the end, taking every second element
my_series[1::2]

1    1
3    3
dtype: int64

In [21]:
# Slice from the start up to, but not including, position 4
my_series[:4]

0    0
1    1
2    2
3    3
dtype: int64

In [22]:
# Same 0-4 values as before, now labelled "Day 0" through "Day 4".
day_labels = [f"Day {n}" for n in range(5)]
my_series2 = pd.Series(range(5), index=day_labels)
my_series2

Day 0    0
Day 1    1
Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [23]:
# With a custom index, a value is retrieved by its label
my_series2["Day 2"]

2

In [24]:
# Label-based slices include both endpoints, so "Day 4" is part of the result
my_series2["Day 2":"Day 4"]

Day 2    2
Day 3    3
Day 4    4
dtype: int64

In [25]:
# A slice without labels still works on a labelled Series: every second element
my_series2[::2]

Day 0    0
Day 2    2
Day 4    4
dtype: int64

In [26]:
# A label can be the start of a stepped slice: every second element from "Day 1" onward
my_series2["Day 1"::2]

Day 1    1
Day 3    3
dtype: int64

## .iloc Method vs .loc

In [27]:
# .iloc selects by integer position: position 2 is the third element
my_series2.iloc[2]

2

In [28]:
# .iloc selects by integer position only; passing an index label raises TypeError.
# The error is caught and printed so that "Restart & Run All" can continue past
# this cell while still showing the message.
try:
    my_series2.iloc['Day 4']
except TypeError as exc:
    print(f"{type(exc).__name__}: {exc}")

TypeError: Cannot index by location index with a non-integer key

In [29]:
# .loc selects by index label, so the string label works here
my_series2.loc['Day 4']

4

In [30]:
# The same element as .loc['Day 4'], reached through its integer position
my_series2.iloc[4]

4

## Numeric Series Operations

In [31]:
# Series with one missing value. `np.nan` is used because the `np.NaN` alias was
# removed in NumPy 2.0; the NaN forces the whole Series to float64.
num_series = pd.Series(
    [1, np.nan, 3, 4, 5],
    index=['Day 0', 'Day 1', 'Day 2', 'Day 3', 'Day 4'],
)
num_series

Day 0    1.0
Day 1    NaN
Day 2    3.0
Day 3    4.0
Day 4    5.0
dtype: float64

In [32]:
# Adding a scalar is applied element-wise; the NaN entry stays NaN
num_series +1

Day 0    2.0
Day 1    NaN
Day 2    4.0
Day 3    5.0
Day 4    6.0
dtype: float64

In [33]:
# .add with fill_value=0 treats the missing entry as 0 before adding, so NaN becomes 1.0
num_series.add(1,fill_value=0)

Day 0    2.0
Day 1    1.0
Day 2    4.0
Day 3    5.0
Day 4    6.0
dtype: float64

In [34]:
# Assign the result to a new variable to keep it; .add returns a new Series
num_series2 =num_series.add(1,fill_value=0)
num_series2

Day 0    2.0
Day 1    1.0
Day 2    4.0
Day 3    5.0
Day 4    6.0
dtype: float64

In [35]:
# Division by a scalar is also applied element-wise
num_series2 / 2

Day 0    1.0
Day 1    0.5
Day 2    2.0
Day 3    2.5
Day 4    3.0
dtype: float64

## String Methods

In [None]:
# Every string method is called as <series>.str.<method>(...), for example s.str.upper()

## Numeric Series Aggregation

In [36]:
# Load the transactions table; the path is relative to the notebook's working directory.
transactions = pd.read_csv("../retail/transactions.csv")

# Selecting a single column of a DataFrame already returns a Series named after
# the column, so no extra pd.Series(...) wrapper is needed.
transactions_series = transactions["transactions"]

# Preview the first five rows
transactions_series.head()

0     770
1    2111
2    2358
3    3487
4    1922
Name: transactions, dtype: int64

In [37]:
# Number of non-null observations in the Series
transactions_series.count()

83488

In [38]:
# Arithmetic mean of the values
transactions_series.mean()

1694.6021583940208

In [39]:
# Passing a list of quantiles returns a Series indexed by quantile; 0.5 is the median
transactions_series.quantile([.5])

0.5    1393.0
Name: transactions, dtype: float64

In [40]:
# Quartiles: the 25th, 50th (median) and 75th percentiles in a single call
transactions_series.quantile([.25, .5, .75])

0.25    1046.0
0.50    1393.0
0.75    2079.0
Name: transactions, dtype: float64

In [41]:
# Total of all values
transactions_series.sum()

141478945

## Categorical Series Aggregation

In [44]:
# Number of distinct values (my_series holds 0..4, so all five are distinct)
my_series.nunique()

5

In [45]:
# The distinct values themselves, returned as a NumPy array
my_series.unique()

array([0, 1, 2, 3, 4], dtype=int64)

## Missing Data