In [None]:
import pandas as pd
import numpy as np

In [None]:
# Build a Series from a plain list; no index is given, so pandas supplies
# the default integer labels.
series = pd.Series(data=[1, 2, 3, 4, 5])
series

In [None]:
# Inspect the data type pandas chose for the stored values.
series.dtype

In [None]:
# Inspect the index (the labels attached to each value).
series.index

In [None]:
# Rebuild the Series with explicit string labels instead of the default
# integer index.
series = pd.Series(data=[1, 2, 3], index=list('cba'))
series

In [None]:
# The index now holds the labels that were passed explicitly at construction.
series.index

In [None]:
# Select the second element by integer position.
# `.iloc` is used instead of `series[1]`: with a string-labelled index an
# integer key falls back to positional lookup, which pandas has deprecated
# (FutureWarning in pandas 2.x) in favour of explicit `.iloc`.
series.iloc[1]

In [None]:
# Look up a single value by its index label.
series['b']

In [None]:
# A list of labels selects several entries, returned in the order requested.
series[['a', 'b']]

In [None]:
# The same label may be requested more than once; its entry is repeated in the result.
series[['b', 'b']]

In [None]:
# NumPy-style vectorized operations work element-wise on a pandas Series;
# the result keeps the same index labels.
series ** 2

In [None]:
# Index labels can be checked for existence with 'in' ...
'a' in series

In [None]:
# ... values cannot: `in` only tests the index labels, not the stored values.
1 in series

In [None]:
# A dictionary can seed a Series: keys become the index labels, values the data.
races = {
    'freezer 5 mile': 5,
    'armory night at the races': 1,
    'ocean breeze miles mania': 1,
}
pd.Series(data=races)

In [None]:
# Dictionary data combined with an explicit index: only labels listed in the
# index are kept, and index labels with no dictionary entry (here 0) get NaN.
series = pd.Series(data={1: 1, 2: 2, 3: 4, 4: 8}, index=np.arange(5))
series

In [None]:
# Boolean mask that is True wherever a value is missing (NaN).
pd.isnull(series)

In [None]:
# The inverse mask: True wherever a value is present.
pd.notnull(series)

In [None]:
# The top-level function and the Series method produce the same mask.
(pd.notnull(series) == series.notnull()).all()

In [None]:
# Likewise for isnull: function form and method form agree element-wise.
(pd.isnull(series) == series.isnull()).all()

In [None]:
# It didn't actually snow :(
snow = pd.Series(data=[3, 0.1, 0], index=[17, 18, 19])
rain = pd.Series(data=[1.1, 0, 0.6, 0], index=[16, 17, 18, 19])

# Arithmetic aligns on index labels; a label present on only one side (16)
# produces NaN in the result.
snow + rain

In [None]:
# A Series can carry a name, which is included when it is displayed.
snow.name = 'snow'
snow

In [None]:
# Assigning to .index replaces all labels at once; the new list must supply
# one label per element.
snow.index = ['Feb 17th', 'Feb 18th', 'Feb 19th']
snow

In [None]:
# Column-oriented data: each key becomes a column and each list holds that
# column's values, one per row.
runs = dict(
    user=['andy', 'andy', 'andy'],
    type=['run', 'core', 'run'],
    date=['02-19-2020', '02-19-2020', '02-18-2020'],
    time=['20:15', '8:00', '16:00'],
)
frame = pd.DataFrame(data=runs)
frame

In [None]:
# First two rows.
frame.head(2)

In [None]:
# Last two rows.
frame.tail(2)

In [None]:
# A column can be read as an attribute when its name is a valid Python
# identifier that does not clash with an existing DataFrame attribute ...
frame.time

In [None]:
# ... or, for any column name, with dictionary-style indexing.
frame['time']

In [None]:
# Assigning a scalar broadcasts it to every row of the column.
frame['time'] = '10:00'
frame

In [None]:
# Assigning an array replaces the column position by position; its length
# must match the number of rows.
frame['time'] = np.array(['25:00', '8:00', '20:00'])
frame

In [None]:
# Assigning a Series aligns on index labels rather than position:
# row 0 receives '02-03-2020', row 1 '02-01-2020', row 2 '02-02-2020'.
frame['date'] = pd.Series(['02-01-2020', '02-02-2020', '02-03-2020'], index=[1, 2, 0])
frame

In [None]:
# The column labels of the frame.
frame.columns

In [None]:
# Assigning to a new key adds a column. Row labels missing from the assigned
# Series (here row 1) are filled with NaN.
frame['distance'] = pd.Series([3.5, 3], index=[0, 2])
frame

In [None]:
# `del` removes a column from the frame in place, like deleting a dictionary key.
del frame['distance']
frame

In [None]:
# Transpose: rows become columns and columns become rows.
frame.T

In [None]:
# Transposing twice restores the original row/column layout.
frame.T.T

In [None]:
# The frame's data as a two-dimensional NumPy array (labels are not included).
frame.values

In [None]:
# A pandas Index cannot be modified once it has been created.
index = pd.Index(list('abc'))
index

In [None]:
# Demonstrate immutability: item assignment on an Index raises TypeError,
# which is caught here so the message can be shown without stopping the notebook.
try:
    index[3] = 'd'
except TypeError as e:
    print(e)

In [None]:
# The row labels of the frame.
frame.index

In [None]:
# reindex returns a new frame with the rows arranged in the requested label
# order; `frame` itself is left unchanged.
frame.reindex([1, 2, 0])

In [None]:
# drop returns a new frame without the row labelled 2; `frame` is not modified.
frame.drop(2)

In [None]:
# A list of labels drops several rows at once.
frame.drop([0, 2])

In [None]:
# axis=1 drops a column instead of a row (still returning a new frame).
frame.drop('user', axis=1)

In [None]:
# inplace=True mutates `frame` directly instead of returning a new frame.
# NOTE: this cell is not idempotent - running it a second time raises KeyError
# because the 'time' column has already been removed.
frame.drop('time', axis=1, inplace=True)
frame

In [None]:
# Race times as strings, one column per event and one row per athlete.
# None marks an event for which no time is recorded.
data_xctf = dict([
    ('8K', ['24:20.80', '24:33.50', '24:58.80', None, '26:24.20']),
    ('6K', ['18:58.80', '19:10.20', '19:25.80', '20:54.00', '20:20.50']),
    ('5K', ['15:32.00', '15:39.00', '15:59.00', '17:31.60', '16:38.40']),
    ('10000m', [None, None, '31:51.73', '35:50.22', None]),
    ('5000m', ['14:23.21', None, '15:27.01', '16:44.14', '15:27.64']),
    ('3000m', ['8:32.83', '8:52.60', '8:51.80', '9:47.70', '9:03.60']),
    ('1 Mile', ['4:20.59', '4:20.39', '4:40.34', '4:57.53', '4:40.76']),
    ('1500m', ['3:54.67', '3:57.78', None, '4:32.14', '4:08.17']),
])
run_dataframe = pd.DataFrame(
    data=data_xctf,
    index=[
        'Thomas Caulfield',
        'Joseph Smith',
        'Ben Fishbein',
        'Lisa Grohn',
        'Andy Jarombek',
    ],
)
run_dataframe

In [None]:
# Slicing with row labels selects a range of rows; unlike positional slices,
# both endpoints are included.
run_dataframe['Joseph Smith':'Lisa Grohn']

In [None]:
# An integer slice selects rows by position; the end position is excluded.
run_dataframe[1:3]

In [None]:
# .loc selects by label: every row, restricted to the three named columns.
run_dataframe.loc[:, ['8K', '6K', '5K']]

In [None]:
# .iloc selects by integer position: the fourth row, first three columns.
run_dataframe.iloc[3, [0, 1, 2]]

In [None]:
# .iloc also accepts NumPy integer arrays: rows 0, 1, 2 and 4, columns 3 through 7.
run_dataframe.iloc[np.array([0, 1, 2, 4]), np.arange(3, 8)]

In [None]:
# .at reads a single value addressed by row label and column label.
run_dataframe.at['Thomas Caulfield', '5000m']

In [None]:
# .iat reads a single value addressed by integer row and column position
# (row 0 is 'Thomas Caulfield', column 4 is '5000m').
run_dataframe.iat[0, 4]

In [None]:
# Numeric race times (seconds, going by the comments in later cells) for three
# events; np.nan marks a missing result. Note that this rebinds `data_xctf`.
data_xctf = dict([
    ('8K', [1460.80, 1473.50, 1498.80, np.nan, 1584.20]),
    ('6K', [1138.80, 1150.20, 1165.80, 1254.00, 1220.50]),
    ('5K', [932.00, 939.00, 959.00, 1051.60, 998.40]),
])
run_sec_dataframe = pd.DataFrame(
    data=data_xctf,
    index=[
        'Thomas Caulfield',
        'Joseph Smith',
        'Ben Fishbein',
        'Lisa Grohn',
        'Andy Jarombek',
    ],
)
run_sec_dataframe

In [None]:
# Tom and Joe's combined seconds for races.
# Adding two rows (each a Series) aligns them on the column labels.
run_sec_dataframe.iloc[0] + run_sec_dataframe.iloc[1]

In [None]:
# Everyone's 400m pace for the 6K (6000m / 400m = 15 laps).
# Selecting with a list (['6K']) keeps the result a one-column DataFrame.
run_sec_dataframe.loc[:, ['6K']] / 15

In [None]:
# Pace per 400m for each race.
# The divisors are the number of 400m laps in 8K, 6K and 5K; the list is
# matched to the columns by position.
run_sec_dataframe / [20, 15, 12.5]

In [None]:
# Transposed view of the data: races become the rows, athletes the columns.
run_seconds_dataframe = run_sec_dataframe.T
run_seconds_dataframe

In [None]:
# Divide every athlete's 5K value by 25.
# NOTE(review): 5000m / 25 = 200m, so this is presumably the 200m pace - confirm intent.
run_seconds_dataframe.loc['5K'] / 25

In [None]:
# Series and DataFrame types have methods for each arithmetic operation.
# .div(25) gives the same result as the `/ 25` operator form.
run_seconds_dataframe.loc['5K'].div(25)

In [None]:
def mean_func(x):
    """Return the mean of ``x`` (any object exposing a ``.mean()`` method)."""
    return x.mean()


# With the default axis, apply() passes each column to the function,
# producing one mean per race.
run_sec_dataframe.apply(mean_func)

In [None]:
# axis='columns' passes each row to the function instead; the rows of the
# transposed frame are races, so this also yields one mean per race.
run_seconds_dataframe.apply(mean_func, axis='columns')

In [None]:
# Sort the rows by index label (string order: '5K', '6K', '8K'); returns a new frame.
run_seconds_dataframe.sort_index()

In [None]:
# axis=1 sorts the columns by label instead of the rows.
run_seconds_dataframe.sort_index(axis=1)

In [None]:
# Transpose back to one row per athlete, then order the rows by the '5K'
# column (ascending by default).
run_seconds_dataframe.T.sort_values(by='5K')

In [None]:
# Rank the athletes within each race column (1 = smallest value);
# missing values stay NaN by default.
run_seconds_dataframe.T.rank()

In [None]:
# Get an overview of different statistics about the DataFrame.
# Statistics are computed per column, i.e. per athlete in this transposed frame.
run_seconds_dataframe.describe()

In [None]:
# Mean absolute deviation from mean value - the mean of all distances from the mean value.
# DataFrame.mad() was deprecated in pandas 1.5 and removed in pandas 2.0, so the
# statistic is computed explicitly: subtract each column's mean, take absolute
# values, then average per column (missing values are skipped by default).
(run_seconds_dataframe - run_seconds_dataframe.mean()).abs().mean()