In [75]:
import pandas as pd
import numpy as np

In [76]:
# A Series built from a plain list receives a default integer index (0..n-1).
series = pd.Series(data=[1, 2, 3, 4, 5])
series

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [77]:
# dtype reports the element type of the stored values (int64 for this Series).
series.dtype

dtype('int64')

In [78]:
# No index was supplied, so the Series uses a RangeIndex running from 0 up to (not including) 5.
series.index

RangeIndex(start=0, stop=5, step=1)

In [79]:
# Explicit labels replace the default integer index; they keep the order given.
series = pd.Series(data=[1, 2, 3], index=list('cba'))
series

c    1
b    2
a    3
dtype: int64

In [80]:
# Custom labels are held in a regular Index, in the order they were passed.
series.index

Index(['c', 'b', 'a'], dtype='object')

In [81]:
# Select by integer position explicitly with .iloc. Plain `series[1]` on a
# string-labelled Series depends on an "integer key means position" fallback
# that pandas has deprecated; .iloc returns the same element (2) without it.
series.iloc[1]

2

In [82]:
# Select a single element by its index label.
series['b']

2

In [83]:
# A list of labels selects several elements, returned in the order requested.
series[['a', 'b']]

a    3
b    2
dtype: int64

In [84]:
# The same label may be requested more than once; each request produces its own row.
series[['b', 'b']]

b    2
b    2
dtype: int64

In [85]:
# NumPy-style vectorized operations work on a pandas Series: the operation is
# applied element-wise and the index labels are preserved in the result.
series ** 2

c    1
b    4
a    9
dtype: int64

In [86]:
# Index labels can be checked for existence with 'in' ...
'a' in series

True

In [87]:
# ... values cannot: 'in' only looks at the index labels ('c', 'b', 'a'),
# and 1 is a value in this Series, not a label, so the result is False.
1 in series

False

In [88]:
# A dictionary can seed a Series: keys become the index labels, values the data.
races = {
    'freezer 5 mile': 5,
    'armory night at the races': 1,
    'ocean breeze miles mania': 1,
}
pd.Series(data=races)

freezer 5 mile               5
armory night at the races    1
ocean breeze miles mania     1
dtype: int64

In [89]:
# Dictionary data combined with an explicit index: the index decides which
# labels appear, and a label with no matching key (0 here) becomes NaN.
series = pd.Series(data={1: 1, 2: 2, 3: 4, 4: 8}, index=np.arange(5))
series

0    NaN
1    1.0
2    2.0
3    4.0
4    8.0
dtype: float64

In [90]:
# pd.isnull flags missing entries element-wise; only label 0 is missing here.
pd.isnull(series)

0     True
1    False
2    False
3    False
4    False
dtype: bool

In [91]:
# pd.notnull flags the entries that are present (True everywhere except label 0).
pd.notnull(series)

0    False
1     True
2     True
3     True
4     True
dtype: bool

In [92]:
# The top-level function and the Series method give the same answer for every element.
(pd.notnull(series) == series.notnull()).all()

True

In [93]:
# Likewise, pd.isnull(series) and series.isnull() agree element-wise.
(pd.isnull(series) == series.isnull()).all()

True

In [94]:
# It didn't actually snow :(
snow = pd.Series(data=[3, 0.1, 0], index=[17, 18, 19])
rain = pd.Series(data=[1.1, 0, 0.6, 0], index=[16, 17, 18, 19])

# Addition aligns the operands on index labels; a label that exists on only
# one side (16) has nothing to add to and comes out as NaN.
snow + rain

16    NaN
17    3.0
18    0.7
19    0.0
dtype: float64

In [95]:
# A Series can carry a name; once set it is shown in the repr.
snow.name = 'snow'
snow

17    3.0
18    0.1
19    0.0
Name: snow, dtype: float64

In [96]:
# Assigning to .index relabels the existing Series; the values are untouched.
snow.index = ['Feb 17th', 'Feb 18th', 'Feb 19th']
snow

Feb 17th    3.0
Feb 18th    0.1
Feb 19th    0.0
Name: snow, dtype: float64

In [97]:
# A DataFrame built from a dict of equal-length lists: each key becomes a
# column and the rows receive a default integer index.
runs = dict(
    user=['andy'] * 3,
    type=['run', 'core', 'run'],
    date=['02-19-2020', '02-19-2020', '02-18-2020'],
    time=['20:15', '8:00', '16:00'],
)
frame = pd.DataFrame(data=runs)
frame

Unnamed: 0,user,type,date,time
0,andy,run,02-19-2020,20:15
1,andy,core,02-19-2020,8:00
2,andy,run,02-18-2020,16:00


In [98]:
# head(n) returns the first n rows.
frame.head(2)

Unnamed: 0,user,type,date,time
0,andy,run,02-19-2020,20:15
1,andy,core,02-19-2020,8:00


In [99]:
# tail(n) returns the last n rows.
frame.tail(2)

Unnamed: 0,user,type,date,time
1,andy,core,02-19-2020,8:00
2,andy,run,02-18-2020,16:00


In [100]:
# Attribute access returns the 'time' column as a Series.
frame.time

0    20:15
1     8:00
2    16:00
Name: time, dtype: object

In [101]:
# Dictionary-style indexing returns the 'time' column as a Series.
frame['time']

0    20:15
1     8:00
2    16:00
Name: time, dtype: object

In [102]:
# Assigning a scalar to a column broadcasts it to every row.
frame['time'] = '10:00'
frame

Unnamed: 0,user,type,date,time
0,andy,run,02-19-2020,10:00
1,andy,core,02-19-2020,10:00
2,andy,run,02-18-2020,10:00


In [103]:
# Assigning an array replaces the column's values row by row, in order.
frame['time'] = np.array(['25:00', '8:00', '20:00'])
frame

Unnamed: 0,user,type,date,time
0,andy,run,02-19-2020,25:00
1,andy,core,02-19-2020,8:00
2,andy,run,02-18-2020,20:00


In [104]:
# Assigning a Series aligns on index labels, not position: the value labelled
# 0 ('02-03-2020') lands in row 0 even though it is listed last.
frame['date'] = pd.Series(['02-01-2020', '02-02-2020', '02-03-2020'], index=[1, 2, 0])
frame

Unnamed: 0,user,type,date,time
0,andy,run,02-03-2020,25:00
1,andy,core,02-01-2020,8:00
2,andy,run,02-02-2020,20:00


In [105]:
# The column labels are themselves stored in an Index.
frame.columns

Index(['user', 'type', 'date', 'time'], dtype='object')

In [106]:
# Assigning to a new column name creates the column. The Series only has
# labels 0 and 2, so row 1 is left missing after alignment.
frame['distance'] = pd.Series([3.5, 3], index=[0, 2])
frame

Unnamed: 0,user,type,date,time,distance
0,andy,run,02-03-2020,25:00,3.5
1,andy,core,02-01-2020,8:00,
2,andy,run,02-02-2020,20:00,3.0


In [107]:
# del removes the column from the frame itself.
del frame['distance']
frame

Unnamed: 0,user,type,date,time
0,andy,run,02-03-2020,25:00
1,andy,core,02-01-2020,8:00
2,andy,run,02-02-2020,20:00


In [108]:
# .T transposes the frame: column labels become row labels and vice versa.
frame.T

Unnamed: 0,0,1,2
user,andy,andy,andy
type,run,core,run
date,02-03-2020,02-01-2020,02-02-2020
time,25:00,8:00,20:00


In [109]:
# Transposing twice gives back the original layout.
frame.T.T

Unnamed: 0,user,type,date,time
0,andy,run,02-03-2020,25:00
1,andy,core,02-01-2020,8:00
2,andy,run,02-02-2020,20:00


In [110]:
# Export the underlying data as a 2-D NumPy array (object dtype here, since
# every column holds strings). to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
frame.to_numpy()

array([['andy', 'run', '02-03-2020', '25:00'],
       ['andy', 'core', '02-01-2020', '8:00'],
       ['andy', 'run', '02-02-2020', '20:00']], dtype=object)

In [111]:
# A pandas Index cannot be modified once it has been created.
index = pd.Index(data=['a', 'b', 'c'])
index

Index(['a', 'b', 'c'], dtype='object')

In [112]:
# Item assignment on an Index raises TypeError; the error is caught and its
# message printed so the cell demonstrates immutability without failing.
try:
    index[3] = 'd'
except TypeError as e:
    print(e)

Index does not support mutable operations


In [113]:
# The frame's rows are labelled by a default RangeIndex (0, 1, 2).
frame.index

RangeIndex(start=0, stop=3, step=1)

In [115]:
# reindex returns a new frame whose rows follow the requested label order; frame is unchanged.
frame.reindex([1, 2, 0])

Unnamed: 0,user,type,date,time
1,andy,core,02-01-2020,8:00
2,andy,run,02-02-2020,20:00
0,andy,run,02-03-2020,25:00


In [116]:
# drop returns a new frame without the row labelled 2; frame itself is not modified.
frame.drop(2)

Unnamed: 0,user,type,date,time
0,andy,run,02-03-2020,25:00
1,andy,core,02-01-2020,8:00


In [117]:
# A list of labels drops several rows at once.
frame.drop([0, 2])

Unnamed: 0,user,type,date,time
1,andy,core,02-01-2020,8:00


In [118]:
# axis=1 makes drop remove a column instead of a row; the result is a new frame.
frame.drop('user', axis=1)

Unnamed: 0,type,date,time
0,run,02-03-2020,25:00
1,core,02-01-2020,8:00
2,run,02-02-2020,20:00


In [119]:
# inplace=True mutates frame itself instead of returning a new frame, so the
# 'time' column is gone from this point on.
# NOTE(review): this makes the cell non-idempotent. Running it a second time
# fails because 'time' no longer exists.
frame.drop('time', axis=1, inplace=True)
frame

Unnamed: 0,user,type,date
0,andy,run,02-03-2020
1,andy,core,02-01-2020
2,andy,run,02-02-2020


In [131]:
# Times stored as 'mm:ss.xx' strings, one column per event and one row per
# person; None marks an entry with no time.
data_xctf = {
    '8K': ['24:20.80', '24:33.50', '24:58.80', None, '26:24.20'],
    '6K': ['18:58.80', '19:10.20', '19:25.80', '20:54.00', '20:20.50'],
    '5K': ['15:32.00', '15:39.00', '15:59.00', '17:31.60', '16:38.40'],
    '10000m': [None, None, '31:51.73', '35:50.22', None],
    '5000m': ['14:23.21', None, '15:27.01', '16:44.14', '15:27.64'],
    '3000m': ['8:32.83', '8:52.60', '8:51.80', '9:47.70', '9:03.60'],
    '1 Mile': ['4:20.59', '4:20.39', '4:40.34', '4:57.53', '4:40.76'],
    '1500m': ['3:54.67', '3:57.78', None, '4:32.14', '4:08.17'],
}
# Passing index= gives the rows string labels instead of the default 0..n-1.
run_dataframe = pd.DataFrame(
    data=data_xctf,
    index=[
        'Thomas Caulfield',
        'Joseph Smith',
        'Ben Fishbein',
        'Lisa Grohn',
        'Andy Jarombek',
    ],
)
run_dataframe

Unnamed: 0,8K,6K,5K,10000m,5000m,3000m,1 Mile,1500m
Thomas Caulfield,24:20.80,18:58.80,15:32.00,,14:23.21,8:32.83,4:20.59,3:54.67
Joseph Smith,24:33.50,19:10.20,15:39.00,,,8:52.60,4:20.39,3:57.78
Ben Fishbein,24:58.80,19:25.80,15:59.00,31:51.73,15:27.01,8:51.80,4:40.34,
Lisa Grohn,,20:54.00,17:31.60,35:50.22,16:44.14,9:47.70,4:57.53,4:32.14
Andy Jarombek,26:24.20,20:20.50,16:38.40,,15:27.64,9:03.60,4:40.76,4:08.17


In [132]:
# Slicing rows by label is inclusive of both endpoints ('Lisa Grohn' is part of the result).
run_dataframe['Joseph Smith':'Lisa Grohn']

Unnamed: 0,8K,6K,5K,10000m,5000m,3000m,1 Mile,1500m
Joseph Smith,24:33.50,19:10.20,15:39.00,,,8:52.60,4:20.39,3:57.78
Ben Fishbein,24:58.80,19:25.80,15:59.00,31:51.73,15:27.01,8:51.80,4:40.34,
Lisa Grohn,,20:54.00,17:31.60,35:50.22,16:44.14,9:47.70,4:57.53,4:32.14


In [133]:
# Slicing rows with integers is positional and excludes the stop: positions 1 and 2 only.
run_dataframe[1:3]

Unnamed: 0,8K,6K,5K,10000m,5000m,3000m,1 Mile,1500m
Joseph Smith,24:33.50,19:10.20,15:39.00,,,8:52.60,4:20.39,3:57.78
Ben Fishbein,24:58.80,19:25.80,15:59.00,31:51.73,15:27.01,8:51.80,4:40.34,


In [134]:
# .loc selects by label: every row (:) and the three listed columns.
run_dataframe.loc[:, ['8K', '6K', '5K']]

Unnamed: 0,8K,6K,5K
Thomas Caulfield,24:20.80,18:58.80,15:32.00
Joseph Smith,24:33.50,19:10.20,15:39.00
Ben Fishbein,24:58.80,19:25.80,15:59.00
Lisa Grohn,,20:54.00,17:31.60
Andy Jarombek,26:24.20,20:20.50,16:38.40


In [137]:
# .iloc selects by integer position: row 3, columns 0-2. A single row comes
# back as a Series named after the row label.
run_dataframe.iloc[3, [0, 1, 2]]

8K        None
6K    20:54.00
5K    17:31.60
Name: Lisa Grohn, dtype: object

In [140]:
# .iloc also accepts NumPy integer arrays: rows 0, 1, 2 and 4, columns 3 through 7.
run_dataframe.iloc[np.array([0, 1, 2, 4]), np.arange(3, 8)]

Unnamed: 0,10000m,5000m,3000m,1 Mile,1500m
Thomas Caulfield,,14:23.21,8:32.83,4:20.59,3:54.67
Joseph Smith,,,8:52.60,4:20.39,3:57.78
Ben Fishbein,31:51.73,15:27.01,8:51.80,4:40.34,
Andy Jarombek,,15:27.64,9:03.60,4:40.76,4:08.17


In [141]:
# .at reads a single cell addressed by row label and column label.
run_dataframe.at['Thomas Caulfield', '5000m']

'14:23.21'

In [142]:
# .iat reads a single cell by integer position; (0, 4) is the same cell as
# ('Thomas Caulfield', '5000m').
run_dataframe.iat[0, 4]

'14:23.21'

In [144]:
# The 8K/6K/5K times expressed in seconds (e.g. 24:20.80 -> 1460.80), with
# np.nan for the missing 8K entry. Note: this rebinds the name data_xctf.
data_xctf = {
    '8K': [1460.80, 1473.50, 1498.80, np.nan, 1584.20],
    '6K': [1138.80, 1150.20, 1165.80, 1254.00, 1220.50],
    '5K': [932.00, 939.00, 959.00, 1051.60, 998.40],
}
run_sec_dataframe = pd.DataFrame(
    data=data_xctf,
    index=[
        'Thomas Caulfield',
        'Joseph Smith',
        'Ben Fishbein',
        'Lisa Grohn',
        'Andy Jarombek',
    ],
)
run_sec_dataframe

Unnamed: 0,8K,6K,5K
Thomas Caulfield,1460.8,1138.8,932.0
Joseph Smith,1473.5,1150.2,939.0
Ben Fishbein,1498.8,1165.8,959.0
Lisa Grohn,,1254.0,1051.6
Andy Jarombek,1584.2,1220.5,998.4


In [157]:
# Tom and Joe's combined seconds for each race. Each .iloc[i] is one row as a
# Series, and the addition lines the two rows up by column label.
run_sec_dataframe.iloc[0] + run_sec_dataframe.iloc[1]

8K    2934.3
6K    2289.0
5K    1871.0
dtype: float64

In [152]:
# Everyone's 400m pace for the 6K (6000m / 400m = 15 laps). Selecting with a
# list (['6K']) keeps the result a one-column DataFrame.
run_sec_dataframe.loc[:, ['6K']] / 15

Unnamed: 0,6K
Thomas Caulfield,75.92
Joseph Smith,76.68
Ben Fishbein,77.72
Lisa Grohn,83.6
Andy Jarombek,81.366667


In [155]:
# Pace per 400m for each race. The list is matched to the columns in order:
# 8K = 20 laps, 6K = 15 laps, 5K = 12.5 laps.
run_sec_dataframe / [20, 15, 12.5]

Unnamed: 0,8K,6K,5K
Thomas Caulfield,73.04,75.92,74.56
Joseph Smith,73.675,76.68,75.12
Ben Fishbein,74.94,77.72,76.72
Lisa Grohn,,83.6,84.128
Andy Jarombek,79.21,81.366667,79.872
