In [None]:
import json
from io import StringIO

import numpy as np
import pandas as pd

In [None]:
# Serialize a small nested mapping (row label -> column values) to a JSON string.
# json.dumps writes the integer keys 1 and 2 as the strings "1" and "2".
json_data = json.dumps(
    {
        1: dict(a=1, b=2),
        2: dict(a=3, b=4),
    }
)

In [None]:
# Parse the JSON text into a DataFrame; orient='index' makes each top-level key a row label.
# The text is wrapped in StringIO because passing a literal JSON string to read_json
# is deprecated (pandas >= 2.1) in favour of a path or file-like object.
df = pd.read_json(StringIO(json_data), orient='index')
df

In [None]:
# Serialize the frame back to a JSON string. No `orient` is passed, so pandas
# uses its default layout for DataFrames.
df.to_json()

In [None]:
# Distances of the runs logged in February.
feb_runs = [
    11.56, 12,
    2.34, 3.63, 2.85, 3.06, 3.92, 7.87, 12.5,
    2.81, 3.8, 2.65, 7.5, 2.63, 14, 13.21,
    1.28, 1.88, 2.64, 5.20, 3.76, 7.87, 12.59,
    2.81, 2.81, 3.45,
]

# Bin edges: each run falls into one of (0, 3], (3, 8] or (8, 15].
buckets = [0, 3, 8, 15]

# pd.cut assigns every run to the interval that contains it.
cuts = pd.cut(x=feb_runs, bins=buckets)
cuts

In [None]:
# Integer code per run: the position of its bucket within `cuts.categories`.
cuts.codes

In [None]:
# The interval buckets that pd.cut built from the bin edges.
cuts.categories

In [None]:
# Number of runs per bucket, most frequent first.
# The top-level pd.value_counts is deprecated (pandas >= 2.1); Series.value_counts
# is the supported equivalent and returns the same counts.
pd.Series(cuts).value_counts()

In [None]:
# Replace the interval notation with a readable label per bucket, in bin order.
# ("Log Run/Workout" was a typo for "Long Run/Workout".)
bucket_names = ["Slept too Long Run", "Regular Run", "Long Run/Workout"]
cuts = pd.cut(feb_runs, buckets, labels=bucket_names)
cuts

In [None]:
# Number of runs per labelled bucket, most frequent first.
# Uses Series.value_counts because the top-level pd.value_counts is deprecated (pandas >= 2.1).
pd.Series(cuts).value_counts()

In [None]:
# Quantile-based binning: split the runs into four buckets (quartiles) whose edges
# are computed from the data rather than supplied by hand.
cuts = pd.qcut(x=feb_runs, q=4)
cuts

In [None]:
# Number of runs per quartile bucket.
# Uses Series.value_counts because the top-level pd.value_counts is deprecated (pandas >= 2.1).
pd.Series(cuts).value_counts()

In [None]:
# Race venues written as "<venue> - <city>, <state>", with the number of races at each.
race_locations = pd.DataFrame(
    data=dict(
        location=[
            "Ocean Breeze Athletic Complex - New York, NY",
            "The Armory - New York, NY",
            "Tod's Point - Old Greenwich, CT",
            "Franklin D. Roosevelt State Park - Yorktown Heights, NY",
        ],
        race_count=[3, 2, 1, 2],
    )
)
race_locations

In [None]:
# Pull out the location strings as a Series. Selecting the column directly avoids
# transposing the whole frame (which copies every column into object dtype)
# just to read one row back out.
locations: pd.Series = race_locations['location']
locations

In [None]:
# Boolean mask: True where the location text contains "NY" anywhere in the string.
locations.str.contains('NY')

In [None]:
# Three capture groups, matching the "<venue> - <city>, <state>" layout of the data:
#   1. venue name (letters, apostrophes, periods, whitespace)
#   2. city (letters, apostrophes, whitespace)
#   3. two-letter uppercase state code
pattern = r"([A-Za-z'\.\s]+) - ([A-Za-z'\s]+), ([A-Z]{2})"
# findall returns, per element, a list of (venue, city, state) tuples.
locations.str.findall(pattern)

In [None]:
# Each location yields a list of matches; keep only the first (venue, city, state) tuple.
all_matches = locations.str.findall(pattern)
matches = all_matches.str.get(0)
matches

In [None]:
# The third item of each match tuple is the state code.
states = matches.str[2]
states

In [None]:
# A two-level (hierarchical) index lets a one-dimensional Series hold data keyed by
# two labels at once: the exercise type (outer level) and a number 1-5 (inner level).
exercises = pd.Series(
    data=[2.1, 1, 0.5, 2, 2.15],
    index=pd.MultiIndex.from_arrays([
        ['run', 'run', 'run', 'walk', 'run'],
        [1, 2, 3, 4, 5],
    ]),
)
exercises

In [None]:
# Pivot the inner index level (the numbers 1-5) into columns, producing a DataFrame
# with one row per exercise type; combinations with no data show up as NaN.
exercises.unstack()

In [None]:
# Round trip: stack() moves the columns created by unstack() back into an inner index level.
exercises.unstack().stack()

In [None]:
# Swap the two index levels so the number comes first and the exercise type second.
exercises.swaplevel(0, 1)

In [None]:
# Sort the rows by the outer index level (exercise type).
exercises.sort_index(level=0)

In [None]:
# Total per exercise type (outer index level).
# The `level` argument of sum() was deprecated in pandas 1.3 and removed in 2.0;
# grouping by the index level is the supported way to aggregate per level.
exercises.groupby(level=0).sum()

In [None]:
# Use the location text as the row index (returns a new frame; race_locations is unchanged).
race_locations.set_index('location')

In [None]:
# One row per user, identified by `username`.
users = pd.DataFrame(
    data=dict(
        username=['andy', 'joe', 'tom', 'fish'],
        first=['Andrew', 'Joseph', 'Thomas', 'Benjamin'],
        last=['Jarombek', 'Smith', 'Caulfield', 'Fishbein'],
    )
)
users

In [None]:
# One row per logged run. `username` links each run to a row in `users`;
# the run's duration is stored as separate minutes and seconds columns.
runs = pd.DataFrame(
    data=dict(
        username=['andy', 'joe', 'andy', 'fish'],
        date=['2020-02-28', '2020-02-29', '2020-03-01', '2020-02-28'],
        distance=[2.1, 8, 13, 5],
        minutes=[16, 54, 92, 30],
        seconds=[5, 51, 0, 10],
    )
)
runs

In [None]:
# With no key given, merge joins on the columns the two frames share -- here only
# 'username'. The default join type behaves like a SQL INNER JOIN.
users.merge(runs)

In [None]:
# Same join with the type spelled out: only usernames present in both frames are kept.
users.merge(runs, how='inner')

In [None]:
# Outer join: keep every username from either frame, filling the missing side with NaN.
users.merge(runs, how='outer')

In [None]:
# Name the join column on each side explicitly (useful when the two frames
# call the key different things; here both are 'username').
users.merge(runs, left_on='username', right_on='username')

In [None]:
# Ski trails I went on, split by time of day.
morning = pd.Series(
    data=['Left Bank', 'West Way', 'Winding Brook', 'Panorama', 'Wild Turkey', 'Cutter']
)
afternoon = pd.Series(
    data=['Wild Turkey', 'Jericho', 'Bear Crossing', 'Upper Whitetail', 'Lower Whitetail']
)

# Stack the two Series end to end. Each keeps its own integer labels,
# so index values repeat in the result.
pd.concat(objs=[morning, afternoon])

In [None]:
# Concatenating along the column axis (axis=1 / 'columns') places the Series side by side,
# one column per Series, and the result is a DataFrame.
pd.concat([morning, afternoon], axis='columns')

In [None]:
# Column-wise concat aligns rows on the index. The default, join='outer', keeps the
# union of index labels and fills gaps with NaN.
pd.concat([morning, afternoon], axis='columns', join='outer')

In [None]:
# join='inner' keeps only the index labels present in every input Series.
pd.concat([morning, afternoon], axis='columns', join='inner')

In [None]:
# `keys` tags each input with an outer index level ('morning' / 'afternoon'),
# so the result has a hierarchical index showing which Series a row came from.
pd.concat([morning, afternoon], keys=['morning', 'afternoon'])

In [None]:
# Build a frame indexed by trail name with each morning trail's difficulty grade.
# The Series has no name, so to_frame() labels its column 0; that column becomes the index.
morning_frame = (
    morning
    .to_frame()
    .assign(grade=['green', 'blue', 'green', 'blue', 'black diamond', 'black diamond'])
    .set_index(0)
)
morning_frame

In [None]:
# Same shape as the morning frame: afternoon trail names as the index, grade as the only column.
afternoon_frame = (
    afternoon
    .to_frame()
    .assign(grade=['black diamond', 'double black diamond', 'blue black', 'black diamond', 'black diamond'])
    .set_index(0)
)
afternoon_frame

In [None]:
# Union of both frames by trail name. Where a trail appears in both (e.g. 'Wild Turkey'),
# the morning frame's non-null values take precedence and the afternoon frame only fills gaps.
all_trails = morning_frame.combine_first(afternoon_frame)
all_trails

In [None]:
# Move the trail-name index back into a regular column and restore a default integer index.
all_trails.reset_index()

In [None]:
# Lap-by-lap results for two mile races in "wide" form: one row per race,
# one column per distance mark. Each cell reads "cumulative time (split for that lap)".
miles_races = pd.DataFrame(
    data={
        'race': ['NYRR Night at the Races #1', 'Ocean Breeze Miles Mania #4'],
        '209m': ['39.447 (39.447)', '37.615 (37.615)'],
        '409m': ['1:16.524 (37.077)', '1:14.048 (36.433)'],
        '609m': ['1:53.254 (36.730)', '1:50.689 (36.641)'],
        '809m': ['2:30.180 (36.926)', '2:26.830 (36.141)'],
        '1009m': ['3:06.899 (36.720)', '3:02.024 (35.194)'],
        '1209m': ['3:43.565 (36.667)', '3:37.905 (35.881)'],
        '1409m': ['4:19.249 (35.684)', '4:13.632 (35.727)'],
        '1609m': ['4:54.247 (34.999)', '4:47.750 (34.118)'],
    }
)
miles_races

In [None]:
# Reshape wide -> long: keep 'race' as the identifier and turn every distance column
# into a (variable, value) row.
miles_races.melt(id_vars=['race'])

In [None]:
# In the wide frame each race appears exactly once.
# Uses Series.value_counts because the top-level pd.value_counts is deprecated (pandas >= 2.1).
miles_races['race'].value_counts()

In [None]:
# After melting, each race appears once per distance column.
# Uses Series.value_counts because the top-level pd.value_counts is deprecated (pandas >= 2.1).
pd.melt(miles_races, ['race'])['race'].value_counts()