# Exercises 1: Solutions

In [2]:
import pandas as pd

## Missing data

In [None]:
# Missing values result in float data type: None is stored as NaN,
# and NaN is a float, so the integers are converted to float64.
miss = pd.Series([1, 2, 3, None])
miss

In [None]:
# The missing entry is NaN, so its type is a float (numpy.float64).
type(miss[3])

In [None]:
# Boolean mask: True where the value is missing.
miss.isna()

In [None]:
# Pandas nullable `Int64` dtype (note the capital "I") allows integers with
# missing values: the integers stay integers and the gap is stored as pd.NA.
miss = pd.Series([1, 2, 3, None], dtype="Int64")
miss

In [None]:
# With `Int64` the missing entry is pd.NA (type NAType), not a float.
type(miss[3])

In [None]:
# isna() flags pd.NA as missing, just as it flags NaN.
miss.isna()

## Splitting multiple values

In [12]:
# Sample data: each string holds several comma-separated values.
s = pd.Series(["a,b,c", "d,e"])

In [None]:
# Split using map().
# Note: String values are converted to lists.
# The separator must be passed explicitly: a bare `str.split` splits on
# whitespace and would leave "a,b,c" as the single-item list ["a,b,c"].
s.map(lambda v: v.split(","))

In [None]:
# Or using the `.str` accessor, which applies the string method to every
# element without writing a function.
s.str.split(",")

In [None]:
# Use explode() to split into rows: each list item gets its own row and
# the original index label is repeated for items from the same string.
s.str.split(",").explode()

## Recode values

In [None]:
# Create raw data values: A x3, B x2, C x2, D x1, E x1.
raw = pd.Series(["A", "A", "A", "B", "B", "C", "C",  "D", "E"])

In [None]:
# Get top 3 values: value_counts() sorts by descending frequency,
# so head(3) keeps the three most common values with their counts.
raw.value_counts().head(3)

In [None]:
# Get the top 3 values themselves (the labels, not the counts).
# Note: `.index` returns a pandas Index rather than a list; an Index
# supports the `in` membership test.
top = raw.value_counts().head(3).index

In [None]:
# Use a lambda function to recode: keep values found in `top`,
# replace everything else with "other".
raw.map(lambda v: v if v in top else "other")

### What about missing values?

In [3]:
# Create raw data values, now including three missing values (None).
raw = pd.Series(["A", "A", "A", "B", "B", "C", "C",  "D", "E", None, None, None])

In [None]:
# Get the top 3 values as a pandas Index.
# Note: value_counts() excludes missing values by default (dropna=True),
# so the three None entries cannot appear among the top values.
top = raw.value_counts().head(3).index
top

In [None]:
# Preserve missing values using an extra condition.
# pd.isna() is used rather than `v is None` so that NaN and pd.NA missing
# values are preserved as well as None.
raw.map(lambda v: v if v in top or pd.isna(v) else "other")

### Wrap up

In [None]:
# Combine into a single function.
def recode(values, top=3, other="other"):
    """Recode a Series, keeping only its most common values.

    Args:
        values: Series of raw values to recode.
        top: Number of most frequent values to keep unchanged.
        other: Replacement for every value outside the ``top`` most common.

    Returns:
        A new Series in which the ``top`` most frequent values are kept,
        missing values are left as they are, and all remaining values are
        replaced by ``other``.
    """
    # value_counts() sorts by descending frequency and skips missing values,
    # so the first `top` index labels are the values to keep.
    keep = values.value_counts().head(top).index
    # pd.isna() recognises None, NaN and pd.NA alike.
    return values.map(lambda v: v if v in keep or pd.isna(v) else other)


# Try it on the sample data using the defaults (top=3, other="other").
recode(raw)

In [None]:
# Alternative version: drop missing values, recode, then reindex to restore them.
def recode(values, top=3, other="other"):
    """Recode a Series, keeping only its most common values.

    Missing values are dropped before recoding and restored afterwards by
    reindexing, so the mapping function never sees them.

    Args:
        values: Series of raw values to recode. Its index labels should be
            unique, because ``reindex`` raises on duplicate labels.
        top: Number of most frequent values to keep unchanged.
        other: Replacement for every value outside the ``top`` most common.

    Returns:
        A new Series aligned to ``values.index`` in which the ``top`` most
        frequent values are kept, missing values stay missing, and all
        remaining values are replaced by ``other``.
    """
    # value_counts() sorts by descending frequency and skips missing values,
    # so the first `top` index labels are the values to keep.
    keep = values.value_counts().head(top).index
    return (
        values
        .dropna()
        .map(lambda v: v if v in keep else other)
        # Restore the dropped rows; they reappear as missing values.
        .reindex(values.index)
    )


# Try the dropna()/reindex() version on the same sample data.
recode(raw)