<img src="https://github.com/danielscarvalho/data/blob/master/img/FIAP-logo.png?raw=True" style="float:right;" width="200px">

# DATA SCIENCE & STATISTICAL COMPUTING [》](https://www.fiap.com.br/)

## Dataframe & Python

### LAP Parte 2: “Cookbook”

https://pandas.pydata.org/docs/user_guide/cookbook.html#cookbook

Sugestão: Colocar a página do Cookbook lado a lado com este notebook para realizar as operações.

In [2]:
import pandas as pd
import numpy as np

In [3]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [4]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [7]:
df.loc[df.AAA >= 5, "BBB"] = -1

In [8]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,-1,50
2,6,-1,-30
3,7,-1,-50


In [9]:
df.loc[df.AAA >= 5, ["BBB", "CCC"]] = 555

In [10]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,555,555
2,6,555,555
3,7,555,555


In [11]:
df.loc[df.AAA < 5, ["BBB", "CCC"]] = 2000

In [12]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,2000,2000
1,5,555,555
2,6,555,555
3,7,555,555


In [13]:
df_mask = pd.DataFrame(
    {"AAA": [True] * 4, "BBB": [False] * 4, "CCC": [True, False] * 2}
)

In [14]:
df.where(df_mask, -1000)

Unnamed: 0,AAA,BBB,CCC
0,4,-1000,2000
1,5,-1000,-1000
2,6,-1000,555
3,7,-1000,-1000


In [15]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [16]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [17]:
df["logic"] = np.where(df["AAA"] > 5, "high", "low")

In [18]:
df

Unnamed: 0,AAA,BBB,CCC,logic
0,4,10,100,low
1,5,20,50,low
2,6,30,-30,high
3,7,40,-50,high


In [19]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [20]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [21]:
df[df.AAA <= 5]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50


In [22]:
df[df.AAA > 5]

Unnamed: 0,AAA,BBB,CCC
2,6,30,-30
3,7,40,-50


In [23]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [24]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [25]:
df.loc[(df["BBB"] < 25) & (df["CCC"] >= -40), "AAA"]

0    4
1    5
Name: AAA, dtype: int64

In [26]:
df.loc[(df["BBB"] > 25) | (df["CCC"] >= -40), "AAA"]

0    4
1    5
2    6
3    7
Name: AAA, dtype: int64

In [27]:
df.loc[(df["BBB"] > 25) | (df["CCC"] >= 75), "AAA"] = 999

In [28]:
df

Unnamed: 0,AAA,BBB,CCC
0,999,10,100
1,5,20,50
2,999,30,-30
3,999,40,-50


In [29]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [30]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [31]:
aValue = 43.0

In [32]:
df.loc[(df.CCC - aValue).abs().argsort()]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
0,4,10,100
2,6,30,-30
3,7,40,-50


In [33]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [34]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [35]:
Crit1 = df.AAA <= 5.5

In [36]:
Crit2 = df.BBB == 10.0

In [37]:
Crit3 = df.CCC > -40.0

In [38]:
AllCrit = Crit1 & Crit2 & Crit3

In [39]:
import functools

In [40]:
CritList = [Crit1, Crit2, Crit3]

In [41]:
AllCrit = functools.reduce(lambda x, y: x & y, CritList)

In [42]:
df[AllCrit]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100


In [43]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [44]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [45]:
df[(df.AAA <= 6) & (df.index.isin([0, 2, 4]))]

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
2,6,30,-30


In [46]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]},
    index=["foo", "bar", "boo", "kar"],
)

In [47]:
df.loc["bar":"kar"]  # Label

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [48]:
df[0:3]

Unnamed: 0,AAA,BBB,CCC
foo,4,10,100
bar,5,20,50
boo,6,30,-30


In [49]:
df["bar":"kar"]

Unnamed: 0,AAA,BBB,CCC
bar,5,20,50
boo,6,30,-30
kar,7,40,-50


In [50]:
data = {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}

In [51]:
df2 = pd.DataFrame(data=data, index=[1, 2, 3, 4])  # Note index starts at 1.

In [52]:
df2.iloc[1:3]  # Position-oriented

Unnamed: 0,AAA,BBB,CCC
2,5,20,50
3,6,30,-30


In [53]:
df2.loc[1:3]  # Label-oriented

Unnamed: 0,AAA,BBB,CCC
1,4,10,100
2,5,20,50
3,6,30,-30


In [54]:
df = pd.DataFrame(
    {"AAA": [4, 5, 6, 7], "BBB": [10, 20, 30, 40], "CCC": [100, 50, -30, -50]}
)

In [55]:
df

Unnamed: 0,AAA,BBB,CCC
0,4,10,100
1,5,20,50
2,6,30,-30
3,7,40,-50


In [56]:
df[~((df.AAA <= 6) & (df.index.isin([0, 2, 4])))]

Unnamed: 0,AAA,BBB,CCC
1,5,20,50
3,7,40,-50


In [57]:
df = pd.DataFrame({"AAA": [1, 2, 1, 3], "BBB": [1, 1, 2, 2], "CCC": [2, 1, 3, 1]})

In [58]:
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [59]:
source_cols = df.columns  # Or some subset would work too

In [60]:
new_cols = [str(x) + "_cat" for x in source_cols]

In [61]:
categories = {1: "Alpha", 2: "Beta", 3: "Charlie"}

In [65]:
# Translate every value of the source columns through the `categories` dict
# (`dict.get` yields None for values that are not keys) and store the labels
# in the matching "*_cat" columns.
# NOTE(review): the KeyError recorded under this cell ("['CCC'] not in index")
# together with its execution counter (In [65], i.e. after In [64] rebinds
# `df` to a frame that only has AAA/BBB) suggests the cell was run out of
# order -- re-run the notebook top to bottom to confirm.
df[new_cols] = df[source_cols].map(categories.get)

KeyError: "['CCC'] not in index"

In [63]:
df

Unnamed: 0,AAA,BBB,CCC
0,1,1,2
1,2,1,1
2,1,2,3
3,3,2,1


In [64]:
df = pd.DataFrame(
    {"AAA": [1, 1, 1, 2, 2, 2, 3, 3], "BBB": [2, 1, 3, 4, 5, 1, 2, 3]}
)

In [66]:
df

Unnamed: 0,AAA,BBB
0,1,2
1,1,1
2,1,3
3,2,4
4,2,5
5,2,1
6,3,2
7,3,3


In [67]:
df.loc[df.groupby("AAA")["BBB"].idxmin()]

Unnamed: 0,AAA,BBB
1,1,1
5,2,1
6,3,2


In [68]:
df.sort_values(by="BBB").groupby("AAA", as_index=False).first()

Unnamed: 0,AAA,BBB
0,1,1
1,2,1
2,3,2


In [None]:
df = pd.DataFrame(
    {
        "row": [0, 1, 2],
        "One_X": [1.1, 1.1, 1.1],
        "One_Y": [1.2, 1.2, 1.2],
        "Two_X": [1.11, 1.11, 1.11],
        "Two_Y": [1.22, 1.22, 1.22],
    }
)

In [None]:
df

In [None]:
df = df.set_index("row")

In [None]:
df

In [None]:
df.columns = pd.MultiIndex.from_tuples([tuple(c.split("_")) for c in df.columns])

In [None]:
df

In [None]:
df = df.stack(0, future_stack=True).reset_index(1)

In [None]:
df

In [None]:
df.columns = ["Sample", "All_X", "All_Y"]

In [None]:
df

In [None]:
cols = pd.MultiIndex.from_tuples(
    [(x, y) for x in ["A", "B", "C"] for y in ["O", "I"]]
)

In [None]:
df = pd.DataFrame(np.random.randn(2, 6), index=["n", "m"], columns=cols)

In [None]:
df

In [None]:
df = df.div(df["C"], level=1)

In [None]:
df

In [None]:
coords = [("AA", "one"), ("AA", "six"), ("BB", "one"), ("BB", "two"), ("BB", "six")]

In [None]:
index = pd.MultiIndex.from_tuples(coords)

In [None]:
df = pd.DataFrame([11, 22, 33, 44, 55], index, ["MyData"])

In [None]:
df

In [None]:
df.xs("BB", level=0, axis=0)

In [None]:
df.xs("six", level=1, axis=0)

In [None]:
import itertools

In [None]:
index = list(itertools.product(["Ada", "Quinn", "Violet"], ["Comp", "Math", "Sci"]))

In [None]:
headr = list(itertools.product(["Exams", "Labs"], ["I", "II"]))

In [None]:
indx = pd.MultiIndex.from_tuples(index, names=["Student", "Course"])

In [None]:
cols = pd.MultiIndex.from_tuples(headr)  # Notice these are un-named

In [None]:
# Synthetic score grid: 9 rows by 4 columns, each cell derived from its
# row/column position.
data = [
    [70 + col + row + (col * row) % 3 for col in range(4)]
    for row in range(9)
]

In [None]:
df = pd.DataFrame(data, indx, cols)

In [None]:
df

In [None]:
All = slice(None)

In [None]:
df.loc["Violet"]

In [None]:
df.loc[(All, "Math"), All]

In [None]:
df.loc[(slice("Ada", "Quinn"), "Math"), All]

In [None]:
df.loc[(All, "Math"), ("Exams")]

In [None]:
df.loc[(All, "Math"), (All, "II")]

In [None]:
df.sort_values(by=("Labs", "II"), ascending=False)

In [None]:
df = pd.DataFrame(
    np.random.randn(6, 1),
    index=pd.date_range("2013-08-01", periods=6, freq="B"),
    columns=list("A"),
)

In [None]:
df.loc[df.index[3], "A"] = np.nan

In [None]:
df

In [None]:
df = pd.DataFrame(
    {
        "animal": "cat dog cat fish dog cat cat".split(),
        "size": list("SSMMMLL"),
        "weight": [8, 10, 11, 1, 20, 12, 12],
        "adult": [False] * 5 + [True] * 2,
    }
)

In [None]:
df

In [None]:
df.groupby("animal").apply(lambda subf: subf["size"][subf["weight"].idxmax()], include_groups=False)

In [None]:
gb = df.groupby("animal")

In [None]:
gb.get_group("cat")

In [None]:
def GrowUp(x):
    """Collapse one group of animals into a single projected adult record.

    Parameters
    ----------
    x : pandas.DataFrame
        Rows of one group; must provide ``size`` and ``weight`` columns.

    Returns
    -------
    pandas.Series
        ``size`` (always ``"L"``), ``weight`` (weights of "S" rows scaled by
        1.5, "M" rows by 1.25 and "L" rows unchanged, summed and divided by
        the number of rows in the group) and ``adult`` (always ``True``).
    """
    weights = x["weight"]
    sizes = x["size"]
    small_total = sum(weights[sizes == "S"] * 1.5)
    medium_total = sum(weights[sizes == "M"] * 1.25)
    large_total = sum(weights[sizes == "L"])
    projected = (small_total + medium_total + large_total) / len(x)
    record = ["L", projected, True]
    return pd.Series(record, index=["size", "weight", "adult"])

In [None]:
expected_df = gb.apply(GrowUp, include_groups=False)

In [None]:
expected_df

In [None]:
S = pd.Series([i / 100.0 for i in range(1, 11)])

In [None]:
def cum_ret(x, y):
    """Grow the running value ``x`` by the periodic return ``y``.

    Returns ``x * (1 + y)``.
    """
    growth_factor = 1 + y
    return x * growth_factor

In [None]:
def red(x):
    """Compound every return in ``x`` with ``cum_ret``, starting from 1.0."""
    compounded = 1.0
    for periodic_return in x:
        compounded = cum_ret(compounded, periodic_return)
    return compounded

In [None]:
S.expanding().apply(red, raw=True)

In [None]:
df = pd.DataFrame({"A": [1, 1, 2, 2], "B": [1, -1, 1, 2]})

In [None]:
gb = df.groupby("A")

In [None]:
def replace(g):
    """Swap the negative entries of ``g`` for the mean of the remaining ones.

    Entries that are not negative are returned unchanged; the replacement
    value is the mean of exactly those entries.
    """
    not_negative = ~(g < 0)
    fill_value = g[not_negative].mean()
    return g.where(not_negative, fill_value)

In [None]:
gb.transform(replace)

In [None]:
df = pd.DataFrame(
    {
        "code": ["foo", "bar", "baz"] * 2,
        "data": [0.16, -0.21, 0.33, 0.45, -0.59, 0.62],
        "flag": [False, True] * 3,
    }
)

In [None]:
code_groups = df.groupby("code")

In [None]:
agg_n_sort_order = code_groups[["data"]].transform("sum").sort_values(by="data")

In [None]:
sorted_df = df.loc[agg_n_sort_order.index]

In [None]:
sorted_df

In [None]:
rng = pd.date_range(start="2014-10-07", periods=10, freq="2min")

In [None]:
ts = pd.Series(data=list(range(10)), index=rng)

In [None]:
def MyCust(x):
    """Custom resample aggregation: scale the second observation of a bin.

    Parameters
    ----------
    x : pandas.Series
        Values that fall into one resampling bin.

    Returns
    -------
    float
        ``x.iloc[1] * 1.234`` when the bin holds more than two observations,
        otherwise ``NaN``.
    """
    if len(x) > 2:
        return x.iloc[1] * 1.234
    # Use the numeric missing-value marker: ``pd.NaT`` is the sentinel for
    # datetime-like data and does not belong in a numeric aggregation result.
    return np.nan


In [None]:
mhc = {"Mean": "mean", "Max": "max", "Custom": MyCust}

In [None]:
ts.resample("5min").apply(mhc)

In [None]:
ts

In [None]:
df = pd.DataFrame(
    {"Color": "Red Red Red Blue".split(), "Value": [100, 150, 50, 50]}
)

In [None]:
df

In [None]:
df["Counts"] = df.groupby(["Color"]).transform(len)

In [None]:
df

In [None]:
df = pd.DataFrame(
    {"line_race": [10, 10, 8, 10, 10, 8], "beyer": [99, 102, 103, 103, 88, 100]},
    index=[
        "Last Gunfighter",
        "Last Gunfighter",
        "Last Gunfighter",
        "Paynter",
        "Paynter",
        "Paynter",
    ],
)

In [None]:
df

In [None]:
df["beyer_shifted"] = df.groupby(level=0)["beyer"].shift(1)

In [None]:
df

In [None]:
df = pd.DataFrame(
    {
        "host": ["other", "other", "that", "this", "this"],
        "service": ["mail", "web", "mail", "mail", "web"],
        "no": [1, 2, 1, 2, 1],
    }
).set_index(["host", "service"])

In [None]:
mask = df.groupby(level=0).agg("idxmax")

In [None]:
df_count = df.loc[mask["no"]].reset_index()

In [None]:
df_count

In [None]:
df = pd.DataFrame([0, 1, 0, 1, 1, 1, 0, 1, 1], columns=["A"])

In [None]:
df["A"].groupby((df["A"] != df["A"].shift()).cumsum()).groups

In [None]:
df["A"].groupby((df["A"] != df["A"].shift()).cumsum()).cumsum()

In [None]:
df = pd.DataFrame(
    data={
        "Case": ["A", "A", "A", "B", "A", "A", "B", "A", "A"],
        "Data": np.random.randn(9),
    }
)

In [None]:
# Split `df` into consecutive chunks delimited by the rows where Case == "B".
# The grouping key is built in three steps: flag the "B" rows as 1/0, take the
# running total of those flags, then smooth it with a 3-row rolling median
# (min_periods=1 so the first rows still receive a value).
# NOTE(review): on the sample data the rolling median delays each key change
# by one row, which keeps every "B" row in the chunk of the rows before it --
# confirm this is the intent before reusing the pattern on other data.
# Iterating a groupby yields (key, sub-frame) pairs; zip(*...) transposes them
# into a tuple of keys and a tuple of sub-frames, and [-1] keeps the latter.
dfs = list(
    zip(
        *df.groupby(
            (1 * (df["Case"] == "B"))
            .cumsum()
            .rolling(window=3, min_periods=1)
            .median()
        )
    )
)[-1]

In [None]:
dfs[0]

In [None]:
dfs[1]

In [None]:
dfs[2]

In [None]:
df = pd.DataFrame(
    data={
        "Province": ["ON", "QC", "BC", "AL", "AL", "MN", "ON"],
        "City": [
            "Toronto",
            "Montreal",
            "Vancouver",
            "Calgary",
            "Edmonton",
            "Winnipeg",
            "Windsor",
        ],
        "Sales": [13, 6, 16, 8, 4, 3, 1],
    }
)

In [None]:
table = pd.pivot_table(
    df,
    values=["Sales"],
    index=["Province"],
    columns=["City"],
    aggfunc="sum",
    margins=True,
)

In [None]:
table.stack("City", future_stack=True)

In [None]:
grades = [48, 99, 75, 80, 42, 80, 72, 68, 36, 78]

In [None]:
df = pd.DataFrame(
    {
        "ID": ["x%d" % r for r in range(10)],
        "Gender": ["F", "M", "F", "M", "F", "M", "F", "M", "M", "M"],
        "ExamYear": [
            "2007",
            "2007",
            "2007",
            "2008",
            "2008",
            "2008",
            "2008",
            "2009",
            "2009",
            "2009",
        ],
        "Class": [
            "algebra",
            "stats",
            "bio",
            "algebra",
            "algebra",
            "stats",
            "stats",
            "algebra",
            "bio",
            "bio",
        ],
        "Participated": [
            "yes",
            "yes",
            "yes",
            "yes",
            "no",
            "yes",
            "yes",
            "yes",
            "yes",
            "yes",
        ],
        "Passed": ["yes" if x > 50 else "no" for x in grades],
        "Employed": [
            True,
            True,
            True,
            False,
            False,
            False,
            False,
            True,
            True,
            False,
        ],
        "Grade": grades,
    }
)

In [None]:
# Per-exam-year summary. Counting with a boolean comparison (instead of
# indexing value_counts() by "yes") also works for a year in which nobody
# answered "yes": that case yields 0 rather than raising KeyError.
df.groupby("ExamYear").agg(
    {
        "Participated": lambda x: (x == "yes").sum(),  # number of participants
        "Passed": lambda x: (x == "yes").sum(),  # number of passes
        "Employed": "sum",  # True counts as 1
        "Grade": "mean",  # average grade
    }
)

In [None]:
df = pd.DataFrame(
    {"value": np.random.randn(36)},
    index=pd.date_range("2011-01-01", freq="ME", periods=36),
)

In [None]:
pd.pivot_table(
    df, index=df.index.month, columns=df.index.year, values="value", aggfunc="sum"
)

In [None]:
df = pd.DataFrame(
    data={
        "A": [[2, 4, 8, 16], [100, 200], [10, 20, 30]],
        "B": [["a", "b", "c"], ["jj", "kk"], ["ccc"]],
    },
    index=["I", "II", "III"],
)

In [None]:
def SeriesFromSubList(aList):
    """Wrap one list-valued cell in a ``pandas.Series`` with the default index."""
    as_series = pd.Series(data=aList)
    return as_series

In [None]:
df_orgz = pd.concat(
    {ind: row.apply(SeriesFromSubList) for ind, row in df.iterrows()}
)

In [None]:
df_orgz

In [None]:
df = pd.DataFrame(
    data=np.random.randn(2000, 2) / 10000,
    index=pd.date_range("2001-01-01", periods=2000),
    columns=["A", "B"],
)

In [None]:
df

In [None]:
def gm(df, const):
    """Scaled compounded growth of columns ``A`` + ``B`` over the whole frame.

    Adds the two columns row by row, compounds ``1 + sum`` with a cumulative
    product, subtracts 1, multiplies by ``const`` and returns the final
    (last-row) value.
    """
    combined = df["A"] + df["B"]
    compounded = (combined + 1).cumprod()
    scaled = (compounded - 1) * const
    return scaled.iloc[-1]

In [None]:
# Evaluate gm() on every 51-row window of `df`, keyed by the window's first
# timestamp. range(len(df) - 50) already guarantees i + 51 <= len(df), so the
# slice needs no clamping; the former min(i + 51, len(df) - 1) bound cut the
# final row out of the last window.
s = pd.Series(
    {
        df.index[i]: gm(df.iloc[i: i + 51], 5)
        for i in range(len(df) - 50)
    }
)

In [None]:
s

In [None]:
rng = pd.date_range(start="2014-01-01", periods=100)

In [None]:
df = pd.DataFrame(
    {
        "Open": np.random.randn(len(rng)),
        "Close": np.random.randn(len(rng)),
        "Volume": np.random.randint(100, 2000, len(rng)),
    },
    index=rng,
)

In [None]:
df

In [None]:
def vwap(bars):
    """Return the volume-weighted average close of ``bars``.

    ``bars`` must provide ``Close`` and ``Volume`` columns; the result is
    ``sum(Close * Volume) / sum(Volume)``.
    """
    volume = bars["Volume"]
    traded_value = bars["Close"] * volume
    return traded_value.sum() / volume.sum()

In [None]:
window = 5

In [None]:
s = pd.concat(
    [
        (pd.Series(vwap(df.iloc[i: i + window]), index=[df.index[i + window]]))
        for i in range(len(df) - window)
    ]
)

In [None]:
s.round(2)

In [None]:
dates = pd.date_range("2000-01-01", periods=5)

In [None]:
dates.to_period(freq="M").to_timestamp()

In [None]:
rng = pd.date_range("2000-01-01", periods=6)

In [None]:
df1 = pd.DataFrame(np.random.randn(6, 3), index=rng, columns=["A", "B", "C"])

In [None]:
df2 = df1.copy()

In [None]:
df = pd.concat([df1, df2], ignore_index=True)

In [None]:
df

In [None]:
df = pd.DataFrame(
    data={
        "Area": ["A"] * 5 + ["C"] * 2,
        "Bins": [110] * 2 + [160] * 3 + [40] * 2,
        "Test_0": [0, 1, 0, 1, 2, 0, 1],
        "Data": np.random.randn(7),
    }
)

In [None]:
df

In [None]:
df["Test_1"] = df["Test_0"] - 1

In [None]:
pd.merge(
    df,
    df,
    left_on=["Bins", "Area", "Test_0"],
    right_on=["Bins", "Area", "Test_1"],
    suffixes=("_L", "_R"),
)

In [None]:
df = pd.DataFrame(
    {
        "stratifying_var": np.random.uniform(0, 100, 20),
        "price": np.random.normal(100, 5, 20),
    }
)

In [None]:
df["quartiles"] = pd.qcut(
    df["stratifying_var"], 4, labels=["0-25%", "25-50%", "50-75%", "75-100%"]
)

In [None]:
df.boxplot(column="price", by="quartiles")

In [None]:
for i in range(3):
    data = pd.DataFrame(np.random.randn(10, 4))
    data.to_csv("file_{}.csv".format(i))

In [None]:
files = ["file_0.csv", "file_1.csv", "file_2.csv"]

In [None]:
result = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)

In [None]:
import glob

In [None]:
import os

In [None]:
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the row order of the concatenated result is reproducible.
files = sorted(glob.glob("file_*.csv"))

In [None]:
result = pd.concat([pd.read_csv(f) for f in files], ignore_index=True)

In [None]:
i = pd.date_range("20000101", periods=10000)

In [None]:
df = pd.DataFrame({"year": i.year, "month": i.month, "day": i.day})

In [None]:
df.head()

In [None]:
%timeit pd.to_datetime(df.year * 10000 + df.month * 100 + df.day, format='%Y%m%d')
ds = df.apply(lambda x: "%04d%02d%02d" % (x["year"], x["month"], x["day"]), axis=1)
ds.head()
%timeit pd.to_datetime(ds)

In [None]:
data = """;;;;
 ;;;;
 ;;;;
 ;;;;
 ;;;;
 ;;;;
;;;;
 ;;;;
 ;;;;
;;;;
date;Param1;Param2;Param4;Param5
    ;m²;°C;m²;m
;;;;
01.01.1990 00:00;1;1;2;3
01.01.1990 01:00;5;3;4;5
01.01.1990 02:00;9;5;6;7
01.01.1990 03:00;13;7;8;9
01.01.1990 04:00;17;9;10;11
01.01.1990 05:00;21;11;12;13
"""


In [None]:
from io import StringIO

In [None]:
pd.read_csv(
    StringIO(data),
    sep=";",
    skiprows=[11, 12],
    index_col=0,
    parse_dates=True,
    header=10,
)

In [None]:
pd.read_csv(StringIO(data), sep=";", header=10, nrows=10).columns

In [None]:
columns = pd.read_csv(StringIO(data), sep=";", header=10, nrows=10).columns

In [None]:
pd.read_csv(
    StringIO(data), sep=";", index_col=0, header=12, parse_dates=True, names=columns
)

In [None]:
df = pd.DataFrame(np.random.randn(8, 3))

In [None]:
store = pd.HDFStore("test.h5")

In [None]:
store.put("df", df)

In [None]:
store.get_storer("df").attrs.my_attribute = {"A": 10}

In [None]:
store.get_storer("df").attrs.my_attribute

In [None]:
# Release the handle that the earlier cells left open on "test.h5" before
# reopening the same path in write mode; otherwise that store is never closed.
store.close()
store = pd.HDFStore("test.h5", "w", driver="H5FD_CORE")

In [None]:
df = pd.DataFrame(np.random.randn(8, 3))

In [None]:
store["test"] = df

In [None]:
store.close()

In [None]:
df = pd.DataFrame(np.random.random(size=(100, 5)))

In [None]:
corr_mat = df.corr()

In [None]:
mask = np.tril(np.ones_like(corr_mat, dtype=np.bool_), k=-1)

In [None]:
corr_mat.where(mask)

In [None]:
def distcorr(x, y):
    n = len(x)
    a = np.zeros(shape=(n, n))
    b = np.zeros(shape=(n, n))
    for i in range(n):
        for j in range(i + 1, n):
            a[i, j] = abs(x[i] - x[j])
            b[i, j] = abs(y[i] - y[j])
    a += a.T
    b += b.T
    a_bar = np.vstack([np.nanmean(a, axis=0)] * n)
    b_bar = np.vstack([np.nanmean(b, axis=0)] * n)
    A = a - a_bar - a_bar.T + np.full(shape=(n, n), fill_value=a_bar.mean())
    B = b - b_bar - b_bar.T + np.full(shape=(n, n), fill_value=b_bar.mean())
    cov_ab = np.sqrt(np.nansum(A * B)) / n
    std_a = np.sqrt(np.sqrt(np.nansum(A ** 2)) / n)
    std_b = np.sqrt(np.sqrt(np.nansum(B ** 2)) / n)
    return cov_ab / std_a / std_b

In [None]:
df = pd.DataFrame(np.random.normal(size=(100, 3)))

In [None]:
df.corr(method=distcorr)

In [None]:
import datetime

In [None]:
s = pd.Series(pd.date_range("2012-1-1", periods=3, freq="D"))

In [None]:
s - s.max()

In [None]:
s.max() - s

In [None]:
s - datetime.datetime(2011, 1, 1, 3, 5)

In [None]:
s + datetime.timedelta(minutes=5)

In [None]:
datetime.datetime(2011, 1, 1, 3, 5) - s

In [None]:
datetime.timedelta(minutes=5) + s

In [None]:
deltas = pd.Series([datetime.timedelta(days=i) for i in range(3)])

In [None]:
df = pd.DataFrame({"A": s, "B": deltas})

In [None]:
df

In [None]:
df["New Dates"] = df["A"] + df["B"]

In [None]:
df["Delta"] = df["A"] - df["New Dates"]

In [None]:
df

In [None]:
df.dtypes

In [None]:
y = s - s.shift()

In [None]:
y

In [None]:
y[1] = np.nan

In [None]:
y

In [None]:
def expand_grid(data_dict):
    """Build a DataFrame holding every combination of the given values.

    ``data_dict`` maps column names to iterables of candidate values; the
    result has one row per element of their Cartesian product, with the
    columns in the dict's key order.
    """
    column_names = data_dict.keys()
    value_lists = data_dict.values()
    combinations = itertools.product(*value_lists)
    return pd.DataFrame.from_records(combinations, columns=column_names)

In [None]:
df = expand_grid(
    {"height": [60, 70], "weight": [100, 140, 180], "sex": ["Male", "Female"]}
)

In [None]:
df

In [None]:
v = s.to_numpy()

In [None]:
# Constant if empty or if every element equals the first one. The comparison
# is done positionally on the array `v`: `s[0]` is a label lookup and only
# works when the index happens to contain the label 0.
is_constant = v.shape[0] == 0 or (v[0] == v).all()

In [None]:
v = s.dropna().to_numpy()

In [None]:
# Test the NaN-free array `v`, not the original Series `s`, so that missing
# values really are ignored by the check.
is_constant = v.shape[0] == 0 or (v[0] == v).all()

In [None]:
v = s.to_numpy()

In [None]:
# Constant if empty, if every element equals the first one, or if all values
# are missing. The comparison is done positionally on the array `v`: `s[0]`
# is a label lookup and only works when the index contains the label 0.
is_constant = v.shape[0] == 0 or (v[0] == v).all() or not pd.notna(v).any()