/
utils.py
131 lines (89 loc) · 2.91 KB
/
utils.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re
import os
def sample_rows(df, nrows, replace=False):
    """Choose a sample of rows from a DataFrame.

    Rows are drawn by position rather than by index label, so the result
    has exactly `nrows` rows even when the index contains duplicates.

    df: DataFrame
    nrows: number of rows
    replace: whether to sample with replacement

    returns: DataFrame
    """
    # Looking rows up by label with df.loc would return every row that
    # shares a chosen label, inflating the sample for a non-unique index.
    positions = np.random.choice(len(df), nrows, replace=replace)
    return df.iloc[positions]
def resample_rows(df):
    """Draws a new sample of rows, the same size as the original.

    Rows are chosen with replacement, so a row can appear more than once.

    df: DataFrame

    returns: DataFrame
    """
    n_rows = len(df)
    return sample_rows(df, n_rows, replace=True)
def resample_rows_weighted(df, column='finalwgt'):
    """Draws len(df) rows with replacement, weighted by a column.

    The chance of picking a row is proportional to its value in `column`.

    df: DataFrame
    column: string column name to use as weights

    returns: DataFrame
    """
    return df.sample(
        n=len(df),
        weights=df[column],
        replace=True,
    )
def values(series):
    """Tabulate how often each value occurs, ordered by value.

    Missing values are counted too (dropna=False).

    series: pd.Series

    returns: series mapping from values to frequencies
    """
    counts = series.value_counts(dropna=False)
    return counts.sort_index()
def round_into_bins(df, var, bin_width, high=None, low=0):
    """Rounds values down to the bin they belong in.

    Bin edges run from `low` in steps of `bin_width` until they cover
    `high`.  Values below `low` are assigned to the first bin, values past
    the last edge to the last bin, and missing values stay missing.

    df: DataFrame
    var: string variable name
    bin_width: number, width of the bins
    high: number the bins must reach; defaults to the largest value of var
    low: number, lower edge of the first bin

    returns: array of bin values
    """
    column = df[var]
    if high is None:
        high = column.max()

    bins = np.arange(low, high + bin_width, bin_width)

    # np.digitize returns 0 for values below the first edge; without the
    # floor at 0, index -1 would wrap around and select the *last* bin.
    indices = np.maximum(np.digitize(column, bins) - 1, 0)
    rounded = bins[indices]

    # np.digitize places NaN past the last edge, so restore missing values
    # instead of reporting them as members of the top bin.
    missing = np.asarray(pd.isna(column))
    if missing.any():
        rounded = rounded.astype(float)
        rounded[missing] = np.nan
    return rounded
def underride(d, **options):
    """Fill in missing entries of d without overwriting existing ones.

    d is modified in place and also returned.

    d: dictionary
    options: keyword args to add to d

    returns: the same dictionary, d
    """
    for name in options:
        if name not in d:
            d[name] = options[name]
    return d
def decorate(**options):
    """Set properties of the current axes and tidy up the figure.

    Call decorate with keyword arguments like

    decorate(title='Title',
             xlabel='x',
             ylabel='y')

    Any keyword other than `legend` and `loc` is passed to the `set`
    method of the current axes, so it can be any of the axis properties

    https://matplotlib.org/api/axes_api.html

    `legend=False` suppresses the legend, and `loc` chooses where the
    legend goes (the default value is 'best').
    """
    # Remove the legend controls first: they are not axes properties and
    # must not reach Axes.set().
    show_legend = options.pop('legend', True)
    legend_loc = options.pop('loc', 'best')

    if show_legend:
        legend(loc=legend_loc)

    axes = plt.gca()
    axes.set(**options)
    plt.tight_layout()
def legend(**options):
    """Adds a legend to the current axes if any item has a label.

    When nothing is labeled, no legend is drawn.

    options are passed to the legend method of the current axes; `loc`
    defaults to 'best'.  See

    https://matplotlib.org/api/_as_gen/matplotlib.pyplot.legend.html
    """
    options.setdefault('loc', 'best')

    axes = plt.gca()
    handles, labels = axes.get_legend_handles_labels()
    if not handles:
        return

    axes.legend(handles, labels, **options)
def anchor_legend(x, y):
    """Draws a one-column legend anchored at the given location.

    The legend is created with loc='upper left' and bbox_to_anchor=(x, y).

    x: axis coordinate
    y: axis coordinate
    """
    anchor = (x, y)
    plt.legend(loc='upper left', bbox_to_anchor=anchor, ncol=1)