# Debugging and Testing Pandas

In [2]:
import pandas as pd
import numpy as np
# Notebook-wide display settings.  Option names are fully qualified because
# pandas matches short patterns such as 'max_columns' or 'max_rows' as
# regular expressions and raises OptionError as soon as more than one option
# matches (newer releases also define 'styler.render.max_columns'/'max_rows').
pd.set_option(
    'display.max_columns', 7,
    'display.expand_frame_repr', True,
    'display.max_colwidth', 12,
    'display.max_rows', 10,
)
pd.set_option('display.width', 65)

## Code to Transform Data

### How to do it...

In [3]:
import zipfile
url = 'data/kaggle-survey-2018.zip'

In [4]:
# Open the survey archive, list its members and load the multiple-choice
# answers.  Row 0 is kept separately as ``kag_questions``; every following
# row goes into ``df``.
with zipfile.ZipFile(url) as z:
    print(z.namelist())
    # The member handle is closed explicitly instead of being left to the
    # garbage collector.  low_memory=False makes read_csv infer each column's
    # dtype from the whole column rather than chunk by chunk, which avoids
    # pandas' mixed-type DtypeWarning.
    with z.open('multipleChoiceResponses.csv') as fin:
        kag = pd.read_csv(fin, low_memory=False)
    kag_questions = kag.iloc[0]
    df = kag.iloc[1:]

['multipleChoiceResponses.csv', 'freeFormResponses.csv', 'SurveySchema.csv']


  interactivity=interactivity, compiler=compiler, result=result)


In [5]:
df.T

Unnamed: 0,1,2,3,...,23857,23858,23859
Time from Start to Finish (seconds),710,434,718,...,370,36,502
Q1,Female,Male,Female,...,Male,Male,Male
Q1_OTHER_TEXT,-1,-1,-1,...,-1,-1,-1
Q2,45-49,30-34,30-34,...,22-24,25-29,25-29
Q3,United S...,Indonesia,United S...,...,Turkey,United K...,Spain
...,...,...,...,...,...,...,...
Q50_Part_5,,,,...,,,
Q50_Part_6,,,,...,,,
Q50_Part_7,,,,...,,,
Q50_Part_8,,,,...,,,


In [6]:
df.dtypes

Time from Start to Finish (seconds)    object
Q1                                     object
Q1_OTHER_TEXT                          object
Q2                                     object
Q3                                     object
                                        ...  
Q50_Part_5                             object
Q50_Part_6                             object
Q50_Part_7                             object
Q50_Part_8                             object
Q50_OTHER_TEXT                         object
Length: 395, dtype: object

In [7]:
df.Q1.value_counts(dropna=False)

Male                       19430
Female                      4010
Prefer not to say            340
Prefer to self-describe       79
Name: Q1, dtype: int64

In [8]:
def tweak_kag(df):
    """Condense the raw survey answers into eight tidy columns.

    Rows whose ``Q9`` answer is missing or starts with ``'I do not'`` are
    dropped.  For the remaining rows the following columns are built:

    * ``Gender``     - ``Q1`` with both "Prefer ..." answers merged into
      ``'Another'``.
    * ``Age``        - the first two characters of ``Q2`` as an int.
    * ``Country``    - ``Q3`` if it is USA / India / China, else ``'Another'``.
    * ``Edu``        - ``Q4`` replaced by one number per degree (presumably
      years of schooling); missing and "I prefer not to answer" become 11.
    * ``Studies``    - ``Q5`` reduced to ``'cs'``, ``'eng'``, ``'stat'`` or
      ``'another'``.
    * ``Occupation`` - ``Q6`` if it is one of five job titles, else
      ``'Another'``.
    * ``Experience`` - the part of ``Q8`` before the first ``-`` as an int,
      ``-1`` when ``Q8`` is missing.
    * ``Salary``     - the part of ``Q9`` before the first ``-`` (after
      removing ``+`` and ``,``) multiplied by 1000.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with string columns ``Q1``-``Q6``, ``Q8`` and ``Q9``.

    Returns
    -------
    pandas.DataFrame
        A new frame indexed like the kept rows; ``df`` itself is not modified.
    """
    # na=False yields a genuine boolean mask even when Q9 has missing values,
    # so ``~`` never has to invert an object-dtype column.
    declined = df.Q9.str.startswith('I do not', na=False)
    answered = df[df.Q9.notna() & ~declined]

    q1 = (answered.Q1
      .replace({'Prefer not to say': 'Another',
               'Prefer to self-describe': 'Another'})
      .rename('Gender')
    )
    q2 = answered.Q2.str.slice(0, 2).astype(int).rename('Age')

    def limit_countries(val):
        if val in {'United States of America', 'India', 'China'}:
            return val
        return 'Another'
    q3 = answered.Q3.apply(limit_countries).rename('Country')

    q4 = (answered.Q4
     .replace({'Master’s degree': 18,
     'Bachelor’s degree': 16,
     'Doctoral degree': 20,
     'Some college/university study without earning a bachelor’s degree': 13,
     'Professional degree': 19,
     # explicit NaN (rather than None) so the next fillna picks it up
     'I prefer not to answer': np.nan,
     'No formal education past high school': 12})
     .fillna(11)
     .rename('Edu')
    )

    def only_cs_stat_val(val):
        if val not in {'cs', 'eng', 'stat'}:
            return 'another'
        return val

    q5 = (answered.Q5
            .replace({
                'Computer science (software engineering, etc.)': 'cs',
                'Engineering (non-computer focused)': 'eng',
                'Mathematics or statistics': 'stat'})
             .apply(only_cs_stat_val)
             .rename('Studies'))

    def limit_occupation(val):
        if val in {'Student', 'Data Scientist', 'Software Engineer', 'Not employed',
                  'Data Engineer'}:
            return val
        return 'Another'

    q6 = answered.Q6.apply(limit_occupation).rename('Occupation')

    # regex=False: '+' is meant literally; as a regular expression it is not
    # even a valid pattern, and the default of ``regex`` differs between
    # pandas versions.
    q8 = (answered.Q8
      .str.replace('+', '', regex=False)
      .str.split('-', expand=True)
      .iloc[:, 0]
      .fillna('-1')      # keep the column all-string until astype(int)
      .str.strip()       # e.g. '30 +' leaves '30 ' behind
      .astype(int)
      .rename('Experience')
    )

    # The rows holding the "I do not wish to disclose ..." answer were already
    # removed by the filter above, so no extra replace is needed for them.
    q9 = (answered.Q9
     .str.replace('+', '', regex=False)
     .str.replace(',', '', regex=False)
     # '500,000+' has become '500000' by now; bring it to thousands like the
     # lower bounds of the other ranges
     .str.replace('500000', '500', regex=False)
     .str.split('-', expand=True)
     .iloc[:, 0]
     .astype(int)
     .mul(1000)
     .rename('Salary'))
    return pd.concat([q1, q2, q3, q4, q5, q6, q8, q9], axis=1)

In [9]:
tweak_kag(df)

Unnamed: 0,Gender,Age,Country,...,Occupation,Experience,Salary
2,Male,30,Another,...,Another,5,10000
3,Female,30,United S...,...,Data Sci...,0,0
5,Male,22,India,...,Another,0,0
7,Male,35,Another,...,Another,10,10000
8,Male,18,India,...,Another,0,0
...,...,...,...,...,...,...,...
23844,Male,30,Another,...,Software...,10,90000
23845,Male,22,Another,...,Student,0,0
23854,Male,30,Another,...,Another,5,10000
23855,Male,45,Another,...,Another,5,250000


In [10]:
tweak_kag(df).dtypes

Gender         object
Age             int64
Country        object
Edu           float64
Studies        object
Occupation     object
Experience      int64
Salary          int64
dtype: object

### How it works...

In [11]:
# ``kag`` previously held the frame read from the CSV; from here on it is
# the output of tweak_kag.
kag = tweak_kag(df)
# For each Country group: the correlation between that group's Salary and
# Experience columns (Series.corr with its default method).
(kag
    .groupby('Country')
    .apply(lambda g: g.Salary.corr(g.Experience))
)

Country
Another                     0.289827
China                       0.252974
India                       0.167335
United States of America    0.354125
dtype: float64

## Apply Performance

### How to do it...

In [14]:
#%%timeit
def limit_countries(val):
    """Return ``val`` if it is one of the three kept countries, else ``'Another'``."""
    is_kept = val in {'China', 'India', 'United States of America'}
    return val if is_kept else 'Another'

In [15]:
q3 = df.Q3.apply(limit_countries).rename('Country')

In [16]:
%%timeit
other_values = df.Q3.value_counts().iloc[3:].index
q3_2 = df.Q3.replace(other_values, 'Another')

31.7 ms ± 4.66 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [17]:
%%timeit
values = {'United States of America', 'India', 'China'}
q3_3 = df.Q3.where(df.Q3.isin(values), 'Another')

2.84 ms ± 147 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [18]:
%%timeit
values = {'United States of America', 'India', 'China'}
q3_4 = pd.Series(np.where(df.Q3.isin(values), df.Q3, 'Another'), 
     index=df.index)

2.46 ms ± 63.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [19]:
q3.equals(q3_2)

NameError: name 'q3_2' is not defined

In [None]:
q3.equals(q3_3)

In [None]:
q3.equals(q3_4)

### How it works...

### There's more...

In [20]:
def debug(something):
    """Show what ``apply`` passes in (a cell, a Series, a DataFrame?), then stop.

    Prints the type and the value of ``something``.  The division by zero is
    deliberate: it raises ZeroDivisionError right after the first call so
    only one item is printed.
    """
    kind = type(something)
    print(kind, something)
    return 1 / 0  # always raises; nothing is ever returned

In [21]:
q3.apply(debug)

<class 'str'> United States of America


ZeroDivisionError: division by zero

In [22]:
# Module-level slot that debug() overwrites on every call.
the_item = None


def debug(something):
    """Store ``something`` in the global ``the_item`` and return it unchanged."""
    globals()['the_item'] = something
    return something

In [23]:
_ = q3.apply(debug)

In [24]:
the_item

'Another'

## Improving Apply Performance with Dask, Pandarallel, Swifter, and More

### How to do it...

In [25]:
from pandarallel import pandarallel
pandarallel.initialize()

INFO: Pandarallel will run on 4 workers.
INFO: Pandarallel will use standard multiprocessing data tranfer (pipe) to transfer data between the main process and workers.


In [26]:
def limit_countries(val):
    """Map every value except the three kept country names to ``'Another'``."""
    if val not in {'United States of America', 'India', 'China'}:
        return 'Another'
    return val

In [27]:
%%timeit
res_p = df.Q3.parallel_apply(limit_countries).rename('Country')

96.8 ms ± 5.81 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [28]:
import swifter

In [29]:
%%timeit
res_s = df.Q3.swifter.apply(limit_countries).rename('Country')

HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…




HBox(children=(FloatProgress(value=0.0, description='Pandas Apply', max=23859.0, style=ProgressStyle(descripti…


137 ms ± 8.37 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [30]:
import dask

In [31]:
%%timeit
res_d = (dask.dataframe.from_pandas(
       df, npartitions=4)
   .map_partitions(lambda df: df.Q3.apply(limit_countries))
   .rename('Countries')
)

5.69 s ± 372 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
np_fn = np.vectorize(limit_countries)

In [33]:
%%timeit
res_v = df.Q3.apply(np_fn).rename('Country')

484 ms ± 5.64 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [34]:
from numba import jit

In [35]:
@jit
def limit_countries2(val):
     """Return ``val`` for the three kept country names, else ``'Another'``.

     Same body as ``limit_countries``, wrapped in the ``jit`` decorator
     imported from numba in the preceding cell.
     NOTE(review): whether numba compiles this in nopython mode or falls
     back to object mode depends on the numba version - confirm before
     reading anything into the timing.
     """
     if val in  {'United States of America', 'India', 'China'}:
         return val
     return 'Another'

In [36]:
%%timeit
res_n = df.Q3.apply(limit_countries2).rename('Country')

134 ms ± 1.86 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### How it works...

## Inspecting Code 

### How to do it...

In [37]:
import zipfile
url = 'data/kaggle-survey-2018.zip'

In [38]:
# Open the survey archive, list its members and load the multiple-choice
# answers.  Row 0 is kept separately as ``kag_questions``; every following
# row goes into ``df``.
with zipfile.ZipFile(url) as z:
    print(z.namelist())
    # The member handle is closed explicitly instead of being left to the
    # garbage collector.  low_memory=False makes read_csv infer each column's
    # dtype from the whole column rather than chunk by chunk, which avoids
    # pandas' mixed-type DtypeWarning.
    with z.open('multipleChoiceResponses.csv') as fin:
        kag = pd.read_csv(fin, low_memory=False)
    kag_questions = kag.iloc[0]
    df = kag.iloc[1:]

['multipleChoiceResponses.csv', 'freeFormResponses.csv', 'SurveySchema.csv']


  interactivity=interactivity, compiler=compiler, result=result)


In [39]:
df.Q3.apply?

In [40]:
df.apply??

In [41]:
import pandas.core.series
pandas.core.series.lib

<module 'pandas._libs.lib' from '/Users/matt/.env/364/lib/python3.6/site-packages/pandas/_libs/lib.cpython-36m-darwin.so'>

In [42]:
pandas.core.series.lib.map_infer??

### How it works...

### There's more...

## Debugging in Jupyter

### How to do it...

In [43]:
import zipfile
url = 'data/kaggle-survey-2018.zip'

In [44]:
# Open the survey archive, list its members and load the multiple-choice
# answers.  Row 0 is kept separately as ``kag_questions``; every following
# row goes into ``df``.
with zipfile.ZipFile(url) as z:
    print(z.namelist())
    # The member handle is closed explicitly instead of being left to the
    # garbage collector.  low_memory=False makes read_csv infer each column's
    # dtype from the whole column rather than chunk by chunk, which avoids
    # pandas' mixed-type DtypeWarning.
    with z.open('multipleChoiceResponses.csv') as fin:
        kag = pd.read_csv(fin, low_memory=False)
    kag_questions = kag.iloc[0]
    df = kag.iloc[1:]

['multipleChoiceResponses.csv', 'freeFormResponses.csv', 'SurveySchema.csv']


In [45]:
def add1(x):
    """Return ``x + 1``; raises TypeError if ``x`` cannot be added to an int."""
    incremented = x + 1
    return incremented

In [46]:
df.Q3.apply(add1)

TypeError: must be str, not int

In [47]:
from IPython.core.debugger import set_trace

In [48]:
def add1(x):
    """Return ``x + 1``, pausing in the IPython debugger before the addition.

    ``set_trace`` comes from ``IPython.core.debugger`` (imported in the
    preceding cell) and opens an interactive ``ipdb>`` prompt on every call,
    so this version is for interactive inspection only.
    """
    set_trace()  # breakpoint: inspect ``x`` here, then continue with ``c``
    return x + 1

In [49]:
df.Q3.apply(add1)

> [0;32m<ipython-input-48-580fd29da363>[0m(3)[0;36madd1[0;34m()[0m
[0;32m      1 [0;31m[0;32mdef[0m [0madd1[0m[0;34m([0m[0mx[0m[0;34m)[0m[0;34m:[0m[0;34m[0m[0m
[0m[0;32m      2 [0;31m    [0mset_trace[0m[0;34m([0m[0;34m)[0m[0;34m[0m[0m
[0m[0;32m----> 3 [0;31m    [0;32mreturn[0m [0mx[0m [0;34m+[0m [0;36m1[0m[0;34m[0m[0m
[0m
ipdb> c


TypeError: must be str, not int

### How it works...

## Managing Data Integrity with Great Expectations

### How to do it...

In [50]:
kag = tweak_kag(df)

In [51]:
import great_expectations as ge
kag_ge = ge.from_pandas(kag)

In [52]:
# Public attribute names present on the Great Expectations wrapper but not
# on the plain DataFrame, in alphabetical order.
sorted(
    name
    for name in set(dir(kag_ge)).difference(dir(kag))
    if not name.startswith('_')
)

['autoinspect',
 'batch_fingerprint',
 'batch_id',
 'batch_kwargs',
 'column_aggregate_expectation',
 'column_map_expectation',
 'column_pair_map_expectation',
 'discard_failing_expectations',
 'edit_expectation_suite',
 'expect_column_bootstrapped_ks_test_p_value_to_be_greater_than',
 'expect_column_chisquare_test_p_value_to_be_greater_than',
 'expect_column_distinct_values_to_be_in_set',
 'expect_column_distinct_values_to_contain_set',
 'expect_column_distinct_values_to_equal_set',
 'expect_column_kl_divergence_to_be_less_than',
 'expect_column_max_to_be_between',
 'expect_column_mean_to_be_between',
 'expect_column_median_to_be_between',
 'expect_column_min_to_be_between',
 'expect_column_most_common_value_to_be_in_set',
 'expect_column_pair_values_A_to_be_greater_than_B',
 'expect_column_pair_values_to_be_equal',
 'expect_column_pair_values_to_be_in_set',
 'expect_column_parameterized_distribution_ks_test_p_value_to_be_greater_than',
 'expect_column_proportion_of_unique_values_to_b

In [53]:
kag_ge.expect_column_to_exist('Salary')

{'success': True}

In [54]:
kag_ge.expect_column_mean_to_be_between(
   'Salary', min_value=10_000, max_value=100_000)

{'success': True,
 'result': {'observed_value': 43869.66102793441,
  'element_count': 15429,
  'missing_count': 0,
  'missing_percent': 0.0}}

In [55]:
kag_ge.expect_column_values_to_be_between(
   'Salary', min_value=0, max_value=500_000)

{'success': True,
 'result': {'element_count': 15429,
  'missing_count': 0,
  'missing_percent': 0.0,
  'unexpected_count': 0,
  'unexpected_percent': 0.0,
  'unexpected_percent_nonmissing': 0.0,
  'partial_unexpected_list': []}}

In [56]:
kag_ge.expect_column_values_to_not_be_null('Salary')

{'success': True,
 'result': {'element_count': 15429,
  'unexpected_count': 0,
  'unexpected_percent': 0.0,
  'partial_unexpected_list': []}}

In [57]:
kag_ge.expect_column_values_to_match_regex(
    'Country', r'America|India|Another|China')

{'success': True,
 'result': {'element_count': 15429,
  'missing_count': 0,
  'missing_percent': 0.0,
  'unexpected_count': 0,
  'unexpected_percent': 0.0,
  'unexpected_percent_nonmissing': 0.0,
  'partial_unexpected_list': []}}

In [58]:
kag_ge.expect_column_values_to_be_of_type(
   'Salary', type_='int')

{'success': True, 'result': {'observed_value': 'int64'}}

In [59]:
kag_ge.save_expectation_suite('/tmp/kaggle_expectations.json')

In [60]:
import json

# Round-trip check: write the data to CSV, read it back, and validate it
# against the expectation suite saved in the previous cell.
kag_ge.to_csv('/tmp/kag.csv')
# Read the suite inside a ``with`` block so the file handle is closed
# deterministically instead of being leaked by a bare open().
with open('/tmp/kaggle_expectations.json') as fin:
    suite = json.load(fin)
ge.validate(ge.read_csv('/tmp/kag.csv'),
    expectation_suite=suite)

{'results': [{'success': True,
   'expectation_config': {'expectation_type': 'expect_column_to_exist',
    'kwargs': {'column': 'Salary'}},
   'exception_info': {'raised_exception': False,
    'exception_message': None,
    'exception_traceback': None}},
  {'success': True,
   'result': {'observed_value': 43869.66102793441,
    'element_count': 15429,
    'missing_count': 0,
    'missing_percent': 0.0},
   'expectation_config': {'expectation_type': 'expect_column_mean_to_be_between',
    'kwargs': {'column': 'Salary', 'min_value': 10000, 'max_value': 100000}},
   'exception_info': {'raised_exception': False,
    'exception_message': None,
    'exception_traceback': None}},
  {'success': True,
   'result': {'element_count': 15429,
    'missing_count': 0,
    'missing_percent': 0.0,
    'unexpected_count': 0,
    'unexpected_percent': 0.0,
    'unexpected_percent_nonmissing': 0.0,
    'partial_unexpected_list': []},
   'expectation_config': {'expectation_type': 'expect_column_values_to_b

### How it works...

## Using pytest with pandas

### How to do it...

### How it works...

### There's more...

## Generating Tests with Hypothesis

### How to do it...

### How it works...