### Documentation URL ###
- https://docs.greatexpectations.io/docs/guides/setup/setup_overview

In [13]:
import pandas as pd
import great_expectations as ge

In [9]:
# Remote location of the Titanic CSV that the loading cells below read from.
source_url = "https://raw.githubusercontent.com/prodramp/publiccode/master/datasets/titanic.csv"

## You should not use the pandas library to read the data

In [10]:
# Plain-pandas load, shown only for contrast with the next section: `df` is
# rebound by the ge.read_csv(source_url) cell that follows, so this value is
# not the one the expect_* calls operate on.
df = pd.read_csv(source_url)

## Let Great Expectations read the data itself first

In [14]:
# Load the same CSV through Great Expectations; the resulting `df` is the
# object every expect_* / validate call further down is made on.
df = ge.read_csv(source_url)

In [15]:
# Display the loaded frame to eyeball its columns and a sample of rows.
df

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.2500,,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.9250,,S
3,4,1,1,"Futrelle, Mrs. Jacques Heath (Lily May Peel)",female,35.0,1,0,113803,53.1000,C123,S
4,5,0,3,"Allen, Mr. William Henry",male,35.0,0,0,373450,8.0500,,S
...,...,...,...,...,...,...,...,...,...,...,...,...
886,887,0,2,"Montvila, Rev. Juozas",male,27.0,0,0,211536,13.0000,,S
887,888,1,1,"Graham, Miss. Margaret Edith",female,19.0,0,0,112053,30.0000,B42,S
888,889,0,3,"Johnston, Miss. Catherine Helen ""Carrie""",female,,1,2,W./C. 6607,23.4500,,S
889,890,1,1,"Behr, Mr. Karl Howell",male,26.0,0,0,111369,30.0000,C148,C


In [45]:
# Passing case: the data has a 'Fare' column, so success comes back true.
df.expect_column_to_exist(column="Fare")

{
  "meta": {},
  "result": {},
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [46]:
# Failing case: there is no 'NotFound' column, so success comes back false
# (reported in the result object, not raised as an exception).
df.expect_column_to_exist(column="NotFound")

{
  "meta": {},
  "result": {},
  "success": false,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [23]:
# Strict range check on Fare between 1 and 1000. It fails here because
# 15 rows carry a fare of 0.0.
df.expect_column_values_to_be_between(column="Fare", min_value=1, max_value=1000)

{
  "meta": {},
  "result": {
    "element_count": 891,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 15,
    "unexpected_percent": 1.6835016835016834,
    "unexpected_percent_total": 1.6835016835016834,
    "unexpected_percent_nonmissing": 1.6835016835016834,
    "partial_unexpected_list": [
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0
    ]
  },
  "success": false,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [27]:
# Same range check, relaxed with mostly=0.80: the ~1.7% of out-of-range
# fares are now tolerated and the expectation succeeds.
df.expect_column_values_to_be_between(
    column="Fare",
    min_value=1,
    max_value=1000,
    mostly=0.80,
)

{
  "meta": {},
  "result": {
    "element_count": 891,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 15,
    "unexpected_percent": 1.6835016835016834,
    "unexpected_percent_total": 1.6835016835016834,
    "unexpected_percent_nonmissing": 1.6835016835016834,
    "partial_unexpected_list": [
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0,
      0.0
    ]
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [32]:
# Null check on Cabin: fails, since 687 of the 891 values (~77%) are missing.
df.expect_column_values_to_not_be_null(column="Cabin")

{
  "meta": {},
  "result": {
    "element_count": 891,
    "unexpected_count": 687,
    "unexpected_percent": 77.10437710437711,
    "unexpected_percent_total": 77.10437710437711,
    "partial_unexpected_list": []
  },
  "success": false,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [18]:
# Membership check: every Sex value must be either 'male' or 'female'.
df.expect_column_values_to_be_in_set(
    column="Sex",
    value_set=["male", "female"],
)

{
  "meta": {},
  "result": {
    "element_count": 891,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [33]:
# Collect the expectations recorded on `df` so far into a suite object.
# get_expectation_suite() is the current name of this accessor;
# get_expectations_config() is the legacy spelling of the same call, so the
# returned suite is unchanged.
config = df.get_expectation_suite()

In [34]:
# Display the suite. In the recorded output only the Sex value-set check and
# the Fare range check with mostly=0.8 are present.
config

{
  "meta": {
    "great_expectations_version": "0.14.9"
  },
  "ge_cloud_id": null,
  "expectation_suite_name": "default",
  "data_asset_type": "Dataset",
  "expectations": [
    {
      "meta": {},
      "kwargs": {
        "column": "Sex",
        "value_set": [
          "male",
          "female"
        ]
      },
      "expectation_type": "expect_column_values_to_be_in_set"
    },
    {
      "meta": {},
      "kwargs": {
        "column": "Fare",
        "min_value": 1,
        "max_value": 1000,
        "mostly": 0.8
      },
      "expectation_type": "expect_column_values_to_be_between"
    }
  ]
}

In [37]:
# Persist the current expectation suite as JSON in the working directory.
df.save_expectation_suite(filepath="saved_titanic_exp.json")

In [38]:
# List the working directory to confirm saved_titanic_exp.json was written.
!ls

Expectation Context.ipynb
Great Expectations Initialize.ipynb
Introduction.ipynb
Package Installation.ipynb
Pandas Profiling with Great Expectations.ipynb
Quick Expectation Tutorial.ipynb
[1m[36mgreat_expectations[m[m
saved_titanic_exp.json


In [39]:
# Print the saved suite file to inspect the JSON that was written.
!cat saved_titanic_exp.json

{
  "data_asset_type": "Dataset",
  "expectation_suite_name": "default",
  "expectations": [
    {
      "expectation_type": "expect_column_values_to_be_in_set",
      "kwargs": {
        "column": "Sex",
        "value_set": [
          "male",
          "female"
        ]
      },
      "meta": {}
    },
    {
      "expectation_type": "expect_column_values_to_be_between",
      "kwargs": {
        "column": "Fare",
        "max_value": 1000,
        "min_value": 1,
        "mostly": 0.8
      },
      "meta": {}
    }
  ],
  "ge_cloud_id": null,
  "meta": {
    "great_expectations_version": "0.14.9"
  }
}

In [40]:
# NOTE(review): the file name is passed as the first positional argument.
# get_expectations_config() presumably mirrors get_expectation_suite(), whose
# first parameter is a discard-failed-expectations flag rather than a path, so
# this call likely returns the in-memory suite instead of reading the saved
# file — TODO confirm. If the goal is to validate against the file on disk,
# df.validate(expectation_suite='saved_titanic_exp.json') may be the intended
# route — verify against the Great Expectations docs for this version.
new_config = df.get_expectations_config('saved_titanic_exp.json')



In [41]:
# Display the suite obtained above; the recorded output lists the same two
# expectations (Sex value set, Fare range with mostly=0.8) as `config`.
new_config

{
  "meta": {
    "great_expectations_version": "0.14.9"
  },
  "ge_cloud_id": null,
  "expectation_suite_name": "default",
  "data_asset_type": "Dataset",
  "expectations": [
    {
      "meta": {},
      "kwargs": {
        "column": "Sex",
        "value_set": [
          "male",
          "female"
        ]
      },
      "expectation_type": "expect_column_values_to_be_in_set"
    },
    {
      "meta": {},
      "kwargs": {
        "column": "Fare",
        "min_value": 1,
        "max_value": 1000,
        "mostly": 0.8
      },
      "expectation_type": "expect_column_values_to_be_between"
    }
  ]
}

In [42]:
# Run every expectation in `new_config` against `df` and show the combined
# validation report (overall success plus one result entry per expectation).
df.validate(expectation_suite=new_config)

{
  "meta": {
    "great_expectations_version": "0.14.9",
    "expectation_suite_name": "default",
    "run_id": {
      "run_name": null,
      "run_time": "2022-03-08T08:23:58.699223+00:00"
    },
    "batch_kwargs": {
      "ge_batch_id": "e435e4b8-9eb6-11ec-a7d6-acde48001122"
    },
    "batch_markers": {},
    "batch_parameters": {},
    "validation_time": "20220308T082358.699097Z",
    "expectation_suite_meta": {
      "great_expectations_version": "0.14.9"
    }
  },
  "success": true,
  "evaluation_parameters": {},
  "results": [
    {
      "meta": {},
      "result": {
        "element_count": 891,
        "missing_count": 0,
        "missing_percent": 0.0,
        "unexpected_count": 0,
        "unexpected_percent": 0.0,
        "unexpected_percent_total": 0.0,
        "unexpected_percent_nonmissing": 0.0,
        "partial_unexpected_list": []
      },
      "success": true,
      "exception_info": {
        "raised_exception": false,
        "exception_message": null,
     

In [43]:
# NOTE(review): per its name, this expectation compares the listed columns
# against each other within a single row. With only one column in the list
# there is nothing to compare, so the success reported here is presumably
# vacuous and says nothing about duplicate names across rows.
# expect_column_values_to_be_unique('Name') is likely the intended check —
# TODO confirm.
df.expect_select_column_values_to_be_unique_within_record(['Name'])

{
  "meta": {},
  "result": {
    "element_count": 891,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}

In [44]:
# NOTE(review): per its name, this expectation compares the listed columns
# against each other within a single row. With a single-column list the
# check presumably cannot fail, which would explain the 0 unexpected values
# reported for Ticket. If the goal is to detect tickets repeated across rows,
# expect_column_values_to_be_unique('Ticket') is likely the intended check —
# TODO confirm.
df.expect_select_column_values_to_be_unique_within_record(['Ticket'])

{
  "meta": {},
  "result": {
    "element_count": 891,
    "missing_count": 0,
    "missing_percent": 0.0,
    "unexpected_count": 0,
    "unexpected_percent": 0.0,
    "unexpected_percent_total": 0.0,
    "unexpected_percent_nonmissing": 0.0,
    "partial_unexpected_list": []
  },
  "success": true,
  "exception_info": {
    "raised_exception": false,
    "exception_traceback": null,
    "exception_message": null
  }
}