In [22]:
import pandas as pd
from sklearn.datasets import load_wine, fetch_california_housing
import ipytest
# Configure ipytest for in-notebook test runs using the library's defaults
# (see the ipytest documentation for exactly what this enables).
ipytest.autoconfig()

## Constants

In [35]:
# Column schema the dataset frame is validated against by the tests below:
# the eight predictive attributes named in the dataset description plus
# 'MedHouseVal' (presumably the median-house-value target column).
FIELDS_MUST_BE_PRESENT = [
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude',
    'MedHouseVal',
]

In [17]:
# Load the California housing dataset and keep only its combined DataFrame
# (the 'frame' entry of the returned Bunch); every test below reads `data`.
data = fetch_california_housing(as_frame=True).frame

In [38]:
# Reset ipytest state before the tests in this cell are (re)defined.
# NOTE(review): reload() is called without any module names — confirm it has
# an effect here; it may be a no-op.
ipytest.reload()
# Presumably removes test functions left in the notebook namespace by earlier
# runs, so renamed or deleted tests are not collected again — confirm against
# the installed ipytest version.
ipytest.clean_tests()

def test_longitude_in_correct_range():
    """Тестируем, что долгота не выходит за пределы допустимых значений."""
    assert data.Longitude.min() >= -180 # minimum possible value is correct
    assert data.Longitude.max() <= 180 # maximum possible value is correct

    
def test_latitude_in_correct_range():
    """Тестируем, что широта не выходит за пределы допустимых значений."""
    assert data.Latitude.min() >= -90 # minimum possible value is correct
    assert data.Latitude.max() <= 90 # maximum possible value is correct
    
    
def test_house_value_not_negative():
    """Тестируем, стоимость дома не отрицательная."""
    assert data.MedHouseVal.min() >= 0
    
    
def test_no_duplicated_rows():
    """Тестируем, что нет дублирующихся строк."""
    assert data.duplicated().sum() == 0
    
def test_no_excess_fields():
    """Тестируем, что нет лишних полей."""
    assert data.columns.isin(FIELDS_MUST_BE_PRESENT).all()

def test_all_required_fields_present():
    """Тестируем, что все необходимые поля присутствуют."""
    fields_not_present = [f for f in FIELDS_MUST_BE_PRESENT if f not in data.columns]
    assert fields_not_present == []

# Run the tests defined in this cell; as the cell's last expression, the
# value returned by run() (an exit code) is displayed as the cell output.
ipytest.run()

[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m.[0m[32m                                                                                       [100%][0m
[32m[32m[1m6 passed[0m[32m in 0.03s[0m[0m


<ExitCode.OK: 0>

# Описание набора данных

In [20]:
# Print the description text that ships with the dataset (the Bunch's 'DESCR' entry).
print(fetch_california_housing(as_frame=True).DESCR)

.. _california_housing_dataset:

California Housing dataset
--------------------------

**Data Set Characteristics:**

    :Number of Instances: 20640

    :Number of Attributes: 8 numeric, predictive attributes and the target

    :Attribute Information:
        - MedInc        median income in block group
        - HouseAge      median house age in block group
        - AveRooms      average number of rooms per household
        - AveBedrms     average number of bedrooms per household
        - Population    block group population
        - AveOccup      average number of household members
        - Latitude      block group latitude
        - Longitude     block group longitude

    :Missing Attribute Values: None

This dataset was obtained from the StatLib repository.
https://www.dcc.fc.up.pt/~ltorgo/Regression/cal_housing.html

The target variable is the median house value for California districts,
expressed in hundreds of thousands of dollars ($100,000).

This dataset was derived

# Ресурсы

- Репозиторий пакета ipytest: https://github.com/chmp/ipytest
- Репозиторий, где можно найти этот ноутбук: https://github.com/4eshireCat/analytics_cookbook