In [None]:
# Import from CSV to create Pandas DataFrame

import pandas

from config_secret import DATA_DIR

# Load the customer CSV into the `customers` DataFrame.
#
# The former `tupleize_cols=False`, `error_bad_lines=True` and
# `warn_bad_lines=True` arguments are intentionally not passed: they have
# been removed from pandas.read_csv (passing them raises TypeError on
# current pandas) and they only restated the defaults, so malformed rows
# still raise an error exactly as before.
data_file = DATA_DIR + '/customer_data.csv'
customers = pandas.read_csv(
    data_file,
    sep=',',
    header=0,                       # first row holds the column names
    index_col=False,                # never promote a column to the index
    parse_dates=['date_of_birth'],  # parse this column as datetimes
    dayfirst=True,                  # ambiguous dates are read as DD/MM
    skip_blank_lines=True,          # blank lines are skipped, not read as NaN rows
)

In [None]:
# List the column names of the customers DataFrame, one name per row
columns = pandas.DataFrame([name for name in customers.columns])
columns

In [None]:
# Record the dtype of every column in a one-column DataFrame
# indexed by column name
data_types = customers.dtypes.to_frame(name='Data Type')
data_types

In [None]:
# Count the null entries per column: flag each null cell, then sum the
# flags down every column
missing_data_counts = (
    customers
    .isnull()
    .sum()
    .to_frame(name='Missing Values')
)
missing_data_counts

In [None]:
# Count the non-null entries per column (DataFrame.count skips nulls)
present_data_counts = customers.count().to_frame(name='Present Values')
present_data_counts

In [None]:
# Create DataFrame with count of unique values in each column.
# DataFrame.nunique() counts the distinct non-null values of every column
# in a single call (nulls are excluded by default, as with Series.nunique),
# which avoids growing a frame one row at a time with .loc and yields an
# integer column indexed by column name.
unique_value_counts = customers.nunique().to_frame(name='Unique Values')
unique_value_counts


In [None]:
# Create a DataFrame with the minimum value of each column.
# The minimum is taken column by column (rather than with a frame-wide
# reduction) and all results are collected first, so the frame is built
# once instead of being enlarged row by row through .loc.
# dtype=object keeps each minimum as returned by Series.min(), because
# the columns can hold different kinds of values (numbers, dates, text).
minimum_values = pandas.Series(
    {name: customers[name].min() for name in customers.columns},
    dtype=object,
).to_frame(name='Minimum Value')
minimum_values

In [None]:
# Create a DataFrame with the maximum value of each column.
# The maximum is taken column by column (rather than with a frame-wide
# reduction) and all results are collected first, so the frame is built
# once instead of being enlarged row by row through .loc.
# dtype=object keeps each maximum as returned by Series.max(), because
# the columns can hold different kinds of values (numbers, dates, text).
maximum_values = pandas.Series(
    {name: customers[name].max() for name in customers.columns},
    dtype=object,
).to_frame(name='Maximum Value')
maximum_values

In [None]:
# Assemble the report: starting from the dtype table, attach each summary
# table in turn, aligning rows on the shared index
data_quality_report = (
    data_types
    .join(present_data_counts)
    .join(missing_data_counts)
    .join(unique_value_counts)
    .join(minimum_values)
    .join(maximum_values)
)

print('\nDATA QUALITY REPORT')
# Number of rows in the customers DataFrame
print('Total records {}'.format(len(customers)))
data_quality_report

In [None]:
# Summary statistics from describe() with default settings, transposed so
# that each described column becomes a row
customers.describe().T

In [None]:
# Summary statistics restricted to object-dtype columns, transposed so
# that each described column becomes a row
customers.describe(include=['object']).T