## Pandas Profiling: NASA Meteorites example
Source of data: https://data.nasa.gov/Space-Science/Meteorite-Landings/gh4g-9sfh

### Import libraries

In [7]:
from pathlib import Path

import requests
import numpy as np
import pandas as pd

import pandas_profiling
from pandas_profiling.utils.cache import cache_file

ModuleNotFoundError: No module named 'pandas_profiling'

### Load and prepare example dataset
We add some synthetic variables to illustrate the capabilities of pandas-profiling.

In [2]:
# Load the NASA meteorite landings CSV and add synthetic columns/rows that
# exercise different pandas-profiling variable types and warnings.

# cache_file is given a local file name and the download URL; its return value
# is passed straight to pd.read_csv (presumably the path of the cached file).
file_name = cache_file(
    "meteorites.csv",
    "https://data.nasa.gov/api/views/gh4g-9sfh/rows.csv?accessType=DOWNLOAD",
)

df = pd.read_csv(file_name)

# Seed NumPy's global RNG so the random example columns below are identical
# on every "Restart & Run All".
np.random.seed(0)

# Note: pandas nanosecond timestamps only cover roughly the years 1677-2262.
# errors='coerce' turns out-of-range or unparseable values into NaT, so those
# observations are effectively ignored for this analysis.
df['year'] = pd.to_datetime(df['year'], errors='coerce')

# Example: Constant variable
df['source'] = "NASA"

# Example: Boolean variable
df['boolean'] = np.random.choice([True, False], df.shape[0])

# Example: Mixed with base types
# NOTE(review): NumPy builds a single string array from [1, "A"], so the
# sampled values are presumably the strings '1' and 'A' -- confirm this is
# what the "mixed" example intends.
df['mixed'] = np.random.choice([1, "A"], df.shape[0])

# Example: Highly correlated variables
df['reclat_city'] = df['reclat'] + np.random.normal(scale=5, size=len(df))

# Example: Duplicate observations
# Take an explicit copy so renaming below does not write into a slice of df.
duplicates_to_add = df.iloc[0:10].copy()
duplicates_to_add['name'] = duplicates_to_add['name'] + " copy"

# DataFrame.append was removed in pandas 2.0; pd.concat is the supported
# equivalent and also works on older pandas versions.
df = pd.concat([df, duplicates_to_add], ignore_index=True)

NameError: name 'cache_file' is not defined

### Inline report without saving object

In [3]:
# Build a profile of df with the progress bar disabled and the HTML style
# option 'full_width' switched on; 'sort' is passed as the string "None".
report = df.profile_report(
    sort="None",
    html={"style": {"full_width": True}},
    progress_bar=False,
)
# Bare last expression: the notebook displays the report inline.
report

NameError: name 'df' is not defined

### Save report to file

In [4]:
# Create a profile of df with the 'full_width' HTML style option enabled,
# then write it to an HTML file.
profile_report = df.profile_report(
    html={"style": {"full_width": True}},
)
profile_report.to_file("/tmp/example.html")

NameError: name 'df' is not defined

### More analysis (Unicode) and printing an existing ProfileReport object inline

In [5]:
# Rebuild the profile with explorative=True (and the same 'full_width' HTML
# style option), replacing the previous profile_report object.
profile_report = df.profile_report(
    explorative=True,
    html={"style": {"full_width": True}},
)
# Bare last expression: the notebook displays the existing object inline.
profile_report

NameError: name 'df' is not defined

### Notebook Widgets

In [6]:
# Show the profile_report object through its to_widgets() method
# (presumably an ipywidgets-based view of the report -- confirm).
profile_report.to_widgets()

NameError: name 'profile_report' is not defined