# In [None]:
import yaml
import simplejson
import numpy as np
import pandas as pd
import pandas_profiling

import matplotlib.pyplot as plt
# Select matplotlib's non-interactive 'agg' backend at import time.
# NOTE(review): presumably so plotting done while profiling works without a
# display (headless runs) — confirm.
plt.switch_backend('agg')


#function to act as converter when default json converter fails
def convert(o):
    """Fallback serializer for NumPy values the JSON encoder cannot handle.

    Meant to be passed as the ``default=`` callback of ``simplejson.dump``,
    which calls it for every object it does not know how to encode.

    Args:
        o: The object the encoder could not serialize on its own.

    Returns:
        A built-in ``bool``, ``int`` or ``float`` for the matching NumPy
        scalar (of any width, not only ``np.int64``), a nested ``list`` for
        an ``np.ndarray``, and ``None`` for anything else. The ``None``
        fallback is kept on purpose: the encoder then writes ``null`` for
        unsupported objects instead of aborting the whole dump.
    """
    if isinstance(o, np.bool_):
        return bool(o)
    if isinstance(o, np.integer):
        return int(o)
    if isinstance(o, np.floating):
        return float(o)
    if isinstance(o, np.ndarray):
        # tolist() also converts the contained NumPy scalars to built-ins.
        return o.tolist()
    # Unsupported type: best-effort, serialized as null by the encoder.
    return None

def _outlier_stats(ser, low, high):
    """Return (count, fraction) of values in *ser* at or outside [low, high]."""
    # A single combined mask guarantees each row is counted at most once,
    # even when low == high (e.g. a constant column), where counting the two
    # tails separately would count every row twice.
    outside = (ser <= low) | (ser >= high)
    count = int(outside.sum())
    total = len(ser)
    # Guard the division so a zero-length column yields no fraction
    # instead of raising ZeroDivisionError.
    fraction = count / total if total else None
    return count, fraction


def save_json(df, output_folder, yaml_file, file_name):
    """Profile *df* and write a condensed JSON summary of the report.

    The summary contains the report's ``"table"`` section (as ``"overview"``),
    one row of selected statistics per variable, the value counts of every
    variable with and without NaN, and, when available, the correlation
    matrices.

    Args:
        df: DataFrame handed to ``pandas_profiling.ProfileReport``.
        output_folder: Prefix of the output path. It is concatenated directly
            with ``file_name``, so it must already end with a path separator.
        yaml_file: Path to a YAML file listing the statistic names to export
            for each variable; the loaded value is also stored as the
            ``"header"`` of the per-variable rows. The names
            ``"num_outliers"`` and ``"perc_outliers"`` are computed here from
            the report's ``"5%"`` / ``"95%"`` entries when the report itself
            does not provide them.
        file_name: Output file name without the ``.json`` extension.

    Returns:
        ``None`` after the file has been written, or an error message string
        (and no file) when the YAML file cannot be parsed.
    """
    # Load the requested statistic names first so that a broken YAML file is
    # reported before the profiling step runs.
    with open(yaml_file, 'r') as stream:
        try:
            possible_keys = yaml.safe_load(stream)
        except yaml.YAMLError as exc:
            return f"following error occured while loading yaml file: {exc}"

    # An empty YAML document loads as None; treat it as "no statistics"
    # when iterating, while still exporting the loaded value as the header.
    keys = possible_keys if possible_keys is not None else []

    profile = pandas_profiling.ProfileReport(df)
    parent_dict = profile.get_description()

    data = {}
    data["overview"] = parent_dict["table"]
    variables = parent_dict["variables"]

    general_info = {"header": possible_keys}
    counts_with_nan = {}
    counts_without_nan = {}
    data["variables general info"] = general_info
    data["variables frequency info"] = {
        "value_counts_with_nan": counts_with_nan,
        "value_counts_without_nan": counts_without_nan,
    }

    # Per-variable statistics and value counts.
    for variable, stats in variables.items():
        # Percentile bounds are only present for some variables; without
        # both of them no outlier figures are produced.
        try:
            low = stats["5%"]
            high = stats["95%"]
        except KeyError:
            low = None
            high = None

        num_outliers, perc_outliers = None, None
        if low is not None and high is not None:
            num_outliers, perc_outliers = _outlier_stats(df[variable], low, high)

        row = []
        for key in keys:
            if key in stats:
                value = stats[key]
                # "type" holds an enum member; export its underlying value.
                row.append(value.value if key == "type" else value)
            elif key == "num_outliers":
                row.append(num_outliers)
            elif key == "perc_outliers":
                row.append(perc_outliers)
            else:
                # Statistics the variable does not have are exported as the
                # literal string "None" (not JSON null).
                row.append("None")
        general_info[variable] = row

        with_nan = stats["value_counts_with_nan"]
        without_nan = stats["value_counts_without_nan"]
        counts_with_nan[variable] = [list(with_nan.index), list(with_nan.values)]
        counts_without_nan[variable] = [list(without_nan.index), list(without_nan.values)]

    # Correlations are best-effort: any failure here only skips (the rest of)
    # this section. Catching Exception rather than using a bare except lets
    # KeyboardInterrupt and SystemExit propagate.
    try:
        correlations = parent_dict["correlations"]

        data["correlations"] = {}

        for method, matrix in correlations.items():
            # Non-mutating fill: the report's own matrices stay untouched.
            filled = matrix.fillna("NAN")
            data["correlations"][method] = [
                list(filled.index),
                [list(row_values) for row_values in filled.values],
            ]
    except Exception:
        print("No correlation generated")

    with open(f"{output_folder}{file_name}.json", "w") as write_file:
        # convert() handles NumPy values; ignore_nan=True writes NaN as null.
        simplejson.dump(data, write_file, default=convert, ignore_nan=True)