## Package Imports

In [16]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import sys
# Project python scripts directory
sys.path.insert(1, '../src')

import tarfile
import os
import json
import re
import time

from functools import partial
from pdb import set_trace

import numpy as np
import pandas as pd

import matplotlib as mpl
mpl.use('pgf')
import matplotlib.pyplot as plt
import matplotlib.transforms as transforms
import seaborn as sns


from sklearn.base import BaseEstimator,TransformerMixin
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import (OneHotEncoder, StandardScaler)
from sklearn.linear_model import LogisticRegression
from sklearn import set_config


from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_auc_score

from pandas_profiling import ProfileReport

## Global Variables

In [None]:
# Project directory layout, relative to the notebook's working directory.

# Inputs: raw and processed data, plus configuration files.
dir_data_raw = "../input/data/raw"
dir_data_processed = "../input/data/processed"
dir_config = "../input/config"

# Outputs: figures, tables and submission files.
dir_figures = "../output/figures"
dir_tables = "../output/tables"
dir_submit = "../output/submissions"

# Project python sources.
dir_src = "../src"

## Configurations

In [None]:
# Apply seaborn's styling first; the rc overrides below are layered on top.
sns.set()

# Matplotlib rc overrides so figures use LaTeX's default fonts and engine.
_latex_rc_params = {
    # Serif/main font for text elements.
    "font.family": "serif",
    # Use inline math for ticks.
    "text.usetex": True,
    "font.serif": [],
    # Font sizes for body text, axis labels and tick labels.
    "font.size": 10,
    "axes.labelsize": 10,
    "xtick.labelsize": 10,
    "ytick.labelsize": 10,
    # Don't set up fonts from rc parameters.
    "pgf.rcfonts": False,
    "figure.constrained_layout.use": True,
}
plt.rcParams.update(_latex_rc_params)

# Have scikit-learn display pipelines as diagrams.
set_config(display='diagram')

## Helper Functions

In [15]:
%%writefile ../src/helper_functions.py

import os
import pandas as pd
from functools import partial

# Text width of the report, in points. This is the value passed as
# `width_pt` to set_fig_size through the set_report_fig_size partial.
latex_text_size = 453

def save_latex_table(df, path=None, precision=2, formatter={}, **kwargs):
    print(
        df.style\
        .hide(axis='index')\
        .format(precision=precision, thousands=",", formatter=formatter)\
        .to_latex(
            path,
            position='H', 
            position_float='centering', 
            hrules=True,
            **kwargs
        )
    )



def display_full_frame(df):
    """Display ``df`` with pandas' row and column display limits lifted.

    The limits are only changed for the duration of the call.

    NOTE(review): ``display`` is not imported in this module; this presumably
    relies on IPython making it available -- confirm before using the helper
    outside a notebook session.
    """
    # Option/value pairs for pd.option_context; more pairs can be appended.
    unlimited = ('display.max_rows', None, 'display.max_columns', None)
    with pd.option_context(*unlimited):
        display(df)


def set_fig_size(width_pt, fraction=1, subplots=(1, 1)):
    """Compute a figure size in inches that fits a document of a given width.

    The width is ``fraction`` of the document width; the height follows from
    the golden ratio, scaled by the rows-to-columns ratio of the subplot grid.

    Parameters
    ----------
    width_pt: float
            Document width in points
    fraction: float, optional
            Share of the document width the figure should take up
    subplots: array-like, optional
            Number of subplot rows and columns, in that order.
    Returns
    -------
    fig_dim: tuple
            (width, height) of the figure in inches
    """
    # Conversion factor from points to inches (72.27 pt per inch).
    pt_to_in = 1 / 72.27
    # Height/width ratio of a single panel, chosen for aesthetics.
    aspect = (5**.5 - 1) / 2

    n_rows = subplots[0]
    n_cols = subplots[1]

    # Scale the document width first, then convert it to inches.
    width_in = (width_pt * fraction) * pt_to_in
    # Taller grids (more rows than columns) get proportionally more height.
    height_in = width_in * aspect * (n_rows / n_cols)

    return (width_in, height_in)

# Variant of set_fig_size with the report's text width pre-filled.
# NOTE: width_pt is bound by keyword, so pass `fraction` / `subplots` by
# keyword too; a positional argument would be taken as width_pt again and
# raise a TypeError for multiple values.
set_report_fig_size = partial(set_fig_size, width_pt=latex_text_size)

Overwriting ../src/helper_functions.py
