In [1]:
from __future__ import annotations

import os

import pandas as pd

In [2]:
import stdflow as sf
from stdflow import Step
from root import data_root

## Formatting

In [3]:
def convert_string_numbers(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of ``df`` in which numeric-looking strings become numbers.

    Every cell is visited individually. A ``str`` cell is converted to ``int``
    when ``int()`` accepts it, otherwise to ``float`` when ``float()`` accepts
    it, otherwise it is kept as the original string. Cells that are not
    strings are returned untouched.

    The input frame is not modified; callers must use the returned frame.

    Args:
        df: Frame whose string cells should be parsed.

    Returns:
        A new DataFrame with the same shape, index and columns as ``df``.
    """

    def to_numeric(value):
        # Only strings are candidates; anything else passes straight through.
        if not isinstance(value, str):
            return value
        # Try the narrowest type first so "3" becomes 3 rather than 3.0.
        # NOTE: int()/float() are lenient -- they also accept surrounding
        # whitespace, "1_000", "nan", "inf", etc.
        for caster in (int, float):
            try:
                return caster(value)
            except ValueError:
                continue
        # Not convertible: keep the original string.
        return value

    # DataFrame.applymap was deprecated in pandas 2.1 in favour of
    # DataFrame.map. Look the method up on the class (not the instance) so a
    # column that happens to be called "map" cannot shadow it on old pandas.
    elementwise = getattr(pd.DataFrame, "map", None)
    if elementwise is None:
        elementwise = pd.DataFrame.applymap
    return elementwise(df, to_numeric)


In [4]:

def fix_typing(attrs: str | list):
    """Load a 'formatted' dataset, parse numeric strings, save as 'typing_fixed'.

    Args:
        attrs: Attribute path forwarded to ``stdflow.Step`` to select the
            dataset (this notebook calls it with e.g. ``'countries'``).

    Side effects:
        Reads the dataset stored under step ``'formatted'`` and writes the
        converted frame under step ``'typing_fixed'`` via ``Step.save``.
    """
    # Load
    step = Step(root=data_root, attrs=attrs)
    # The version can be overridden through a step variable named "version";
    # ":default" is the fallback value handed to stdflow.
    step.version = step.var("version", ":default")
    df = step.load(step='formatted', verbose=True)

    # Format
    # convert_string_numbers returns a NEW frame and leaves its input
    # untouched, so the result must be captured -- otherwise the unconverted
    # data would be saved.
    df = convert_string_numbers(df)

    # Save
    step.save(df, step="typing_fixed", verbose=True, index=False)


In [5]:
# Apply the typing fix to each dataset handled by this notebook.
for attrs in ['countries', 'world_happiness']:
    fix_typing(attrs)

INFO:stdflow.step:Loading data from ../../data/countries/step_formatted/v_ppl/countries.csv


INFO:stdflow.step:Data loaded from ../../data/countries/step_formatted/v_ppl/countries.csv


INFO:stdflow.step:Saving data to ../../data/countries/step_typing_fixed/v_202309220919/countries.csv


INFO:stdflow.step:Data saved to ../../data/countries/step_typing_fixed/v_202309220919/countries.csv


INFO:stdflow.step:Saving metadata to ../../data/countries/step_typing_fixed/v_202309220919/


INFO:stdflow.step:Loading data from ../../data/world_happiness/step_formatted/v_ppl/report_2019.csv


INFO:stdflow.step:Data loaded from ../../data/world_happiness/step_formatted/v_ppl/report_2019.csv


INFO:stdflow.step:Saving data to ../../data/world_happiness/step_typing_fixed/v_202309220919/report_2019.csv


INFO:stdflow.step:Data saved to ../../data/world_happiness/step_typing_fixed/v_202309220919/report_2019.csv


INFO:stdflow.step:Saving metadata to ../../data/world_happiness/step_typing_fixed/v_202309220919/


### Demo Tool

In [6]:
from stdflow import from_pipeline, StepRunner

In [7]:
# Demo: launch the notebook through stdflow's StepRunner, but only when this
# code is not already being executed from a pipeline.
# NOTE(review): presumably the guard prevents a pipeline run from re-triggering
# the notebook recursively -- confirm against stdflow's from_pipeline docs.
if not from_pipeline():
    StepRunner("./02_fix_typing.ipynb").run(verbose=True)
    