## Librerias

In [42]:
from datetime import date, timedelta
import polars as pl
import os

## Funciones - Estandares

In [38]:
pl.Config(set_fmt_float="full")

<polars.config.Config at 0x1f5c4faefd0>

In [None]:
path_data = os.path.join(os.getcwd(), 'data')
path_file = lambda file: os.path.join(path_data, file)

## Data

In [None]:
sales_data = pl.read_csv(path_file('sales.csv'), 
                        schema=pl.Schema({
                            'Store': pl.Int64(),
                            'Date': pl.Date(),
                            'Weekly_Sales': pl.Float64(),
                            'Holiday_Flag': pl.Int64(),
                            'Temperature': pl.Float64(),
                            'Fuel_Price': pl.Float64(),
                            'CPI': pl.Float64(),
                            'Unemployment': pl.Float64(),
                        }
                        ))
sales_data.schema

In [47]:
store = pl.read_csv(path_file('storedesc.csv'))

## Transformacion

### Fecha

#### Filtro

In [None]:
sales_data.filter(pl.col('Store') == 1).head(2)

In [None]:
sales_data.filter(pl.col('Date') == date(2010,2,5)).head(2)

#### Operaciones

In [None]:
sales_data = sales_data.with_columns(
    pl.col('Date').dt.strftime('%Y-%m').alias('Y-M')
)

In [41]:
sales_data.group_by('Y-M').agg(
    pl.sum('Weekly_Sales').round(0).alias('Total_Sales')
).head()

Y-M,Total_Sales
str,f64
"""2012-09""",180645544
"""2011-05""",181648158
"""2010-03""",181919802
"""2010-11""",202853370
"""2010-05""",186710934


In [43]:
sales_data = sales_data.with_columns(
    (pl.col('Date') + timedelta(days=7)).alias('Next_Week_Date')
)

In [45]:
sales_data.head(2)

Store,Date,Weekly_Sales,Holiday_Flag,Temperature,Fuel_Price,CPI,Unemployment,Y-M,Next_Week_Date
i64,date,f64,i64,f64,f64,f64,f64,str,date
1,2010-02-05,1643690.9,0,42.31,2.572,211.0963582,8.106,"""2010-02""",2010-02-12
1,2010-02-12,1641957.44,1,38.51,2.548,211.2421698,8.106,"""2010-02""",2010-02-19


### String

In [56]:
store.select(
    pl.col('address').str.split(' ').get(0).alias('Nunmber')
)

Nunmber
list[str]
"[""09"", ""Florence"", ""Way""]"


In [62]:
store.with_columns(
    pl.col('address').str.splitn(' ',2).alias('Nunmber')
    .struct.rename_fields(['street_number', 'street_name'])
    .alias('address')
).unnest('address').head(2)

store,city,country,street_number,street_name
i64,str,str,str,str
1,"""Honolulu""","""United States""","""09""","""Florence Way"""
2,"""Tulsa""","""United States""","""97""","""Hollow Ridge Hill"""


In [65]:
store.select(
    pl.col('address').str.replace(r'\d+', '').alias('Street_Name_Only')
).head(2)

Street_Name_Only
str
""" Florence Way"""
""" Hollow Ridge Hill"""
