In [204]:
import re
import polars as pl

df = pl.read_csv('data/oecd_prices.csv')

In [205]:
df1 = (
    df.filter(
        (pl.col('REF_AREA') == 'USA') & (pl.col('UNIT_MEASURE') == 'PA') &(pl.col('FREQ') == 'M')
     )['TIME_PERIOD', 'OBS_VALUE']
)
df1a = (
    df1.with_columns(pl.col('TIME_PERIOD').str.to_date('%Y-%m').alias('TIME_PERIOD'))
       .filter(pl.col('TIME_PERIOD').dt.year() >= 2020)
)
with pl.Config(tbl_rows=100):
    print(df1a.with_columns(pl.col('OBS_VALUE').pct_change().alias('pct_change')))

shape: (55, 3)
┌─────────────┬───────────┬────────────┐
│ TIME_PERIOD ┆ OBS_VALUE ┆ pct_change │
│ ---         ┆ ---       ┆ ---        │
│ date        ┆ f64       ┆ f64        │
╞═════════════╪═══════════╪════════════╡
│ 2020-01-01  ┆ 2.486572  ┆ null       │
│ 2020-02-01  ┆ 2.334874  ┆ -0.061007  │
│ 2020-03-01  ┆ 1.539327  ┆ -0.340724  │
│ 2020-04-01  ┆ 0.3290967 ┆ -0.786207  │
│ 2020-05-01  ┆ 0.1179264 ┆ -0.641666  │
│ 2020-06-01  ┆ 0.6457331 ┆ 4.47573    │
│ 2020-07-01  ┆ 0.9860818 ┆ 0.527073   │
│ 2020-08-01  ┆ 1.309645  ┆ 0.32813    │
│ 2020-09-01  ┆ 1.371325  ┆ 0.047097   │
│ 2020-10-01  ┆ 1.182066  ┆ -0.138012  │
│ 2020-11-01  ┆ 1.174536  ┆ -0.00637   │
│ 2020-12-01  ┆ 1.362005  ┆ 0.159611   │
│ 2021-01-01  ┆ 1.39977   ┆ 0.027728   │
│ 2021-02-01  ┆ 1.676215  ┆ 0.197493   │
│ 2021-03-01  ┆ 2.619762  ┆ 0.562903   │
│ 2021-04-01  ┆ 4.159695  ┆ 0.587814   │
│ 2021-05-01  ┆ 4.992707  ┆ 0.200258   │
│ 2021-06-01  ┆ 5.391451  ┆ 0.079865   │
│ 2021-07-01  ┆ 5.365475  ┆ -0.004818  │
│

In [206]:
df2 = (
    df.filter(
        (~pl.col('REF_AREA').is_in(['EA_20', 'G20', 'EU27_2020'])) 
        & (pl.col('UNIT_MEASURE') == 'PA') & (pl.col('FREQ') == 'A')
     )['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE'].sort(by=pl.col('OBS_VALUE'), descending=True)
)['REF_AREA', 'TIME_PERIOD', 'OBS_VALUE']
df2a = df2.filter(pl.col('TIME_PERIOD') == '2020')
print(df2a.head())
print(df2a.tail())

shape: (5, 3)
┌──────────┬─────────────┬───────────┐
│ REF_AREA ┆ TIME_PERIOD ┆ OBS_VALUE │
│ ---      ┆ ---         ┆ ---       │
│ str      ┆ str         ┆ f64       │
╞══════════╪═════════════╪═══════════╡
│ ARG      ┆ 2020        ┆ 42.01509  │
│ TUR      ┆ 2020        ┆ 12.3      │
│ IND      ┆ 2020        ┆ 5.561349  │
│ SAU      ┆ 2020        ┆ 3.445458  │
│ MEX      ┆ 2020        ┆ 3.396834  │
└──────────┴─────────────┴───────────┘
shape: (5, 3)
┌──────────┬─────────────┬───────────┐
│ REF_AREA ┆ TIME_PERIOD ┆ OBS_VALUE │
│ ---      ┆ ---         ┆ ---       │
│ str      ┆ str         ┆ f64       │
╞══════════╪═════════════╪═══════════╡
│ FRA      ┆ 2020        ┆ 0.5       │
│ DEU      ┆ 2020        ┆ 0.4       │
│ EA20     ┆ 2020        ┆ 0.3       │
│ JPN      ┆ 2020        ┆ -0.024996 │
│ ITA      ┆ 2020        ┆ -0.1      │
└──────────┴─────────────┴───────────┘


In [207]:
df3 = (
    df2.sort(by='TIME_PERIOD').filter(pl.col('TIME_PERIOD') >= '2010')
       .pivot(on='TIME_PERIOD', index='REF_AREA', values='OBS_VALUE')
)
df3

REF_AREA,2010,2011,2012,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
str,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
"""IND""",11.98939,8.858361,9.312446,10.90764,6.353195,5.872427,4.941027,2.490887,4.8607,7.659695,5.561349,4.890086,5.90043,5.552331
"""TUR""",8.6,6.5,9.0,7.5,8.9,7.7,7.7,11.1,16.3,15.2,12.3,19.6,72.3,54.0
"""RUS""",6.849392,8.440471,5.074733,6.753717,7.823416,15.5344,7.042436,3.683334,2.878305,4.470367,3.38166,6.694456,,
"""SAU""",5.339416,5.826217,2.86627,3.532522,2.23629,1.206073,2.06884,-0.838195,2.458142,-2.093333,3.445458,3.06329,2.474074,2.327085
"""IDN""",5.134203,5.356048,4.279512,6.4125,6.394926,6.363121,3.525805,3.808798,3.198346,3.030587,1.919834,1.56008,4.209456,3.669401
"""BRA""",5.038727,6.63645,5.403499,6.204311,6.32904,9.029901,8.739143,3.446373,3.66485,3.732976,3.211768,8.30166,9.280107,4.593563
"""MEX""",4.156728,3.40738,4.111508,3.80639,4.018617,2.720641,2.821708,6.041456,4.899351,3.635961,3.396834,5.689209,7.896276,5.527961
"""ZAF""",4.08973,4.999267,5.724658,5.784469,6.129838,4.540642,6.571396,5.184247,4.517165,4.120246,3.210036,4.611672,7.039727,6.073908
"""GBR""",3.3,4.5,2.8,2.6,1.5,0.0,0.7,2.7,2.5,1.8,0.8812616,2.582376,9.053559,7.30187
"""CHN""",3.3,5.4,2.6,2.6,2.0,1.4,2.0,1.6,2.1,2.9,2.5,0.9,2.0,0.2


In [208]:
df3.columns

['REF_AREA',
 '2010',
 '2011',
 '2012',
 '2013',
 '2014',
 '2015',
 '2016',
 '2017',
 '2018',
 '2019',
 '2020',
 '2021',
 '2022',
 '2023']

In [209]:
df3['REF_AREA']

REF_AREA
str
"""IND"""
"""TUR"""
"""RUS"""
"""SAU"""
"""IDN"""
"""BRA"""
"""MEX"""
"""ZAF"""
"""GBR"""
"""CHN"""


In [215]:
yrs = ['2020', '2021', '2022', '2023']
cols = ['REF_AREA'] + yrs + ['row_mean']
df3 = (
    df3.with_columns(pl.mean_horizontal(*yrs)
       .alias('row_mean'))[cols].sort('row_mean', descending=True)
)
df3

REF_AREA,2020,2021,2022,2023,row_mean
str,f64,f64,f64,f64,f64
"""ARG""",42.01509,48.40938,72.43076,133.4889,74.086033
"""TUR""",12.3,19.6,72.3,54.0,39.55
"""BRA""",3.211768,8.30166,9.280107,4.593563,6.3467745
"""MEX""",3.396834,5.689209,7.896276,5.527961,5.62757
"""IND""",5.561349,4.890086,5.90043,5.552331,5.476049
"""ZAF""",3.210036,4.611672,7.039727,6.073908,5.233836
"""RUS""",3.38166,6.694456,,,5.038058
"""GBR""",0.8812616,2.582376,9.053559,7.30187,4.954767
"""DEU""",0.4,3.2,8.7,6.0,4.575
"""USA""",1.233584,4.697859,8.0028,4.116338,4.512645
