[How to Learn the Polars DataFrame Library!](https://www.youtube.com/watch?v=OTVDmA6CRlQ&list=WL&index=1)

[Documentation polars](https://docs.pola.rs/)

---

[Guetting started with Polars (in code)](https://youtu.be/OTVDmA6CRlQ?t=986)

[Getting starded](https://docs.pola.rs/user-guide/getting-started/#reading-writing)

Date 26/08/24

In [1]:
# %pip install polars

In [1]:
import polars as pl

In [2]:
from datetime import datetime

# Création d'une DF
df = pl.DataFrame(
    {
        "integer": [1, 2, 3],
        "date": [
            datetime(2025, 1, 1),
            datetime(2025, 1, 2),
            datetime(2025, 1, 3),
        ],
        "float": [4.0, 5.0, 6.0],
        "string": ["a", "b", "c"],
    }
)

df

integer,date,float,string
i64,datetime[μs],f64,str
1,2025-01-01 00:00:00,4.0,"""a"""
2,2025-01-02 00:00:00,5.0,"""b"""
3,2025-01-03 00:00:00,6.0,"""c"""


In [3]:
# Conversion de la DF ci-avant en fichier .csv
df.write_csv("data/output.csv")

In [4]:
# Conversion du fichier .csv en DF
df_csv = pl.read_csv("data/output.csv")
print(df_csv)

shape: (3, 4)
┌─────────┬────────────────────────────┬───────┬────────┐
│ integer ┆ date                       ┆ float ┆ string │
│ ---     ┆ ---                        ┆ ---   ┆ ---    │
│ i64     ┆ str                        ┆ f64   ┆ str    │
╞═════════╪════════════════════════════╪═══════╪════════╡
│ 1       ┆ 2025-01-01T00:00:00.000000 ┆ 4.0   ┆ a      │
│ 2       ┆ 2025-01-02T00:00:00.000000 ┆ 5.0   ┆ b      │
│ 3       ┆ 2025-01-03T00:00:00.000000 ┆ 6.0   ┆ c      │
└─────────┴────────────────────────────┴───────┴────────┘


In [2]:
# Chargement d'un fichier .csv converti en DF
pokemon = pl.read_csv('data/pokemon_data.csv')

In [10]:
# Affichage des 5 premières lignes
pokemon.head()

#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
i64,str,str,str,i64,i64,i64,i64,i64,i64,i64,bool
1,"""Bulbasaur""","""Grass""","""Poison""",45,49,49,65,65,45,1,False
2,"""Ivysaur""","""Grass""","""Poison""",60,62,63,80,80,60,1,False
3,"""Venusaur""","""Grass""","""Poison""",80,82,83,100,100,80,1,False
3,"""VenusaurMega Venusaur""","""Grass""","""Poison""",80,100,123,122,120,80,1,False
4,"""Charmander""","""Fire""",,39,52,43,60,50,65,1,False


In [12]:
# Sélection de champs : tous les champs
pokemon.select(pl.col('*'))

#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
i64,str,str,str,i64,i64,i64,i64,i64,i64,i64,bool
1,"""Bulbasaur""","""Grass""","""Poison""",45,49,49,65,65,45,1,false
2,"""Ivysaur""","""Grass""","""Poison""",60,62,63,80,80,60,1,false
3,"""Venusaur""","""Grass""","""Poison""",80,82,83,100,100,80,1,false
3,"""VenusaurMega Venusaur""","""Grass""","""Poison""",80,100,123,122,120,80,1,false
4,"""Charmander""","""Fire""",,39,52,43,60,50,65,1,false
…,…,…,…,…,…,…,…,…,…,…,…
719,"""Diancie""","""Rock""","""Fairy""",50,100,150,100,150,50,6,true
719,"""DiancieMega Diancie""","""Rock""","""Fairy""",50,160,110,160,110,110,6,true
720,"""HoopaHoopa Confined""","""Psychic""","""Ghost""",80,110,60,150,130,70,6,true
720,"""HoopaHoopa Unbound""","""Psychic""","""Dark""",80,160,60,170,130,80,6,true


In [14]:
# Sélection de champs : champs ciblés
pokemon.select(pl.col("Name", 'Type 1'))

Name,Type 1
str,str
"""Bulbasaur""","""Grass"""
"""Ivysaur""","""Grass"""
"""Venusaur""","""Grass"""
"""VenusaurMega Venusaur""","""Grass"""
"""Charmander""","""Fire"""
…,…
"""Diancie""","""Rock"""
"""DiancieMega Diancie""","""Rock"""
"""HoopaHoopa Confined""","""Psychic"""
"""HoopaHoopa Unbound""","""Psychic"""


In [17]:
# Sélection de lignes : lignes ciblées 
# (pokemons légendaires et de type 'Grass')
pokemon.filter(
    (pl.col('Legendary')) & (pl.col("Type 1") == "Grass"))

#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
i64,str,str,str,i64,i64,i64,i64,i64,i64,i64,bool
492,"""ShayminLand Forme""","""Grass""",,100,100,100,100,100,100,4,True
492,"""ShayminSky Forme""","""Grass""","""Flying""",100,103,75,120,75,127,4,True
640,"""Virizion""","""Grass""","""Fighting""",91,90,72,90,129,108,5,True


In [26]:
# Affichage de 5 lignes au hasard
pokemon.sample(5)

#,Name,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
i64,str,str,str,i64,i64,i64,i64,i64,i64,i64,bool
606,"""Beheeyem""","""Psychic""",,75,75,75,125,95,40,5,False
583,"""Vanillish""","""Ice""",,51,65,65,80,75,59,5,False
563,"""Cofagrigus""","""Ghost""",,58,50,145,95,105,30,5,False
156,"""Quilava""","""Fire""",,58,64,58,80,65,80,2,False
277,"""Swellow""","""Normal""","""Flying""",60,85,60,50,50,125,3,False


In [3]:
# Liste des colonnes
pokemon.columns

['#',
 'Name',
 'Type 1',
 'Type 2',
 'HP',
 'Attack',
 'Defense',
 'Sp. Atk',
 'Sp. Def',
 'Speed',
 'Generation',
 'Legendary']

In [14]:
# Sélection de champs avec un trie, une moyenne, et une valeur maximum 
# pour certains de ces champs : affichage de 5 lignes au hasard
pokemon.select(
    pl.col("Name").sort(),
    pl.col("HP").mean().alias("Average HP"),
    pl.col("Attack").max().alias("Max Attack"),
    pl.col("Speed")
).sample(5)

Name,Average HP,Max Attack,Speed
str,f64,i64,i64
"""LopunnyMega Lopunny""",69.25875,190,75
"""Torchic""",69.25875,190,95
"""Drifblim""",69.25875,190,78
"""Skiddo""",69.25875,190,105
"""Rhyhorn""",69.25875,190,66


https://youtu.be/OTVDmA6CRlQ?t=2906