# A Must-Read Cheatsheet to Switch From Pandas to Lightning-Fast Polars
## N most-used Pandas operations/techniques shown in Polars
![](images/midjourney.png)

### Introduction

### 0. Reading/writing data

In [12]:
import pandas as pd

df = pd.read_csv(
    filepath_or_buffer="data/diamonds.csv",
    header=0,  # Default
    usecols=["price", "cut", "color"],
    delimiter=",",
    nrows=10000,
    dtype={
        "cut": "string"
    },  # Override datatype mappings
    na_values="N/A",  # Consider as NaN
)

In [None]:
df.to_csv("data/new_file.csv")

In [None]:
df = pd.read_parquet(
    path="data.parquet",
    columns=["list", "of", "columns"],
)

In [None]:
df.to_parquet(
    path="new_data.parquet",
    compression="snappy",  # Default
)

### 1. Creating Series and DataFrames

In [9]:
series = pd.Series(
    data=[1, 2, 3, 4],
    name="new_series",
)

df1 = pd.DataFrame(
    {
        "col1": ["a", "b", "c"],
        "col2": [1, 2, 3],
    }
)

df2 = pd.DataFrame(
    [
        {"col1": "a", "col2": 1},
        {"col1": "b", "col2": 2},
        {"col1": "c", "col2": 3},
    ]
)

### 2. Viewing data

In [13]:
df.head()

Unnamed: 0,cut,color,price
0,Ideal,E,326
1,Premium,E,326
2,Good,E,327
3,Premium,I,334
4,Good,J,335


In [14]:
df.tail()

Unnamed: 0,cut,color,price
9995,Very Good,E,4704
9996,Premium,E,4704
9997,Premium,E,4704
9998,Premium,D,4704
9999,Fair,D,4704


In [15]:
df.sample(5)

Unnamed: 0,cut,color,price
5764,Good,F,3897
9178,Ideal,I,4543
9326,Premium,E,4579
8011,Premium,E,4328
7042,Very Good,G,4162


In [16]:
df.describe()

Unnamed: 0,price
count,10000.0
mean,3406.6043
std,1110.12915
min,326.0
25%,3033.0
50%,3626.5
75%,4198.0
max,4704.0


### 3. What are expressions in Polars?

### 4. Selecting data

In [19]:
price = df["price"]

two_cols = df[["price", "cut"]]

In [32]:
df.select_dtypes(
    include="number"
).head()

Unnamed: 0,carat,depth,table,price,x,y,z
0,0.23,61.5,55.0,326,3.95,3.98,2.43
1,0.21,59.8,61.0,326,3.89,3.84,2.31
2,0.23,56.9,65.0,327,4.05,4.07,2.31
3,0.29,62.4,58.0,334,4.2,4.23,2.63
4,0.31,63.3,58.0,335,4.34,4.35,2.75


### 5. Filtering data

In [23]:
df[
    df["price"].between(500, 1000)
].head()

Unnamed: 0,cut,color,price
60,Ideal,I,552
61,Premium,D,552
62,Ideal,D,552
63,Ideal,D,552
64,Premium,I,552


In [24]:
df[df["cut"] == "Ideal"].head()

Unnamed: 0,cut,color,price
0,Ideal,E,326
11,Ideal,J,340
13,Ideal,J,344
16,Ideal,I,348
39,Ideal,I,403


In [25]:
df[
    df["color"].isin(["E", "J"])
    & df["price"]
    < 500
].head()

Unnamed: 0,cut,color,price
0,Ideal,E,326
1,Premium,E,326
2,Good,E,327
3,Premium,I,334
4,Good,J,335


### 6. Creating new columns

In [27]:
df["new_col"] = df["price"] ** 2
df["new_col2"] = (
    str(df["price"]) + df["cut"]
)

### 7. Groupby

In [29]:
df = pd.read_csv("data/diamonds.csv")

df.groupby("cut").count()

Unnamed: 0_level_0,carat,color,clarity,depth,table,price,x,y,z
cut,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
Fair,1610,1610,1610,1610,1610,1610,1610,1610,1610
Good,4906,4906,4906,4906,4906,4906,4906,4906,4906
Ideal,21551,21551,21551,21551,21551,21551,21551,21551,21551
Premium,13791,13791,13791,13791,13791,13791,13791,13791,13791
Very Good,12082,12082,12082,12082,12082,12082,12082,12082,12082


In [31]:
cols = ["price", "table", "depth"]

df.groupby("cut")[cols].mean()

Unnamed: 0_level_0,price,table,depth
cut,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Fair,4358.757764,59.053789,64.041677
Good,3928.864452,58.694639,62.365879
Ideal,3457.54197,55.951668,61.709401
Premium,4584.257704,58.746095,61.264673
Very Good,3981.759891,57.95615,61.818275


### 8. Joining and concatenation

### 9. The lazy API in Polars

### Conclusion