In [1]:
# Uncomment to install the dependencies into this kernel's environment:
# %pip install narwhals pandas polars pyarrow


In [2]:
import pandas as pd
import numpy as np
import narwhals as nw
import polars as pl

In [3]:
# Create synthetic dataset using pandas (as a native DataFrame)
num_rows = 200_000_000  # 200M rows for demonstration; lower this for a quick run
rng = np.random.default_rng(seed=42)  # fixed seed so every run generates the same data
pdf_sales = pd.DataFrame({
    "id": rng.integers(0, 20_000, size=num_rows),        # many repeat IDs to allow groupby
    "value": rng.normal(loc=100.0, scale=50.0, size=num_rows).round(2),  # some numeric value
})
# Create a region mapping: each id gets a region label (e.g., "North","South","East","West")
unique_ids = pdf_sales["id"].unique()
regions = ["North", "South", "East", "West"]
# One rng.choice draw per distinct id
id_to_region = {id_val: rng.choice(regions) for id_val in unique_ids}
pdf_regions = pd.DataFrame({
    "id": list(id_to_region.keys()),
    "region": list(id_to_region.values()),  # same order as the keys above
})
# Quick peek at data shape
print(pdf_sales.shape, pdf_regions.shape)
print(pdf_sales.head(3), "\n", pdf_regions.head(3))
# DataFrame.info() prints its report itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the output.
pdf_sales.info()

(200000000, 2) (20000, 2)
      id   value
0   1785   54.31
1  15479  123.47
2  13091  -15.55 
       id region
0   1785  North
1  15479   West
2  13091  North
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200000000 entries, 0 to 199999999
Data columns (total 2 columns):
 #   Column  Dtype  
---  ------  -----  
 0   id      int64  
 1   value   float64
dtypes: float64(1), int64(1)
memory usage: 3.0 GB
None


In [4]:
# DataFrame.info() writes its report to stdout and returns None; calling it
# directly avoids the stray "None" that print(...) added after the report.
pdf_regions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 20000 entries, 0 to 19999
Data columns (total 2 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   id      20000 non-null  int64 
 1   region  20000 non-null  object
dtypes: int64(1), object(1)
memory usage: 312.6+ KB
None


In [5]:
# Build Polars DataFrames from both pandas frames so the same steps can be run on Polars
pl_sales, pl_regions = map(pl.DataFrame, (pdf_sales, pdf_regions))

In [6]:
# Wrap both pandas frames with Narwhals; the pipeline below is written against these wrappers
df_sales_nw, df_regions_nw = map(nw.from_native, (pdf_sales, pdf_regions))

In [7]:
# Helper that prints a comprehensive overview similar to pandas .info()
def polars_info(df):
    """Print a short overview of ``df``: shape, estimated size in MB, and column dtypes.

    ``df`` only needs to provide ``shape``, ``estimated_size()`` (a byte count)
    and a ``schema`` mapping of column name to dtype.
    """
    print(f"Shape: {df.shape}")
    bytes_per_mb = 1024**2
    size_mb = df.estimated_size() / bytes_per_mb
    print(f"Memory usage: {size_mb:.2f} MB")
    print("\nColumn info:")
    # One indented "name: dtype" line per column, in schema order
    for column_name, column_dtype in df.schema.items():
        print(f"  {column_name}: {column_dtype}")

In [8]:
# Print the overview for each Narwhals-wrapped frame, sales first
for wrapped_frame in (df_sales_nw, df_regions_nw):
    polars_info(wrapped_frame)

Shape: (200000000, 2)
Memory usage: 3051.76 MB

Column info:
  id: Int64
  value: Float64
Shape: (20000, 2)
Memory usage: 1.63 MB

Column info:
  id: Int64
  region: String


In [9]:
import time
# perf_counter() is a monotonic, high-resolution clock meant for measuring
# intervals; time.time() follows the wall clock and can jump if it is adjusted.
start_time = time.perf_counter()
# 1. Join sales with region labels on 'id'
df_joined = df_sales_nw.join(df_regions_nw, on="id", how="inner")
print("After join, columns:", df_joined.columns)
print("Sample row (native):", nw.to_native(df_joined).iloc[0].to_dict())
# 2. Add a derived column: value normalized by overall mean
overall_mean = pdf_sales["value"].mean()  # compute using pandas for reference
df_joined = df_joined.with_columns(
    (nw.col("value") / overall_mean).alias("value_norm")
)
# 3. Filter rows: keep only transactions with positive value
df_joined = df_joined.filter(nw.col("value") > 0)
print("Post-filter shape (native):", nw.to_native(df_joined).shape)
# 4. Group by region and aggregate total and average value
df_summary = df_joined.group_by("region").agg(
    nw.col("value").sum().alias("total_value"),
    nw.col("value").mean().alias("avg_value"),
    nw.len().alias("transaction_count")
)
# Convert result to native pandas for display
summary_native = nw.to_native(df_summary)
print("Summary by region (pandas):\n", summary_native)
end_time = time.perf_counter()
print(f"Execution time: {end_time - start_time:.4f} seconds")

After join, columns: ['id', 'value', 'region']
Sample row (native): {'id': 1785, 'value': 54.31, 'region': 'North'}
Post-filter shape (native): (195448682, 4)
Summary by region (pandas):
   region   total_value   avg_value  transaction_count
0  North  5.038577e+09  102.766448           49029396
1   West  5.101366e+09  102.751330           49647685
2   East  4.878919e+09  102.760417           47478583
3  South  5.065127e+09  102.755460           49293018
Execution time: 45.1371 seconds


In [10]:
# Show the class of the object returned by nw.from_native for the pandas sales frame
print(type(df_sales_nw))

<class 'narwhals.dataframe.DataFrame'>


In [11]:
# perf_counter() is the monotonic clock intended for timing code sections
start_time = time.perf_counter()
# Verification using direct pandas (should match summary_native)
check = (
    pdf_sales.merge(pdf_regions, on="id")   # merge on the shared 'id' column
    .query("value > 0")                     # keep only positive transactions
    .groupby("region")["value"]
    .agg(['sum', 'mean', 'count'])
)
print(check.reset_index())
end_time = time.perf_counter()
print(f"Execution time: {end_time - start_time:.4f} seconds")


  region           sum        mean     count
0   East  4.878919e+09  102.760417  47478583
1  North  5.038577e+09  102.766448  49029396
2  South  5.065127e+09  102.755460  49293018
3   West  5.101366e+09  102.751330  49647685
Execution time: 41.7422 seconds


In [12]:
# Show the class of the pandas verification result
print(type(check))

<class 'pandas.core.frame.DataFrame'>


In [13]:
# perf_counter() is the monotonic clock intended for timing code sections
start_time = time.perf_counter()
# Using Polars directly (no Narwhals) for comparison.
# value_norm divides by the mean of the full sales column, which is what the
# Narwhals pipelines in this notebook do; the previous pl.col("value").mean()
# was evaluated after the filter and therefore normalised by a different mean.
pl_summary = (
    pl_sales.join(pl_regions, on="id", how="inner")
    .filter(pl.col("value") > 0)
    .with_columns((pl.col("value") / pl_sales["value"].mean()).alias("value_norm"))
    .group_by("region")
    .agg([
        pl.col("value").sum().alias("total_value"),
        pl.col("value").mean().alias("avg_value"),
        pl.col("value").len().alias("transaction_count")
    ])
)
print("Summary by region (Polars):\n", pl_summary)
end_time = time.perf_counter()
print(f"Execution time: {end_time - start_time:.4f} seconds")

Summary by region (Polars):
 shape: (4, 4)
┌────────┬─────────────┬────────────┬───────────────────┐
│ region ┆ total_value ┆ avg_value  ┆ transaction_count │
│ ---    ┆ ---         ┆ ---        ┆ ---               │
│ str    ┆ f64         ┆ f64        ┆ u32               │
╞════════╪═════════════╪════════════╪═══════════════════╡
│ North  ┆ 5.0386e9    ┆ 102.766448 ┆ 49029396          │
│ West   ┆ 5.1014e9    ┆ 102.75133  ┆ 49647685          │
│ South  ┆ 5.0651e9    ┆ 102.75546  ┆ 49293018          │
│ East   ┆ 4.8789e9    ┆ 102.760417 ┆ 47478583          │
└────────┴─────────────┴────────────┴───────────────────┘
Execution time: 15.3295 seconds


In [14]:
# Show the class of the summary produced by the direct Polars pipeline
print(type(pl_summary))

<class 'polars.dataframe.frame.DataFrame'>


In [15]:
# perf_counter() is the monotonic clock intended for timing code sections
start_time = time.perf_counter()
# Wrap Polars dataframes with Narwhals and reuse the same transformation pipeline
df_sales_nw_pl = nw.from_native(pl_sales)
df_regions_nw_pl = nw.from_native(pl_regions)
df_summary_pl = (
    df_sales_nw_pl.join(df_regions_nw_pl, on="id", how="inner")
    .filter(nw.col("value") > 0)
    # normalise by the mean of the full (unfiltered) Polars sales column
    .with_columns((nw.col("value") / pl_sales["value"].mean()).alias("value_norm"))
    .group_by("region")
    .agg(
        nw.col("value").sum().alias("total_value"),
        nw.col("value").mean().alias("avg_value"),
        nw.len().alias("transaction_count")
    )
)
summary_pl_native = nw.to_native(df_summary_pl)  # this will be a Polars DataFrame
print("Summary by region (via Narwhals on Polars):\n", summary_pl_native)
end_time = time.perf_counter()
print(f"Execution time: {end_time - start_time:.4f} seconds")

Summary by region (via Narwhals on Polars):
 shape: (4, 4)
┌────────┬─────────────┬────────────┬───────────────────┐
│ region ┆ total_value ┆ avg_value  ┆ transaction_count │
│ ---    ┆ ---         ┆ ---        ┆ ---               │
│ str    ┆ f64         ┆ f64        ┆ u32               │
╞════════╪═════════════╪════════════╪═══════════════════╡
│ South  ┆ 5.0651e9    ┆ 102.75546  ┆ 49293018          │
│ North  ┆ 5.0386e9    ┆ 102.766448 ┆ 49029396          │
│ East   ┆ 4.8789e9    ┆ 102.760417 ┆ 47478583          │
│ West   ┆ 5.1014e9    ┆ 102.75133  ┆ 49647685          │
└────────┴─────────────┴────────────┴───────────────────┘
Execution time: 20.2728 seconds


In [16]:
# Show the class of the still-wrapped summary (before nw.to_native)
print(type(df_summary_pl))

<class 'narwhals.dataframe.DataFrame'>


In [17]:
# CSV Loading Benchmark (Notebook/VS Code friendly)
# pandas vs polars vs narwhals(pandas) vs narwhals(polars)

from __future__ import annotations
from pathlib import Path
import time, statistics as stats, json
from typing import Callable, List, Dict

import pandas as pd
import polars as pl
import narwhals as nw

# ---------- timing helpers ----------
def _time_once(fn: Callable[[], object]) -> float:
    t0 = time.perf_counter(); _ = fn(); return time.perf_counter() - t0

def time_fn(fn: Callable[[], object], repeats: int = 5, warmup: int = 1) -> float:
    """Return the median duration, in seconds, of repeated calls to ``fn``.

    ``fn`` is first called ``warmup`` times with the timings thrown away
    (negative values count as 0), then timed ``repeats`` times (at least once).
    """
    # Untimed warm-up runs (I/O cache, imports)
    warmup_runs = max(0, warmup)
    for _ in range(warmup_runs):
        _time_once(fn)

    timed_runs = max(1, repeats)
    samples = []
    for _ in range(timed_runs):
        samples.append(_time_once(fn))
    return stats.median(samples)

# ---------- loaders (mirror your four cases) ----------
def pandas_loader(p: str):
    """Read the CSV at ``p`` into a pandas DataFrame."""
    frame = pd.read_csv(p)
    return frame


def polars_loader(p: str):
    """Read the CSV at ``p`` into a Polars DataFrame."""
    frame = pl.read_csv(p)
    return frame


def narwhals_pandas_loader(p: str):
    """Read the CSV at ``p`` with pandas, then wrap the result with Narwhals."""
    native = pd.read_csv(p)
    return nw.from_native(native)


def narwhals_polars_loader(p: str):
    """Read the CSV at ``p`` with Polars, then wrap the result with Narwhals."""
    native = pl.read_csv(p)
    return nw.from_native(native)

# Ordered (result-column label, loader) pairs. benchmark_csv_loading runs every
# loader on every file and stores the measured time under the label.
LOADERS: List[tuple[str, Callable[[str], object]]] = [
    ("Pandas Loading Time", pandas_loader),
    ("Polars Loading Time", polars_loader),
    ("Narwhals with Pandas Loading Time", narwhals_pandas_loader),
    ("Narwhals with Polars Loading Time", narwhals_polars_loader),
]

# ---------- benchmarking core ----------
def benchmark_csv_loading(
    files: list[str | Path] | None = None,
    *,
    repeats: int = 5,
    warmup: int = 1,
    save_csv: str | None = "loading_benchmark.csv",
    save_json: str | None = "loading_benchmark.json",
) -> pd.DataFrame:
    """
    Time every loader in ``LOADERS`` on each CSV file and return the results.

    Returns a DataFrame with a ``File`` column followed by one column per
    ``LOADERS`` entry (in ``LOADERS`` order), holding the median load time in
    seconds rounded to 6 decimals, or ``None`` if that loader raised.

    Parameters
    ----------
    files
        Paths to benchmark. If None (or empty), looks for defaults in the
        current working directory: cards_data.csv, transactions_data.csv,
        users_data.csv. If none of those exist, falls back to all *.csv files
        in the current directory.
    repeats, warmup
        Forwarded to ``time_fn`` (timed runs / untimed warm-up runs per loader).
    save_csv, save_json
        Output paths for the result table / raw rows; pass None to skip writing.

    Raises
    ------
    FileNotFoundError
        If no existing ``.csv`` file remains after filtering.

    Notes
    -----
    Only existing files with a ``.csv`` suffix are benchmarked (prevents kernel
    JSON from being picked). Every rejected candidate is reported with a
    ``[SKIP]`` line instead of being dropped silently.
    """
    cwd = Path.cwd()

    if files:
        candidates = [Path(f).expanduser().resolve() for f in files]
    else:
        # Defaults first
        defaults = [cwd / "cards_data.csv", cwd / "transactions_data.csv", cwd / "users_data.csv"]
        candidates = [p for p in defaults if p.exists()]
        # Fallback to all CSVs in CWD if defaults missing
        if not candidates:
            candidates = sorted(p for p in cwd.glob("*.csv") if p.is_file())

    # Keep only existing CSVs, telling the user about anything that is left out
    files_resolved = []
    for candidate in candidates:
        if candidate.suffix.lower() == ".csv" and candidate.exists():
            files_resolved.append(candidate)
        else:
            print(f"[SKIP] {candidate}: not an existing .csv file")
    if not files_resolved:
        raise FileNotFoundError(
            "No CSV files found. Pass explicit paths, or place CSVs in the notebook folder."
        )

    rows: List[Dict[str, object]] = []
    for path in files_resolved:
        row: Dict[str, object] = {"File": path.name}
        for col_name, loader in LOADERS:
            try:
                # Bind path and loader as defaults so the callable is self-contained
                elapsed = time_fn(
                    lambda p=str(path), load=loader: load(p),
                    repeats=repeats,
                    warmup=warmup,
                )
                row[col_name] = round(elapsed, 6)
            except Exception as e:
                # Best effort: one failing loader must not abort the whole benchmark
                row[col_name] = None
                print(f"[ERROR] {col_name} failed for {path}: {e}")
        rows.append(row)

    # Derive the columns from LOADERS so a newly registered loader is not
    # silently dropped by a hard-coded column list.
    columns = ["File", *(label for label, _ in LOADERS)]
    df = pd.DataFrame(rows, columns=columns)

    # Persist (optional)
    if save_csv:
        df.to_csv(save_csv, index=False)
    if save_json:
        with open(save_json, "w", encoding="utf-8") as f:
            json.dump(rows, f, indent=2)

    return df


In [20]:
import os
# Absolute paths of the three benchmark CSVs, all expected in the current working directory
CWD = os.getcwd()
card_file, transactions_file, users_file = (
    os.path.join(CWD, csv_name)
    for csv_name in ("cards_data.csv", "transactions_data.csv", "users_data.csv")
)

In [21]:
# Benchmark the three CSVs; the returned DataFrame is the cell's displayed output
benchmark_csv_loading(
    files=[card_file, transactions_file, users_file],
    repeats=5,
    warmup=1,
)

Unnamed: 0,File,Pandas Loading Time,Polars Loading Time,Narwhals with Pandas Loading Time,Narwhals with Polars Loading Time
0,cards_data.csv,0.012569,0.002986,0.013106,0.002632
1,transactions_data.csv,12.692821,0.754228,12.086319,0.855322
2,users_data.csv,0.004642,0.002479,0.004708,0.001281


In [None]:
"""
Pandas vs Narwhals: Comprehensive Examples and Verification
Testing each idiom from the comparison table
"""

import pandas as pd
import polars as pl
import narwhals as nw
import numpy as np
from datetime import datetime

# Create sample data for testing
def create_sample_data():
    """Return ``(pdf, plf)``: one small sample table built as a pandas and as a Polars DataFrame."""
    columns = dict(
        a=[1, 2, 3, 4, 5],
        b=[10, 20, 30, 40, 50],
        x=[1, -2, 3, -4, 5],
        y=[2, 4, 6, 8, 10],
        s=['foo', 'bar', 'foobar', 'baz', 'foo'],
        g=['A', 'B', 'A', 'B', 'A'],
        v=[100, 200, None, 400, 500],
        d=['2024-01-01', '2024-01-02', '2024-01-03', '2024-01-04', '2024-01-05'],
        lst=[[1, 2], [3], [4, 5, 6], [], [7, 8]],
    )

    pandas_frame = pd.DataFrame(columns)
    # Make sure 'v' is float64 so its missing entry is held as NaN
    pandas_frame['v'] = pandas_frame['v'].astype('float64')
    polars_frame = pl.DataFrame(columns)

    return pandas_frame, polars_frame

# 1. WRAP/UNWRAP AT I/O EDGE
def test_wrap_unwrap():
    """Wrap both native frames with Narwhals, unwrap them again, and print each type."""
    print("1. WRAP/UNWRAP AT I/O EDGE")
    pandas_native, polars_native = create_sample_data()

    # pandas -> Narwhals
    wrapped_pandas = nw.from_native(pandas_native)
    print(f"Wrapped pandas type: {type(wrapped_pandas)}")

    # Polars -> Narwhals
    wrapped_polars = nw.from_native(polars_native)
    print(f"Wrapped polars type: {type(wrapped_polars)}")

    # Narwhals -> native again
    pandas_again = nw.to_native(wrapped_pandas)
    polars_again = nw.to_native(wrapped_polars)
    print(f"Unwrapped pandas type: {type(pandas_again)}")
    print(f"Unwrapped polars type: {type(polars_again)}")
    print()

# 2. SELECT COLUMNS
def test_select_columns():
    """Select columns 'a' and 'b' with pandas, with Narwhals by name, and with ``nw.col``."""
    print("2. SELECT COLUMNS")
    pandas_frame, _ = create_sample_data()

    # pandas: list-of-names indexing
    picked_pandas = pandas_frame[['a', 'b']]
    print("Pandas:", picked_pandas.columns.tolist())

    # Narwhals: column names as positional arguments
    wrapped = nw.from_native(pandas_frame)
    picked_by_name = nw.to_native(wrapped.select('a', 'b'))
    print("Narwhals:", picked_by_name.columns.tolist())

    # Narwhals: a list of column expressions
    column_exprs = [nw.col('a'), nw.col('b')]
    picked_by_expr = nw.to_native(wrapped.select(column_exprs))
    print("Narwhals (col):", picked_by_expr.columns.tolist())
    print()

# 3. ADD/REPLACE COLUMN
def test_add_column():
    """Add z = x + y with pandas ``assign`` and with Narwhals ``with_columns``."""
    print("3. ADD/REPLACE COLUMN")
    pandas_frame, _ = create_sample_data()

    # pandas: assign returns a new frame with the extra column
    with_z_pandas = pandas_frame.assign(z=pandas_frame.x + pandas_frame.y)
    print("Pandas new column 'z':", with_z_pandas['z'].tolist())

    # Narwhals: the sum is an expression that gets its name from alias
    z_expr = (nw.col('x') + nw.col('y')).alias('z')
    with_z_narwhals = nw.to_native(nw.from_native(pandas_frame).with_columns(z_expr))
    print("Narwhals new column 'z':", with_z_narwhals['z'].tolist())
    print()

# 4. RENAME COLUMNS
def test_rename_columns():
    """Rename a -> alpha and b -> beta with pandas and with Narwhals; print the first two names."""
    print("4. RENAME COLUMNS")
    pandas_frame, _ = create_sample_data()

    # pandas: mapping goes through the columns= keyword
    renamed_pandas = pandas_frame.rename(columns={'a': 'alpha', 'b': 'beta'})
    print("Pandas renamed:", renamed_pandas.columns.tolist()[:2])

    # Narwhals: mapping is the positional argument
    wrapped = nw.from_native(pandas_frame)
    renamed_narwhals = nw.to_native(wrapped.rename({'a': 'alpha', 'b': 'beta'}))
    print("Narwhals renamed:", renamed_narwhals.columns.tolist()[:2])
    print()

# 5. DROP COLUMNS
def test_drop_columns():
    """Drop columns 's' and 'g' with pandas and with Narwhals; print what remains."""
    print("5. DROP COLUMNS")
    pandas_frame, _ = create_sample_data()

    # pandas: names go through the columns= keyword
    trimmed_pandas = pandas_frame.drop(columns=['s', 'g'])
    print("Pandas remaining:", trimmed_pandas.columns.tolist())

    # Narwhals: names are passed as a positional list
    wrapped = nw.from_native(pandas_frame)
    trimmed_narwhals = nw.to_native(wrapped.drop(['s', 'g']))
    print("Narwhals remaining:", trimmed_narwhals.columns.tolist())
    print()

# 6. FILTER ROWS
def test_filter_rows():
    """Keep rows with x > 0 using a pandas boolean mask and a Narwhals filter expression."""
    print("6. FILTER ROWS")
    pandas_frame, _ = create_sample_data()

    # pandas: boolean-mask indexing
    positive_pandas = pandas_frame[pandas_frame.x > 0]
    print("Pandas filtered x values:", positive_pandas['x'].tolist())

    # Narwhals: the predicate is an expression handed to filter
    is_positive = nw.col('x') > 0
    positive_narwhals = nw.to_native(nw.from_native(pandas_frame).filter(is_positive))
    print("Narwhals filtered x values:", positive_narwhals['x'].tolist())
    print()

# 7. SORT
def test_sort():
    """Sort by g ascending then a descending with pandas and with Narwhals."""
    print("7. SORT")
    pandas_frame, _ = create_sample_data()

    # pandas: direction is given per key through ascending=
    ordered_pandas = pandas_frame.sort_values(['g', 'a'], ascending=[True, False])
    print("Pandas sorted (g,a):", list(zip(ordered_pandas['g'], ordered_pandas['a'])))

    # Narwhals: same keys, with the flags inverted because the keyword is descending=
    wrapped = nw.from_native(pandas_frame)
    ordered_narwhals = nw.to_native(wrapped.sort(['g', 'a'], descending=[False, True]))
    print("Narwhals sorted (g,a):", list(zip(ordered_narwhals['g'], ordered_narwhals['a'])))
    print()

# 8. HEAD/TAIL
def test_head_tail():
    """Print column 'a' of the first and last two rows, via pandas and via Narwhals."""
    print("8. HEAD/TAIL")
    pandas_frame, _ = create_sample_data()

    # pandas
    print("Pandas head(2) 'a':", pandas_frame.head(2)['a'].tolist())
    print("Pandas tail(2) 'a':", pandas_frame.tail(2)['a'].tolist())

    # Narwhals: slice the wrapped frame, then unwrap each slice
    wrapped = nw.from_native(pandas_frame)
    first_rows = nw.to_native(wrapped.head(2))
    last_rows = nw.to_native(wrapped.tail(2))
    print("Narwhals head(2) 'a':", first_rows['a'].tolist())
    print("Narwhals tail(2) 'a':", last_rows['a'].tolist())
    print()

# 9. SAMPLE ROWS
def test_sample():
    """Draw three rows with pandas and with Narwhals and print the sample sizes."""
    print("9. SAMPLE ROWS")
    pandas_frame, _ = create_sample_data()

    # pandas: reproducibility keyword is random_state
    drawn_pandas = pandas_frame.sample(n=3, random_state=42)
    print("Pandas sample size:", len(drawn_pandas))

    # Narwhals: the corresponding keyword is seed
    wrapped = nw.from_native(pandas_frame)
    drawn_narwhals = nw.to_native(wrapped.sample(n=3, seed=42))
    print("Narwhals sample size:", len(drawn_narwhals))
    print()

# 10. DISTINCT/DROP DUPLICATES
def test_distinct():
    """De-duplicate rows on column 'g' with pandas ``drop_duplicates`` and Narwhals ``unique``."""
    print("10. DISTINCT/DROP DUPLICATES")
    pandas_frame, _ = create_sample_data()

    # pandas
    deduped_pandas = pandas_frame.drop_duplicates(subset=['g'])
    print("Pandas unique 'g' values:", deduped_pandas['g'].tolist())

    # Narwhals
    wrapped = nw.from_native(pandas_frame)
    deduped_narwhals = nw.to_native(wrapped.unique(subset=['g']))
    print("Narwhals unique 'g' values:", deduped_narwhals['g'].tolist())
    print()

# 11. UNIQUE VALUES (Series)
def test_unique_values():
    """Print the sorted distinct values of column 's' from pandas and from Narwhals."""
    print("11. UNIQUE VALUES (Series)")
    pandas_frame, _ = create_sample_data()

    # pandas: Series.unique()
    distinct_pandas = pandas_frame['s'].unique()
    print("Pandas unique 's':", sorted(distinct_pandas))

    # Narwhals: select a unique() expression, which yields a one-column frame
    distinct_expr = nw.col('s').unique().alias('s')
    distinct_narwhals = nw.to_native(nw.from_native(pandas_frame).select(distinct_expr))
    print("Narwhals unique 's':", sorted(distinct_narwhals['s'].tolist()))
    print()

# 12. COALESCE/FIRST NON-NULL
def test_coalesce():
    """Take the first non-null of a, b, c per row: chained pandas ``fillna`` vs ``nw.coalesce``."""
    print("12. COALESCE/FIRST NON-NULL")
    frame = pd.DataFrame({
        'a': [None, 2, None, 4],
        'b': [10, None, 30, None],
        'c': [100, 200, 300, 400]
    })

    # pandas: fall back from a to b to c; stored as column 'z' on the frame
    frame['z'] = frame['a'].fillna(frame['b']).fillna(frame['c'])
    print("Pandas coalesce:", frame['z'].tolist())

    # Narwhals: coalesce over the three column expressions, written to 'z' as well
    wrapped = nw.from_native(frame)
    first_non_null = nw.coalesce([nw.col('a'), nw.col('b'), nw.col('c')])
    coalesced = nw.to_native(wrapped.with_columns(first_non_null.alias('z')))
    print("Narwhals coalesce:", coalesced['z'].tolist())
    print()

# 13. FILL MISSING
def test_fill_missing():
    """Fill missing values (a -> 0, b -> -1) with pandas ``fillna`` and Narwhals ``fill_null``."""
    print("13. FILL MISSING")
    frame = pd.DataFrame({'a': [1, None, 3], 'b': [None, 20, 30]})

    # pandas: per-column replacement values in one mapping
    filled_pandas = frame.fillna({'a': 0, 'b': -1})
    print("Pandas filled:", filled_pandas.values.tolist())

    # Narwhals: one fill_null expression per column
    fill_a = nw.col('a').fill_null(0).alias('a')
    fill_b = nw.col('b').fill_null(-1).alias('b')
    filled_narwhals = nw.to_native(nw.from_native(frame).with_columns(fill_a, fill_b))
    print("Narwhals filled:", filled_narwhals.values.tolist())
    print()

# 14. DROP MISSING
def test_drop_missing():
    """Drop rows whose 'a' is missing with pandas ``dropna`` and Narwhals ``drop_nulls``."""
    print("14. DROP MISSING")
    frame = pd.DataFrame({'a': [1, None, 3, 4], 'b': [10, 20, None, 40]})

    # pandas
    kept_pandas = frame.dropna(subset=['a'])
    print("Pandas after dropping nulls in 'a':", len(kept_pandas))

    # Narwhals
    wrapped = nw.from_native(frame)
    kept_narwhals = nw.to_native(wrapped.drop_nulls(['a']))
    print("Narwhals after dropping nulls in 'a':", len(kept_narwhals))
    print()

# 15. TYPE CAST
def test_type_cast():
    """Cast column 'a' to 64-bit float with pandas ``astype`` and Narwhals ``cast``."""
    print("15. TYPE CAST")
    frame = pd.DataFrame({'a': [1, 2, 3]})

    # pandas: target dtype given as a string
    cast_pandas = frame.astype({'a': 'float64'})
    print("Pandas dtype:", cast_pandas['a'].dtype)

    # Narwhals: target dtype is the nw.Float64 type object
    to_float = nw.col('a').cast(nw.Float64).alias('a')
    cast_narwhals = nw.to_native(nw.from_native(frame).with_columns(to_float))
    print("Narwhals dtype:", cast_narwhals['a'].dtype)
    print()

# 16. STRING CONTAINS
def test_string_contains():
    """Keep rows whose 's' contains the literal text 'foo', via pandas and via Narwhals."""
    print("16. STRING CONTAINS")
    pandas_frame, _ = create_sample_data()

    # pandas: regex=False asks for a plain substring match
    matching_pandas = pandas_frame[pandas_frame.s.str.contains('foo', regex=False)]
    print("Pandas strings with 'foo':", matching_pandas['s'].tolist())

    # Narwhals (literal=True ≈ regex=False)
    has_foo = nw.col('s').str.contains('foo', literal=True)
    matching_narwhals = nw.to_native(nw.from_native(pandas_frame).filter(has_foo))
    print("Narwhals strings with 'foo':", matching_narwhals['s'].tolist())
    print()

# 17. STRING STARTS/ENDS
def test_string_starts_ends():
    """Keep rows whose 's' starts with 'f': pandas ``startswith`` vs Narwhals ``starts_with``."""
    print("17. STRING STARTS/ENDS")
    pandas_frame, _ = create_sample_data()

    # pandas
    prefixed_pandas = pandas_frame[pandas_frame.s.str.startswith('f')]
    print("Pandas starts with 'f':", prefixed_pandas['s'].tolist())

    # Narwhals
    starts_with_f = nw.col('s').str.starts_with('f')
    prefixed_narwhals = nw.to_native(nw.from_native(pandas_frame).filter(starts_with_f))
    print("Narwhals starts with 'f':", prefixed_narwhals['s'].tolist())
    print()

# 18. STRING LOWER/UPPER
def test_string_case():
    """Lower-case column 's' with pandas ``str.lower`` and Narwhals ``str.to_lowercase``."""
    print("18. STRING LOWER/UPPER")
    frame = pd.DataFrame({'s': ['Hello', 'WORLD', 'MiXeD']})

    # pandas: work on a copy so the input frame keeps its original casing
    lowered_pandas = frame.copy()
    lowered_pandas['s'] = lowered_pandas['s'].str.lower()
    print("Pandas lower:", lowered_pandas['s'].tolist())

    # Narwhals
    lower_expr = nw.col('s').str.to_lowercase().alias('s')
    lowered_narwhals = nw.to_native(nw.from_native(frame).with_columns(lower_expr))
    print("Narwhals lower:", lowered_narwhals['s'].tolist())
    print()

# 19. CONDITIONAL COLUMN
def test_conditional():
    """Label rows 'Positive' when x > 0, else 'Negative': ``np.where`` vs ``nw.when``."""
    print("19. CONDITIONAL COLUMN")
    pandas_frame, _ = create_sample_data()

    # pandas: np.where result stored as column 'z' on the frame
    pandas_frame['z'] = np.where(pandas_frame.x > 0, 'Positive', 'Negative')
    print("Pandas conditional:", pandas_frame['z'].tolist())

    # Narwhals: when/then/otherwise expression, written to 'z' as well
    wrapped = nw.from_native(pandas_frame)
    sign_label = (
        nw.when(nw.col('x') > 0)
        .then(nw.lit('Positive'))
        .otherwise(nw.lit('Negative'))
    )
    labelled = nw.to_native(wrapped.with_columns(sign_label.alias('z')))
    print("Narwhals conditional:", labelled['z'].tolist())
    print()

# 20. GROUP & AGGREGATE
def test_group_aggregate():
    """Sum and mean of 'b' per group 'g', computed with pandas and with Narwhals."""
    print("20. GROUP & AGGREGATE")
    pandas_frame, _ = create_sample_data()

    # pandas: groupby + agg, then the group key is moved back into a column
    stats_pandas = pandas_frame.groupby('g')['b'].agg(['sum', 'mean']).reset_index()
    print("Pandas grouped:", stats_pandas.values.tolist())

    # Narwhals: one named expression per statistic
    total_expr = nw.col('b').sum().alias('sum')
    mean_expr = nw.col('b').mean().alias('mean')
    grouped = nw.from_native(pandas_frame).group_by('g').agg(total_expr, mean_expr)
    stats_narwhals = nw.to_native(grouped)
    print("Narwhals grouped:", stats_narwhals.values.tolist())
    print()

# 21. GROUP ROW COUNT
def test_group_count():
    """Count rows per group 'g' with pandas ``size`` and Narwhals ``nw.len``."""
    print("21. GROUP ROW COUNT")
    pandas_frame, _ = create_sample_data()

    # pandas
    sizes_pandas = pandas_frame.groupby('g').size().reset_index(name='size')
    print("Pandas group sizes:", sizes_pandas.values.tolist())

    # Narwhals
    row_count = nw.len().alias('size')
    sizes_narwhals = nw.to_native(nw.from_native(pandas_frame).group_by('g').agg(row_count))
    print("Narwhals group sizes:", sizes_narwhals.values.tolist())
    print()

# 22. WINDOW FUNCTIONS
def test_window_functions():
    """Attach the per-group sum of 'b' to every row: pandas ``transform`` vs Narwhals ``over``."""
    print("22. WINDOW FUNCTIONS")
    pandas_frame, _ = create_sample_data()

    # pandas: transform result stored as column 'b_sum' on the frame
    pandas_frame['b_sum'] = pandas_frame.groupby('g')['b'].transform('sum')
    print(
        "Pandas window sum:",
        list(zip(pandas_frame['g'], pandas_frame['b'], pandas_frame['b_sum'])),
    )

    # Narwhals: sum().over('g'), written to 'b_sum' as well
    wrapped = nw.from_native(pandas_frame)
    group_total = nw.col('b').sum().over('g').alias('b_sum')
    windowed = nw.to_native(wrapped.with_columns(group_total))
    print(
        "Narwhals window sum:",
        list(zip(windowed['g'], windowed['b'], windowed['b_sum'])),
    )
    print()

# 23. EXPLODE LIST COLUMN
def test_explode():
    """Explode a list column: pandas ``explode`` on a pandas frame, Narwhals ``explode`` on a Polars frame."""
    print("23. EXPLODE LIST COLUMN")
    # The Narwhals half of this example runs on a Polars frame, so the Polars
    # frame is created first and the list column is typed by Polars.
    polars_frame = pl.DataFrame({'id': [1, 2], 'lst': [[1, 2, 3], [4, 5]]})

    # pandas comparison on the same data
    pandas_frame = pd.DataFrame({'id': [1, 2], 'lst': [[1, 2, 3], [4, 5]]})
    exploded_pandas = pandas_frame.explode('lst')
    print("Pandas exploded:", exploded_pandas['lst'].tolist())

    # Narwhals way - using Polars backend which properly handles lists
    wrapped = nw.from_native(polars_frame)
    exploded_polars = nw.to_native(wrapped.explode('lst'))
    print("Narwhals exploded (Polars backend):", exploded_polars['lst'].to_list())

    # Note: For pandas backend with Narwhals, list columns need special handling
    # The issue is that pandas doesn't have a native List dtype like Polars
    print()

# 24. JOIN OPERATIONS
def test_joins():
    """Inner-join two small frames on 'id' with pandas ``merge`` and Narwhals ``join``."""
    print("24. JOIN OPERATIONS")
    left_pandas = pd.DataFrame({'id': [1, 2, 3], 'val1': ['a', 'b', 'c']})
    right_pandas = pd.DataFrame({'id': [2, 3, 4], 'val2': ['x', 'y', 'z']})

    # Pandas inner join
    joined_pandas = left_pandas.merge(right_pandas, on='id', how='inner')
    print("Pandas inner join:", joined_pandas.values.tolist())

    # Narwhals inner join
    left_wrapped = nw.from_native(left_pandas)
    right_wrapped = nw.from_native(right_pandas)
    joined_narwhals = nw.to_native(left_wrapped.join(right_wrapped, on='id', how='inner'))
    print("Narwhals inner join:", joined_narwhals.values.tolist())
    print()

# Run all tests
# Run all tests
if __name__ == "__main__":
    banner = "=" * 60
    print(banner)
    print("PANDAS VS NARWHALS: VERIFICATION OF ALL IDIOMS")
    print(banner)
    print()

    # Idiom checks, executed in the order they are numbered
    idiom_checks = (
        test_wrap_unwrap,
        test_select_columns,
        test_add_column,
        test_rename_columns,
        test_drop_columns,
        test_filter_rows,
        test_sort,
        test_head_tail,
        test_sample,
        test_distinct,
        test_unique_values,
        test_coalesce,
        test_fill_missing,
        test_drop_missing,
        test_type_cast,
        test_string_contains,
        test_string_starts_ends,
        test_string_case,
        test_conditional,
        test_group_aggregate,
        test_group_count,
        test_window_functions,
        test_explode,
        test_joins,
    )
    for run_check in idiom_checks:
        run_check()

    print(banner)
    print("ALL TESTS COMPLETED")
    print(banner)

PANDAS VS NARWHALS: VERIFICATION OF ALL IDIOMS

1. WRAP/UNWRAP AT I/O EDGE
Wrapped pandas type: <class 'narwhals.dataframe.DataFrame'>
Wrapped polars type: <class 'narwhals.dataframe.DataFrame'>
Unwrapped pandas type: <class 'pandas.core.frame.DataFrame'>
Unwrapped polars type: <class 'polars.dataframe.frame.DataFrame'>

2. SELECT COLUMNS
Pandas: ['a', 'b']
Narwhals: ['a', 'b']
Narwhals (col): ['a', 'b']

3. ADD/REPLACE COLUMN
Pandas new column 'z': [3, 2, 9, 4, 15]
Narwhals new column 'z': [3, 2, 9, 4, 15]

4. RENAME COLUMNS
Pandas renamed: ['alpha', 'beta']
Narwhals renamed: ['alpha', 'beta']

5. DROP COLUMNS
Pandas remaining: ['a', 'b', 'x', 'y', 'v', 'd', 'lst']
Narwhals remaining: ['a', 'b', 'x', 'y', 'v', 'd', 'lst']

6. FILTER ROWS
Pandas filtered x values: [1, 3, 5]
Narwhals filtered x values: [1, 3, 5]

7. SORT
Pandas sorted (g,a): [('A', 5), ('A', 3), ('A', 1), ('B', 4), ('B', 2)]
Narwhals sorted (g,a): [('A', 5), ('A', 3), ('A', 1), ('B', 4), ('B', 2)]

8. HEAD/TAIL
Pandas h