In [1]:
import polars as pl
import folium
from shapely import wkt

In [2]:
df = pl.read_csv('caves.csv')

In [3]:
df.shape

(12861, 8)

In [4]:
df.head()

Unnamed: 0_level_0,itemLabel,coordinate_location,countryLabel,temperature,vertical_depth,elevation_above_sea_level,length
i64,str,str,str,str,f64,f64,f64
0,"""Q100269443""","""Point(8.28495 …","""Alemanha""",,,,
1,"""Q100757214""","""Point(8.240099…","""Alemanha""",,,,
2,"""Q1009597""","""Point(7.664722…","""Alemanha""",,,,
3,"""Q101579973""","""Point(2.66501 …","""França""",,,115.0,31.0
4,"""Q102178845""","""Point(22.22841…","""Sérvia""",,,,


In [5]:
df = df.with_columns(pl.col('temperature').cast(pl.Float64()))

In [6]:
df_brasil = df.filter(
    pl.col('countryLabel') == 'Brasil'
)

In [7]:
df_brasil.head()

Unnamed: 0_level_0,itemLabel,coordinate_location,countryLabel,temperature,vertical_depth,elevation_above_sea_level,length
i64,str,str,str,f64,f64,f64,f64
120,"""Q114347515""","""Point(-40.5704…","""Brasil""",,,,
3874,"""Abismo Ouro Gr…","""Point(-48.6795…","""Brasil""",,,,
4136,"""Areado Grande""","""Point(-46.3671…","""Brasil""",,,,
4809,"""Cave of Saint …","""Point(-55.3831…","""Brasil""",,,683.0,
4849,"""Caverna Aroe J…","""Point(-55.293 …","""Brasil""",,,,


In [8]:
count_country = df.group_by('countryLabel').agg(pl.count('countryLabel').alias('count')).sort('count', descending=True)
print(count_country.head(10))

shape: (10, 2)
┌──────────────────────┬───────┐
│ countryLabel         ┆ count │
│ ---                  ┆ ---   │
│ str                  ┆ u32   │
╞══════════════════════╪═══════╡
│ Espanha              ┆ 2838  │
│ Alemanha             ┆ 1004  │
│ Polónia              ┆ 888   │
│ França               ┆ 659   │
│ Hungria              ┆ 584   │
│ Brasil               ┆ 517   │
│ Bulgária             ┆ 444   │
│ Reino Unido          ┆ 411   │
│ Estados Unidos       ┆ 336   │
│ Bósnia e Herzegovina ┆ 285   │
└──────────────────────┴───────┘


In [9]:
brasil_count = count_country.filter(
    pl.col('countryLabel') == 'Brazil'
)
print(brasil_count)

shape: (0, 2)
┌──────────────┬───────┐
│ countryLabel ┆ count │
│ ---          ┆ ---   │
│ str          ┆ u32   │
╞══════════════╪═══════╡
└──────────────┴───────┘


In [10]:
temperature = (
    df.group_by('countryLabel')
    .agg(
        (pl.col('temperature').mean().alias('mean_temperature'))
    )
    .filter(
        (pl.col('mean_temperature').is_not_null())
    )
    .sort('mean_temperature', descending=True)
)
print(temperature)

shape: (3, 2)
┌──────────────┬──────────────────┐
│ countryLabel ┆ mean_temperature │
│ ---          ┆ ---              │
│ str          ┆ f64              │
╞══════════════╪══════════════════╡
│ França       ┆ 13.0             │
│ Bulgária     ┆ 10.427273        │
│ Alemanha     ┆ 3.0              │
└──────────────┴──────────────────┘


In [11]:
length = (
    df.filter(
        (pl.col('length').is_not_null())
    )
)
print(length.head(5))

shape: (5, 8)
┌─────┬────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬────────┐
│     ┆ itemLabel  ┆ coordinate_l ┆ countryLabe ┆ temperature ┆ vertical_de ┆ elevation_a ┆ length │
│ --- ┆ ---        ┆ ocation      ┆ l           ┆ ---         ┆ pth         ┆ bove_sea_le ┆ ---    │
│ i64 ┆ str        ┆ ---          ┆ ---         ┆ f64         ┆ ---         ┆ vel         ┆ f64    │
│     ┆            ┆ str          ┆ str         ┆             ┆ f64         ┆ ---         ┆        │
│     ┆            ┆              ┆             ┆             ┆             ┆ f64         ┆        │
╞═════╪════════════╪══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪════════╡
│ 3   ┆ Q101579973 ┆ Point(2.6650 ┆ França      ┆ null        ┆ null        ┆ 115.0       ┆ 31.0   │
│     ┆            ┆ 1 48.40216)  ┆             ┆             ┆             ┆             ┆        │
│ 10  ┆ Q105054041 ┆ Point(35.328 ┆ Israel      ┆ null        ┆ null        ┆

In [12]:
max_length = (
    length.group_by(pl.col(
        'countryLabel',
        'itemLabel',
        'coordinate_location',
        'vertical_depth'
        ))
    .agg(
        (pl.col('length').max())
    )
    .sort('length', descending=True)
).head(10)
print(max_length)

shape: (10, 5)
┌──────────────┬───────────────────────────┬───────────────────────────┬────────────────┬──────────┐
│ countryLabel ┆ itemLabel                 ┆ coordinate_location       ┆ vertical_depth ┆ length   │
│ ---          ┆ ---                       ┆ ---                       ┆ ---            ┆ ---      │
│ str          ┆ str                       ┆ str                       ┆ f64            ┆ f64      │
╞══════════════╪═══════════════════════════╪═══════════════════════════╪════════════════╪══════════╡
│ Ucrânia      ┆ Optymistychna Cave        ┆ Point(25.9737 48.7349)    ┆ 20.0           ┆ 264576.0 │
│ Eslováquia   ┆ Baradla cave              ┆ Point(20.5 48.4667)       ┆ null           ┆ 25500.0  │
│ Hungria      ┆ Baradla cave              ┆ Point(20.5 48.4667)       ┆ null           ┆ 25500.0  │
│ Polónia      ┆ Jaskinia Wielka Śnieżna   ┆ Point(19.923055555 49.24) ┆ null           ┆ 23753.0  │
│ Alemanha     ┆ Riesending-Schachthöhle   ┆ Point(12.9831 47.6994)    ┆ nul

In [13]:
vertical_depth = (
    df.filter(
        (pl.col('vertical_depth').is_not_null())
    )
)
print(vertical_depth.head(2))

shape: (2, 8)
┌─────┬────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬────────┐
│     ┆ itemLabel  ┆ coordinate_l ┆ countryLabe ┆ temperature ┆ vertical_de ┆ elevation_a ┆ length │
│ --- ┆ ---        ┆ ocation      ┆ l           ┆ ---         ┆ pth         ┆ bove_sea_le ┆ ---    │
│ i64 ┆ str        ┆ ---          ┆ ---         ┆ f64         ┆ ---         ┆ vel         ┆ f64    │
│     ┆            ┆ str          ┆ str         ┆             ┆ f64         ┆ ---         ┆        │
│     ┆            ┆              ┆             ┆             ┆             ┆ f64         ┆        │
╞═════╪════════════╪══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪════════╡
│ 60  ┆ Q108795471 ┆ Point(20.280 ┆ Eslováquia  ┆ null        ┆ 18.0        ┆ null        ┆ 50.0   │
│     ┆            ┆ 333          ┆             ┆             ┆             ┆             ┆        │
│     ┆            ┆ 49.237167)   ┆             ┆             ┆             ┆

In [14]:
max_vertical_depth = (
    vertical_depth.group_by(
        pl.col([
            'countryLabel',
            'itemLabel',
            'coordinate_location',
            'length'
        ])
    )
    .agg(
        (pl.col('vertical_depth').max())
    )
    .sort('vertical_depth', descending=True)
).head(10)
print(max_vertical_depth)

shape: (10, 5)
┌──────────────┬───────────────────┬──────────────────────────────────┬────────┬────────────────┐
│ countryLabel ┆ itemLabel         ┆ coordinate_location              ┆ length ┆ vertical_depth │
│ ---          ┆ ---               ┆ ---                              ┆ ---    ┆ ---            │
│ str          ┆ str               ┆ str                              ┆ f64    ┆ f64            │
╞══════════════╪═══════════════════╪══════════════════════════════════╪════════╪════════════════╡
│ Geórgia      ┆ Veryovkina Cave   ┆ Point(40.35972 43.39753)         ┆ null   ┆ 2212.0         │
│ Abecásia     ┆ Caverna Voronya   ┆ Point(40.362222222 43.409722222) ┆ null   ┆ 2199.0         │
│ Geórgia      ┆ Caverna Voronya   ┆ Point(40.362222222 43.409722222) ┆ null   ┆ 2199.0         │
│ Eslovénia    ┆ Skalarjevo brezno ┆ Point(13.4663 46.3567)           ┆ 8524.0 ┆ 1209.0         │
│ Itália       ┆ Q3603719          ┆ Point(11.064416666 46.129694444) ┆ null   ┆ 387.0          │
│ Bul

In [15]:
elevation_above_sea_level = (
    df.filter(
        (pl.col('elevation_above_sea_level').is_not_null())
    )
)
print(elevation_above_sea_level.head())

shape: (5, 8)
┌─────┬────────────┬──────────────┬─────────────┬─────────────┬─────────────┬─────────────┬────────┐
│     ┆ itemLabel  ┆ coordinate_l ┆ countryLabe ┆ temperature ┆ vertical_de ┆ elevation_a ┆ length │
│ --- ┆ ---        ┆ ocation      ┆ l           ┆ ---         ┆ pth         ┆ bove_sea_le ┆ ---    │
│ i64 ┆ str        ┆ ---          ┆ ---         ┆ f64         ┆ ---         ┆ vel         ┆ f64    │
│     ┆            ┆ str          ┆ str         ┆             ┆ f64         ┆ ---         ┆        │
│     ┆            ┆              ┆             ┆             ┆             ┆ f64         ┆        │
╞═════╪════════════╪══════════════╪═════════════╪═════════════╪═════════════╪═════════════╪════════╡
│ 3   ┆ Q101579973 ┆ Point(2.6650 ┆ França      ┆ null        ┆ null        ┆ 115.0       ┆ 31.0   │
│     ┆            ┆ 1 48.40216)  ┆             ┆             ┆             ┆             ┆        │
│ 53  ┆ Q108046952 ┆ Point(5.4023 ┆ França      ┆ null        ┆ null        ┆

In [16]:
max_elevation_above_sea_level = (
    elevation_above_sea_level.group_by(
        (pl.col([
            'countryLabel',
            'itemLabel',
            'coordinate_location',
            'vertical_depth',
            'length'
            
        ]))
    )
    .agg(
        (pl.col('elevation_above_sea_level').max().alias('max_elevation_above_sea_level'))
    )
    .sort('max_elevation_above_sea_level', descending=True)
)
print(max_elevation_above_sea_level.head())

shape: (5, 6)
┌──────────────┬───────────────────┬──────────────────┬────────────────┬────────┬──────────────────┐
│ countryLabel ┆ itemLabel         ┆ coordinate_locat ┆ vertical_depth ┆ length ┆ max_elevation_ab │
│ ---          ┆ ---               ┆ ion              ┆ ---            ┆ ---    ┆ ove_sea_level    │
│ str          ┆ str               ┆ ---              ┆ f64            ┆ f64    ┆ ---              │
│              ┆                   ┆ str              ┆                ┆        ┆ f64              │
╞══════════════╪═══════════════════╪══════════════════╪════════════════╪════════╪══════════════════╡
│ Nepal        ┆ Milarepa Cave,    ┆ Point(84.0394    ┆ null           ┆ null   ┆ 13450.0          │
│              ┆ Gandaki           ┆ 28.6369)         ┆                ┆        ┆                  │
│ Chile        ┆ Cueva Granda      ┆ Point(-68.183333 ┆ null           ┆ null   ┆ 4662.0           │
│              ┆                   ┆ 333              ┆                ┆     

In [17]:
def map_generator(dataframe, initial_coord):
    map = folium.Map(location=initial_coord, zoom_start=5)
    df = dataframe
    for coord, name, country, length, vertical_depth in zip(df['coordinate_location'], df['itemLabel'], df['countryLabel'], df['length'], df['vertical_depth']):
        html = f"""
        <h4>Name: {name}</h4>
        <h5>Country: {country}</h5>
        <h5>Length: {length}</h5>
        <h5>Vertical Depth: {vertical_depth}</h5>
        <h6>
        Coordinates:
        <code>
            {coord}
        </code>
        </h6>
        """
        popup_text = html
        lon, lat = wkt.loads(coord).xy
        folium.Marker(location=[lat[0], lon[0]], popup=popup_text).add_to(map)

    return map

## Cavernas no Brasil

In [18]:
map = map_generator(df_brasil, [-14.235004, -51.92528])
map.save('cave_brasil.html')
map

## Maiores Cavernas

In [19]:
map = map_generator(max_length, [51.359441, 23.660791])
map.save('max_length.html')
map

## Cavernas Mais Profundas 

In [20]:
map = map_generator(max_vertical_depth, [51.359441, 23.660791])
map.save('max_vertical_depth.html')
map