Dado que en casa no tengo GPU Nvidia, usé google colabs y necesito instalar algunas librerías.

In [None]:
!pip install polars torch tensorboard


In [1]:
import torch
from torch.utils.tensorboard import SummaryWriter
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Using device:', device)
if device.type == 'cuda':
    print(torch.cuda.get_device_name(0))
    print('Memory Usage:')
    print('Allocated:', round(torch.cuda.memory_allocated(0)/1024**3,1), 'GB')
    print('Cached:   ', round(torch.cuda.memory_reserved(0)/1024**3,1), 'GB')

Using device: cuda
Tesla T4
Memory Usage:
Allocated: 0.0 GB
Cached:    0.0 GB


In [3]:
import polars as pl

col_names = ['modified Zurich class', 'largest spot size', 'spot distribution', 'activity', 'evolution',
             'previous 24 hour flare activity', 'historically-complex', 'became complex on this pass', 'area',
             'area of largest spot', 'common flares', 'moderate flares', 'severe flares']


lf = pl.read_csv(
    'flare.data1',
    separator=' ',
    has_header=False,
    new_columns=col_names,
    skip_rows=1
).lazy()


lf.head(5).collect()

modified Zurich class,largest spot size,spot distribution,activity,evolution,previous 24 hour flare activity,historically-complex,became complex on this pass,area,area of largest spot,common flares,moderate flares,severe flares
str,str,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64
"""C""","""S""","""O""",1,2,1,1,2,1,2,0,0,0
"""D""","""S""","""O""",1,3,1,1,2,1,2,0,0,0
"""C""","""S""","""O""",1,3,1,1,2,1,1,0,0,0
"""D""","""S""","""O""",1,3,1,1,2,1,2,0,0,0
"""D""","""A""","""O""",1,3,1,1,2,1,2,0,0,0


Codificación manual de las columnas categóricas

In [6]:
# Posibles valores de cada columna
zurich_classes = ['A', 'B', 'C', 'D', 'E', 'F', 'H']
spot_sizes = ['X', 'R', 'S', 'A', 'H', 'K']
distributions = ['X', 'O', 'I', 'C']

# configuración de cada columna
# se guardan las 3 en una lista para añadirlas más tarde al dataset
expr_list = [
    (pl.col('modified Zurich class') == z).cast(pl.Int8).alias(f"zurich class {z}")
    for z in zurich_classes
] + [
    (pl.col('largest spot size') == s).cast(pl.Int8).alias(f"largest spot size {s}")
    for s in spot_sizes
] + [
    (pl.col('spot distribution') == d).cast(pl.Int8).alias(f"spot distribution {d}")
    for d in distributions
]

# añadir las nuevas columnas dummy al dataset original
lf_encoded = lf.with_columns(expr_list)

# eliminamos las columnas categóricas que ya no nos hacen falta
lf_encoded = lf_encoded.drop(['modified Zurich class', 'largest spot size', 'spot distribution'])


lf_encoded.head(5).collect()



activity,evolution,previous 24 hour flare activity,historically-complex,became complex on this pass,area,area of largest spot,common flares,moderate flares,severe flares,zurich class A,zurich class B,zurich class C,zurich class D,zurich class E,zurich class F,zurich class H,largest spot size X,largest spot size R,largest spot size S,largest spot size A,largest spot size H,largest spot size K,spot distribution X,spot distribution O,spot distribution I,spot distribution C
i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8,i8
1,2,1,1,2,1,2,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0
1,3,1,1,2,1,2,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0
1,3,1,1,2,1,1,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,1,0,0
1,3,1,1,2,1,2,0,0,0,0,0,0,1,0,0,0,0,0,1,0,0,0,0,1,0,0
1,3,1,1,2,1,2,0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0


Separar target

In [8]:
columnas_target = [
    'common flares',
    'moderate flares',
    'severe flares'
]

resto = [columna for columna in lf_encoded.columns if columna not in columnas_target]

X = lf_encoded.select([pl.col(col) for col in resto])
Y = lf_encoded.select([pl.col(col) for col in columnas_target])


print(X.head(3).collect())
print(Y.head(3).collect())

s

shape: (3, 24)
┌──────────┬───────────┬───────────┬───────────┬───┬───────────┬───────────┬───────────┬───────────┐
│ activity ┆ evolution ┆ previous  ┆ historica ┆ … ┆ spot dist ┆ spot dist ┆ spot dist ┆ spot dist │
│ ---      ┆ ---       ┆ 24 hour   ┆ lly-compl ┆   ┆ ribution  ┆ ribution  ┆ ribution  ┆ ribution  │
│ i64      ┆ i64       ┆ flare     ┆ ex        ┆   ┆ X         ┆ O         ┆ I         ┆ C         │
│          ┆           ┆ activit…  ┆ ---       ┆   ┆ ---       ┆ ---       ┆ ---       ┆ ---       │
│          ┆           ┆ ---       ┆ i64       ┆   ┆ i8        ┆ i8        ┆ i8        ┆ i8        │
│          ┆           ┆ i64       ┆           ┆   ┆           ┆           ┆           ┆           │
╞══════════╪═══════════╪═══════════╪═══════════╪═══╪═══════════╪═══════════╪═══════════╪═══════════╡
│ 1        ┆ 2         ┆ 1         ┆ 1         ┆ … ┆ 0         ┆ 1         ┆ 0         ┆ 0         │
│ 1        ┆ 3         ┆ 1         ┆ 1         ┆ … ┆ 0         ┆ 1         ┆

  resto = [columna for columna in lf_encoded.columns if columna not in columnas_target]
