In [1]:
import pytorch_lightning as pl
import torch.utils.data as data
import torch.nn as nn
import pandas as pd
import torch
from sklearn.model_selection import train_test_split


In [2]:
class PandasDataset(data.Dataset):
    """Map-style dataset backed by two pandas DataFrames.

    Item ``i`` is the pair made of the ``i``-th positional row of
    ``features`` and the ``i``-th positional row of ``targets``, each
    converted to a NumPy array.
    """

    features: pd.DataFrame
    targets: pd.DataFrame

    def __init__(self, features: pd.DataFrame, targets: pd.DataFrame):
        """Keep references to the feature and target frames (no copy is made)."""
        self.features, self.targets = features, targets

    def __len__(self):
        """Return the number of rows in the feature frame."""
        # Only the feature frame is consulted for the dataset size.
        return self.features.shape[0]

    def __getitem__(self, index):
        """Return ``(feature_row, target_row)`` at position ``index`` as NumPy arrays."""
        feature_row = self.features.iloc[index]
        target_row = self.targets.iloc[index]
        return feature_row.to_numpy(), target_row.to_numpy()


class ElectionDataModule(pl.LightningDataModule):
    """Lightning data module serving stratified train/val/test splits of a CSV file.

    The CSV is read in ``setup`` and split 70 % / 15 % / 15 % into training,
    validation and test subsets, stratified on ``stratification_columns``.
    Each subset is wrapped in a ``PandasDataset`` yielding
    ``(feature_columns, target_columns)`` rows.

    Args:
        stratification_columns: Columns whose values the splits are stratified on.
        feature_columns: Columns served as model inputs.
        target_columns: Columns served as targets.
        batch_size: Batch size used by every dataloader.
        data_path: Location of the CSV file read in ``setup``.
        random_state: Seed forwarded to ``train_test_split``; ``None`` keeps
            the split non-deterministic.
    """

    batch_size: int
    data_path: str

    stratification_columns: list[str]
    feature_columns: list[str]
    target_columns: list[str]

    data_train: PandasDataset
    data_test: PandasDataset
    data_val: PandasDataset

    def __init__(
        self,
        stratification_columns: list[str],
        feature_columns: list[str],
        target_columns: list[str],
        batch_size=32,
        data_path="./data/volby/dataset_extended.csv",
        random_state=None,
    ):
        # The base class initialises its own internal state; it must run
        # before any attribute is set on the module.
        super().__init__()
        self.stratification_columns = stratification_columns
        self.feature_columns = feature_columns
        self.target_columns = target_columns
        # Kept as an alias: the feature columns used to be stored under this
        # name only, which left ``self.feature_columns`` undefined in ``setup``.
        self.columns = feature_columns
        self.batch_size = batch_size
        self.data_path = data_path
        self.random_state = random_state

    def _make_dataset(self, df: pd.DataFrame, labels) -> PandasDataset:
        """Wrap the rows of ``df`` selected by index ``labels`` in a ``PandasDataset``."""
        return PandasDataset(
            df.loc[labels, self.feature_columns],
            df.loc[labels, self.target_columns],
        )

    def setup(self, stage=None):
        """Read the CSV and build the train, validation and test datasets.

        Args:
            stage: Stage name passed by the Lightning trainer. It is accepted
                for hook compatibility and otherwise ignored: all three
                datasets are always built.
        """
        df = pd.read_csv(self.data_path)
        all_labels = df.index.to_numpy()

        # First hold out 3/20 (15 %) of all rows for testing ...
        aux_labels, test_labels = train_test_split(
            all_labels,
            test_size=3 / 20,
            stratify=df[self.stratification_columns],
            random_state=self.random_state,
        )
        # ... then 3/17 of the remaining 17/20, i.e. another 15 % of the
        # total, for validation. The remaining 70 % is the training set.
        train_labels, val_labels = train_test_split(
            aux_labels,
            test_size=3 / 17,
            stratify=df.loc[aux_labels, self.stratification_columns],
            random_state=self.random_state,
        )

        self.data_train = self._make_dataset(df, train_labels)
        self.data_val = self._make_dataset(df, val_labels)
        self.data_test = self._make_dataset(df, test_labels)

    def train_dataloader(self):
        """Return the dataloader over the training subset."""
        return data.DataLoader(self.data_train, batch_size=self.batch_size)

    def val_dataloader(self):
        """Return the dataloader over the validation subset (Lightning hook name)."""
        return data.DataLoader(self.data_val, batch_size=self.batch_size)

    def test_dataloader(self):
        """Return the dataloader over the test subset."""
        return data.DataLoader(self.data_test, batch_size=self.batch_size)

    def validation_dataloader(self):
        """Backward-compatible alias for ``val_dataloader``."""
        return self.val_dataloader()

In [3]:
class SimpleModule(nn.Module):
    """A single linear layer mapping ``num_features`` inputs to one output."""

    def __init__(self, num_features: int):
        """Create the linear layer.

        Args:
            num_features: Size of the last dimension of the input tensor.
        """
        super().__init__()
        self.layer = nn.Linear(in_features=num_features, out_features=1)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear layer to ``x`` and return its output."""
        output = self.layer(x)
        return output

In [4]:
# Scratch tensor with a single element produced by torch.rand; it is not
# used by any cell visible here.
x = torch.rand(1)