In [10]:
%%capture 
!pip install -r requirements.txt

In [7]:
from enum import Enum

class Scaling(Enum):
    INDEPENDENT = 1
    JOINT = 2

### Data loading & pre-processing

In [5]:
import pandas as pd
from sklearn import preprocessing

def preprocess_training_data(train: pd.DataFrame, scaling_strategy: Scaling) -> pd.DataFrame:
    # 1. replace all 100 values with -110 (ensures continuity of data)
    df = train.replace(100, -110)
    
    # 2. Scale the data to have zero mean and unit variance
    # This is done either independently for each AP or jointly for all APs
    if scaling_strategy == Scaling.INDEPENDENT:
        scaler = preprocessing.StandardScaler()
        df = pd.DataFrame(scaler.fit_transform(df), columns=df.columns)
    
    elif scaling_strategy == Scaling.JOINT:
        flattened = df.values.flatten()
        global_mean = flattened.mean()
        global_std = flattened.std()
        
        df = (df - global_mean) / global_std
    
    return df



def get_preprocessed_data(data_path: str, training_months: list[str], num_APs: int) -> pd.DataFrame:
    # Since the csv files do not have column names, we define these first.
    list_of_APs = ["AP" + str(i) for i in range(0, num_APs)]


    # Load the training data from all specified training sets.  
    df = pd.concat([pd.read_csv(data_path + training_set + 'trn01rss.csv', names=list_of_APs) for training_set in training_months])
    df = df.reset_index()

    # Pre-processing of the training data
    df = preprocess_training_data(df, Scaling.JOINT)

    return df
    

train = get_preprocessed_data("./data/", ["02/", "03/", "04/", "05/"], 620)
train

Unnamed: 0,index,AP0,AP1,AP2,AP3,AP4,AP5,AP6,AP7,AP8,...,AP610,AP611,AP612,AP613,AP614,AP615,AP616,AP617,AP618,AP619
0,0,-86,-52,-69,-88,-82,-88,-69,-90,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
1,1,-87,-49,-59,-86,-73,-85,-59,-85,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
2,2,-86,-48,-61,-84,-80,-110,-61,-84,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
3,3,-110,-48,-61,-84,-77,-86,-61,-84,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
4,4,-110,-47,-62,-85,-78,-86,-61,-110,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2299,571,-77,-110,-86,-78,-110,-77,-83,-72,-90,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
2300,572,-75,-89,-110,-78,-110,-74,-110,-74,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
2301,573,-78,-89,-110,-77,-110,-76,-85,-77,-87,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
2302,574,-78,-110,-110,-77,-85,-78,-110,-76,-110,...,-110,-110,-110,-110,-110,-110,-110,-110,-110,-110
