In [50]:
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from datetime import datetime

In [51]:
at200 = pd.read_csv("../data/csv_files/at200.csv")

In [52]:
# preprocess
def preprocess(df):
    df2 = df.dropna()
    le = LabelEncoder()
    df2.iloc[:, 7] = le.fit_transform(df2.iloc[:, 7]) #encode the station id
    df2['Date Time'] = df2['Date Time'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f').timestamp() * 1000) # encode date time
    df2 = df2.apply(pd.to_numeric, errors='coerce')

    #normalize
    scaler = StandardScaler()
    df2 = pd.DataFrame(scaler.fit_transform(df2), columns=df2.columns)

    tensor = df2.values
    tensor = torch.tensor(tensor, dtype = torch.float32)
    return tensor

In [53]:
preprocess_at200 = preprocess(at200)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df2['Date Time'] = df2['Date Time'].apply(lambda x: datetime.strptime(x, '%Y-%m-%d %H:%M:%S.%f').timestamp() * 1000) # encode date time


In [57]:

import torch
import torch.nn as nn

# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, input_dim):
        super(Autoencoder, self).__init__()

        # Encoder
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 6),  # Input layer with 10 hidden units
            nn.ReLU(),
            nn.Linear(6, 3),  # Hidden layer with 16 hidden units
            nn.ReLU()
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.Linear(3, 6),
            nn.ReLU(),
            nn.Linear(6, input_dim)

         # nn.Sigmoi# Output layer with same dimension as input
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded


In [60]:

# Create an instance of the Autoencoder
batches = DataLoader(preprocess_at200, batch_size = 32, shuffle = True)
model = Autoencoder(input_dim=10)

# Define optimizer and loss function
optimizer = torch.optim.Adam(model.parameters())
loss_fn = nn.MSELoss()

# Train the autoencoder
for epoch in range(100):  
    total_loss = 0
    model.train()
    for batch in batches:
        # Forward pass
        outputs = model(batch)
        loss = loss_fn(outputs, batch)
        
        # Backward pass and update weights
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
    model.eval()
    avg_loss = total_loss / len(batches)
    print(f"Epoch: {epoch}  avg Loss: {avg_loss}")

print("Autoencoder training complete!")

Epoch: 0  avg Loss: 0.35664396573354296
Epoch: 1  avg Loss: 0.09019972608903923
Epoch: 2  avg Loss: 0.054030212695238715
Epoch: 3  avg Loss: 0.048410672435009294
Epoch: 4  avg Loss: 0.04648895971754099
Epoch: 5  avg Loss: 0.044298847212233886
Epoch: 6  avg Loss: 0.0413095926402478
Epoch: 7  avg Loss: 0.037704866757947625
Epoch: 8  avg Loss: 0.03442351232856472
Epoch: 9  avg Loss: 0.03222705874758306
Epoch: 10  avg Loss: 0.030847508323638927
Epoch: 11  avg Loss: 0.030156010760597637
Epoch: 12  avg Loss: 0.02968275011280971
Epoch: 13  avg Loss: 0.02917775687049417
Epoch: 14  avg Loss: 0.02882609849995943
Epoch: 15  avg Loss: 0.028508719595865754
Epoch: 16  avg Loss: 0.028014582758763097
Epoch: 17  avg Loss: 0.027446434053543897
Epoch: 18  avg Loss: 0.026565194688485512
Epoch: 19  avg Loss: 0.02543328533376025
Epoch: 20  avg Loss: 0.024126345855312988
Epoch: 21  avg Loss: 0.023087069957672623
Epoch: 22  avg Loss: 0.022440037185715373
Epoch: 23  avg Loss: 0.02211932487599426
Epoch: 24  avg