In [1]:
import torch 
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import numpy as np

In [2]:
# Run on the GPU when CUDA is usable in this environment, otherwise on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [None]:
# Disabled earlier draft of `prepare_tensors`: the whole definition is wrapped
# in a bare triple-quoted string, so running this cell defines nothing.
# It differs from the active version by fitting a single MinMaxScaler on the
# training features instead of using fixed per-column ranges.
# NOTE(review): the draft calls `.to_numpy()` on the scaler outputs; this is
# presumably what produced the recorded
# "'numpy.ndarray' object has no attribute 'to_numpy'" error - confirm before
# ever re-enabling this code.
"""

# Step 3: Convert to PyTorch tensors and split into train/test sets
def prepare_tensors(train_df, test_df, device):

    # Step 1: Select the relevant input variables (4 variables)
    train_features = train_df[['id_k', 'iq_k', 'epsilon_k']].copy()
    test_features = test_df[['id_k', 'iq_k', 'epsilon_k']].copy()

    # Step 2: Apply the transformation for 'cos(epsilon_k)' and 'sin(epsilon_k)'
    train_features['cos_epsilon_k'] = train_features['epsilon_k'].apply(lambda x: np.cos(x))
    train_features['sin_epsilon_k'] = train_features['epsilon_k'].apply(lambda x: np.sin(x))
    
    test_features['cos_epsilon_k'] = test_features['epsilon_k'].apply(lambda x: np.cos(x))
    test_features['sin_epsilon_k'] = test_features['epsilon_k'].apply(lambda x: np.sin(x))
    
    # Remove the 'epsilon_k' column after transformation
    train_features = train_features.drop(columns=['epsilon_k'])
    test_features = test_features.drop(columns=['epsilon_k'])

    # Step 3: Select target variables (id_k1, iq_k1)
    train_target = train_df[['id_k1', 'iq_k1']].to_numpy()
    test_target = test_df[['id_k1', 'iq_k1']].to_numpy()

    # Step 4: Scale features using MinMaxScaler
    scaler = MinMaxScaler()
    normalized_train_features = scaler.fit_transform(train_features)
    normalized_test_features = scaler.transform(test_features)

    # Step 5: Convert features and target variables to PyTorch tensors
    train_data_tensor = torch.tensor(normalized_train_features.to_numpy(), dtype=torch.float32)
    test_data_tensor = torch.tensor(normalized_test_features.to_numpy(), dtype=torch.float32)

    train_target_tensor = torch.tensor(train_target, dtype=torch.float32)
    test_target_tensor = torch.tensor(test_target, dtype=torch.float32)

    # Step 6: Split the data into train and test sets (already done by using train/test CSV)
    train_data, test_data = train_data_tensor, test_data_tensor
    train_target, test_target = train_target_tensor, test_target_tensor

    # Step 7: Create DataLoader for batching
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(train_data, train_target), batch_size=512, shuffle=True)
    test_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(test_data, test_target), batch_size=512, shuffle=False)

    # Step 8: Initialize empty tensors to store inputs and labels
    train_inputs = torch.empty(0, 4, device=device)
    train_labels = torch.empty(0, 2, dtype=torch.float32, device=device)
    test_inputs = torch.empty(0, 4, device=device)
    test_labels = torch.empty(0, 2, dtype=torch.float32, device=device)

    # Process training data
    for data, labels in tqdm(train_loader, desc="Processing Train Data"):
        train_inputs = torch.cat((train_inputs, data.to(device)), dim=0)
        train_labels = torch.cat((train_labels, labels.to(device)), dim=0)

    # Process testing data
    for data, labels in tqdm(test_loader, desc="Processing Test Data"):
        test_inputs = torch.cat((test_inputs, data.to(device)), dim=0)
        test_labels = torch.cat((test_labels, labels.to(device)), dim=0)

    # Store the dataset in a dictionary
    dataset = {}
    dataset['train_input'] = train_inputs
    dataset['test_input'] = test_inputs
    dataset['train_label'] = train_labels
    dataset['test_label'] = test_labels

    return dataset"""


In [None]:
def prepare_tensors(train_df, test_df, device, id_range=(-240.0, 0.0),
                    iq_range=(0.0, 240.0), shuffle_train=True):
    """Build scaled input tensors and label tensors for the train/test splits.

    Inputs are the four columns ``[id_k, iq_k, cos(epsilon_k), sin(epsilon_k)]``,
    each mapped with a fixed min-max transform (the given range -> [0, 1]);
    values outside the range are not clipped. Labels are the unscaled
    ``[id_k1, iq_k1]`` columns.

    Args:
        train_df: DataFrame with columns ``id_k``, ``iq_k``, ``epsilon_k``,
            ``id_k1`` and ``iq_k1``.
        test_df: DataFrame with the same columns as ``train_df``.
        device: Torch device (or device string) the returned tensors live on.
        id_range: ``(low, high)`` mapped onto ``[0, 1]`` for ``id_k``.
        iq_range: ``(low, high)`` mapped onto ``[0, 1]`` for ``iq_k``.
        shuffle_train: When True (default), the training rows are returned in
            a random order drawn from torch's global RNG; inputs and labels
            share the same permutation. Test rows always keep their order.

    Returns:
        dict with float32 tensors ``'train_input'`` (N, 4), ``'test_input'``
        (M, 4), ``'train_label'`` (N, 2) and ``'test_label'`` (M, 2).

    Raises:
        KeyError: If a required column is missing from either DataFrame.
        ValueError: If a scaling range does not satisfy ``high > low``.
    """
    train_inputs = _scaled_feature_tensor(train_df, id_range, iq_range)
    test_inputs = _scaled_feature_tensor(test_df, id_range, iq_range)

    train_labels = torch.tensor(train_df[['id_k1', 'iq_k1']].to_numpy(), dtype=torch.float32)
    test_labels = torch.tensor(test_df[['id_k1', 'iq_k1']].to_numpy(), dtype=torch.float32)

    if shuffle_train:
        # One permutation applied to both tensors keeps rows aligned and avoids
        # re-concatenating the tensors batch by batch.
        order = torch.randperm(train_inputs.shape[0])
        train_inputs = train_inputs[order]
        train_labels = train_labels[order]

    # Each split is moved to the target device in a single transfer.
    return {
        'train_input': train_inputs.to(device),
        'test_input': test_inputs.to(device),
        'train_label': train_labels.to(device),
        'test_label': test_labels.to(device),
    }


def _min_max_scale(values, value_range):
    """Map ``values`` affinely so that ``value_range`` lands on [0, 1] (no clipping)."""
    low, high = value_range
    if not high > low:
        raise ValueError(
            "scaling range must satisfy high > low, got {!r}".format(value_range)
        )
    # Same arithmetic as a min-max scaler fitted on [low, high]:
    # x * scale + offset, with offset = 0 - low * scale.
    scale = 1.0 / (high - low)
    return values * scale + (0.0 - low * scale)


def _scaled_feature_tensor(df, id_range, iq_range):
    """Return the (rows, 4) float32 tensor of scaled model inputs for ``df``."""
    epsilon = df['epsilon_k'].to_numpy(dtype=np.float64)
    columns = [
        _min_max_scale(df['id_k'].to_numpy(dtype=np.float64), id_range),
        _min_max_scale(df['iq_k'].to_numpy(dtype=np.float64), iq_range),
        # The angle is encoded as (cos, sin); both lie in [-1, 1].
        _min_max_scale(np.cos(epsilon), (-1.0, 1.0)),
        _min_max_scale(np.sin(epsilon), (-1.0, 1.0)),
    ]
    return torch.tensor(np.stack(columns, axis=1), dtype=torch.float32)


In [4]:

# Read the pre-split train/test CSV files into DataFrames.
train_df, test_df = (
    pd.read_csv(csv_path)
    for csv_path in ('../Data/train_data.csv', '../Data/test_data.csv')
)

# Convert both splits into the dictionary of tensors built by prepare_tensors.
model1_dataset = prepare_tensors(train_df, test_df, device)

# Show the resulting dictionary under a heading line.
print("Prepared Dataset:", model1_dataset, sep="\n")

AttributeError: 'numpy.ndarray' object has no attribute 'to_numpy'

In [None]:

# Persist the prepared dataset dictionary next to the source CSV files.
# The path uses forward slashes, matching the '../Data/...' paths used when
# loading: the previous backslash-separated literal relied on unrecognised
# escape sequences and only resolved to the Data directory on Windows.
torch.save(model1_dataset, '../Data/Dataset_Electric_Motor.pt')

# Report the shape of every tensor that was saved.
print(f"Train data shape: {model1_dataset['train_input'].shape}")
print(f"Train target shape: {model1_dataset['train_label'].shape}")
print(f"Test data shape: {model1_dataset['test_input'].shape}")
print(f"Test target shape: {model1_dataset['test_label'].shape}")
print("====================================")

Train data shape: torch.Size([169488, 4])
Train target shape: torch.Size([169488, 2])
Test data shape: torch.Size([42372, 4])
Test target shape: torch.Size([42372, 2])
