In [182]:
from Users.project.data_container.data_container import AzureStorageAccess
import torch
import numpy as np
import pandas as pd
from torch import optim
from Users.project.loss_func import ILossFunc
from Users.project.model import ModelController
from Users.project.optimizer import IOptimizer
from Users.project.predict_lab_time_module.data_state import DataFeaturing, EFeatureType
from Users.project.my_utils import extract_track_from_path
from torch import nn
folder_access = AzureStorageAccess()

In [183]:
def timedelta_to_seconds(td):
    if isinstance(td, str):
        td = pd.to_timedelta(td)
    else:
        return None
    return td.total_seconds()

def create_dataset_from_dataframe(data_frame: pd.DataFrame, featuring: DataFeaturing) -> tuple[list, list]:
    rpm = []
    speed = []
    gear = []
    throttle = []
    brake = []
    drs = []

    x_data = []
    y_data = []

    prev_lap_number = 1.0
    prev_lap_time = 0.0
    for row in data_frame.itertuples():
        rpm.append(row.RPM)
        speed.append(row.Speed)
        gear.append(row.nGear)
        t = row.Throttle
        if t > 100:
            t = 100
        throttle.append(t)
        brake.append(row.Brake)
        drs.append(row.DRS)
        current_lap_number = row.LapNumber
        current_lap_time = row.Time
        
        if prev_lap_number != current_lap_number:
            # 한 랩이 끝났을 때 저장해둔 모든 데이터로 피쳐만들고 라벨(시간)만들기

            feature = []
            label = timedelta_to_seconds(prev_lap_time)
            if label == None:
                return x_data, y_data
            
            # 기어는 basic넣는게 더 나을거같은데
            feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.ZeroRatio)                             # 5
            feature += featuring.feature_by_list(rpm, EFeatureType.Basic | EFeatureType.ZeroRatio)                               # 5
            feature += featuring.feature_by_list(gear, EFeatureType.Basic | EFeatureType.Change)        # 5 + 3
            feature += featuring.feature_by_list(throttle, EFeatureType.Change)    # 5 + 3
            feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                           # 3
            feature += featuring.feature_by_list(drs, EFeatureType.Basic)         # 5 + 3

            rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()

            x_data.append(feature)
            y_data.append(label)
                        
        prev_lap_number = current_lap_number
        prev_lap_time = current_lap_time

    return x_data, y_data

In [184]:
def train_regression_model(x_data: list, y_data: list,
                          model: nn.Module, device: torch.device,
                          loss_func, optimizer, epochs: int,
                          batch_size: int = 32, verbose: bool = True):
    model.to(device)
    model.train()

    # 데이터를 텐서로 변환 (한번만)
    x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
    
    dataset_size = len(x_data)
    
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        
        # 배치 단위로 학습
        for i in range(0, dataset_size, batch_size):
            end_idx = min(i + batch_size, dataset_size)
            
            x_batch = x_tensor[i:end_idx]
            y_batch = y_tensor[i:end_idx]
            
            # Forward pass
            predictions = model(x_batch)
            loss = loss_func(predictions, y_batch)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            continue  # 다음 epoch으로

        avg_loss = total_loss / num_batches
        
        if verbose and (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Average Loss: {avg_loss:.4f}")

def predict_model(x_data: list, y_data: list,
                 model: nn.Module, device: torch.device,
                 return_predictions: bool = False) -> list[float]:
    """
    모델 예측 함수 (배치 처리 개선 버전)
    
    Args:
        x_data: 입력 데이터 리스트
        y_data: 실제 타겟 데이터 리스트
        model: 학습된 PyTorch 모델
        device: 연산 장치
        return_predictions: 예측값 리스트 반환 여부
    
    Returns:
        예측값 리스트 (return_predictions=True인 경우)
    """
    model.to(device)
    model.eval()
    
    predictions = []
    
    with torch.no_grad():
        # 전체 데이터를 한번에 텐서로 변환
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).to(device)

        # 배치로 예측 (더 효율적)
        pred_tensor = model(x_tensor).squeeze()
        predictions = pred_tensor.cpu().numpy().tolist()
        
        # 결과 출력
        for i in range(len(x_data)):
            actual = y_tensor[i].item()
            predicted = predictions[i] if isinstance(predictions, list) else predictions
            print(f"샘플 {i+1:>2}: 실제 = {actual:.2f}, 예측 = {predicted:.2f}, "
                  f"오차 = {abs(actual - predicted):.2f}")
    
    if return_predictions:
        return predictions

def evaluate_model(x_data: list, y_data: list,
                  model: nn.Module, device: torch.device) -> tuple[float, float]:
    """
    모델 성능 평가 함수
    
    Returns:
        MSE, MAE 튜플
    """
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
        
        predictions = model(x_tensor)
        
        mse = nn.MSELoss()(predictions, y_tensor).item()
        mae = nn.L1Loss()(predictions, y_tensor).item()
        
    return mse, mae

In [185]:
%load_ext autoreload
%autoreload 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class MyNN(nn.Module):
    def __init__(self):
        super().__init__()
        self.mgnn = nn.Sequential(
            nn.Linear(31, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1)
        )

    def forward(self, x):
        return self.mgnn(x)

model = MyNN().to(device)

loss_func = nn.MSELoss()   
o = optim.Adam(model.parameters(), lr = 0.001)

featuring = DataFeaturing()
count = 500
for file in folder_access.get_all_file():
    if ".csv" not in file.name: 
        continue

    track_name = extract_track_from_path(file.name)

    if track_name != "Australian_Grand_Prix":
        continue

    if "car_data_all.csv" not in file.name:
        continue

    data_frame = folder_access.read_csv_by_data_frame(file.name)
    x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
    if x_data == None or y_data == None:
        continue
    print(count)
    train_regression_model(x_data, y_data, model, device, loss_func, o, 50)
    count -= 1

    if count == 0:
        break  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
500
Epoch [10/50], Average Loss: 197378.0938
Epoch [20/50], Average Loss: 57653.4141
Epoch [30/50], Average Loss: 18694.0566
Epoch [40/50], Average Loss: 1934.3435
Epoch [50/50], Average Loss: 1708.5886
499
Epoch [10/50], Average Loss: 1320.0441
Epoch [20/50], Average Loss: 356.9250
Epoch [30/50], Average Loss: 155.0759
Epoch [40/50], Average Loss: 96.2264
Epoch [50/50], Average Loss: 69.0076
498
Epoch [10/50], Average Loss: 100.8569
Epoch [20/50], Average Loss: 88.5311
Epoch [30/50], Average Loss: 77.6357
Epoch [40/50], Average Loss: 70.8370
Epoch [50/50], Average Loss: 65.9815
497
Epoch [10/50], Average Loss: 902.2554
Epoch [20/50], Average Loss: 191.4158
Epoch [30/50], Average Loss: 111.9367
Epoch [40/50], Average Loss: 98.1410
Epoch [50/50], Average Loss: 77.0590
496
Epoch [10/50], Average Loss: 2784.8862
Epoch [20/50], Average Loss: 1951.5867
Epoch [30/50], Average Loss: 1218.7916
Epoch [40/50]

In [190]:
blob_name = "2023/2023_Australian_Grand_Prix_Q/ZHO/car_data_all.csv"
data_frame = folder_access.read_csv_by_data_frame(blob_name)

x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
predict_model(x_data, y_data, model, device)

x_data, y_data = evaluate_model(x_data, y_data, model, device)
print(x_data, y_data)

샘플  1: 실제 = 112.13, 예측 = 103.25, 오차 = 8.88
샘플  2: 실제 = 79.59, 예측 = 79.39, 오차 = 0.21
샘플  3: 실제 = 102.11, 예측 = 108.52, 오차 = 6.41
샘플  4: 실제 = 588.98, 예측 = 565.11, 오차 = 23.87
샘플  5: 실제 = 96.92, 예측 = 93.97, 오차 = 2.94
샘플  6: 실제 = 92.28, 예측 = 98.88, 오차 = 6.60
샘플  7: 실제 = 78.65, 예측 = 77.08, 오차 = 1.57
샘플  8: 실제 = 114.24, 예측 = 114.54, 오차 = 0.30
샘플  9: 실제 = 78.41, 예측 = 77.49, 오차 = 0.92
샘플 10: 실제 = 112.15, 예측 = 110.70, 오차 = 1.46
샘플 11: 실제 = 78.47, 예측 = 79.09, 오차 = 0.62
68.0086441040039 4.889381408691406


In [187]:
blob_name = "2023/2023_Australian_Grand_Prix_R/ZHO/car_data_all.csv"

data_frame = folder_access.read_csv_by_data_frame(blob_name)

rpm = []
speed = []
gear = []
throttle = []
brake = []
drs = []

prev_lap_number = 1.0
prev_lap_time = 0.0

for row in data_frame.itertuples():
    rpm.append(row.RPM)
    speed.append(row.Speed)
    gear.append(row.nGear)
    throttle.append(row.Throttle)
    brake.append(row.Brake)
    drs.append(row.DRS)
    current_lap_number = row.LapNumber
    current_lap_time = row.Time
    
    if prev_lap_number != current_lap_number:
        label = timedelta_to_seconds(prev_lap_time)
        if label <= 100.0:
            # print("rpm: ", rpm)
            # print("speed: ", speed)
            # print("gear: ", gear)
            # print("throttle: ", throttle)
            # print("brake: ", brake)
            # print("drs: ", drs)
            # print("------------------------")
            feature = []
            label = timedelta_to_seconds(prev_lap_time)
            
            #feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.Change | EFeatureType.ZeroRatio)                             # 5
            feature += featuring.feature_by_list(throttle, EFeatureType.Basic | EFeatureType.Change)                               # 5
            print(feature)
        rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()
        
                    
    prev_lap_number = current_lap_number
    prev_lap_time = current_lap_time



[72.26791277258567, 100.0, 0.0, 40.05882235466199, 100.0, 0.253125, 5.4, 16.127509160565925]
[72.52777777777777, 100.0, 0.0, 40.34421362172546, 100.0, 0.24148606811145512, 5.708978328173375, 16.800126068261118]
[70.81308411214954, 100.0, 0.0, 40.81386043087603, 100.0, 0.259375, 6.1375, 16.597951869721534]
[64.23410404624278, 100.0, 0.0, 41.88611328013951, 89.5, 0.33043478260869563, 6.2898550724637685, 17.180726635142445]
[69.26315789473684, 100.0, 0.0, 41.860409540413485, 100.0, 0.2546583850931677, 5.881987577639752, 16.25205623491297]
[72.43234323432343, 100.0, 0.0, 39.564699424077034, 100.0, 0.26490066225165565, 5.827814569536423, 17.024220225249486]
[71.7684887459807, 100.0, 0.0, 39.940008690038574, 100.0, 0.26129032258064516, 5.787096774193548, 16.306127701127917]
[71.0741935483871, 100.0, 0.0, 40.16032971329872, 100.0, 0.27184466019417475, 5.805825242718447, 16.013828187822583]
[69.3963963963964, 100.0, 0.0, 41.214092647516324, 100.0, 0.2921686746987952, 6.228915662650603, 16.2791