In [2]:
from Users.project.data_container.data_container import AzureStorageAccess
import torch
from torch import nn
import pandas as pd
import numpy as np
from torch import optim
from Users.project.loss_func import ILossFunc
from Users.project.model import ModelController
from Users.project.optimizer import IOptimizer
from Users.project.predict_lab_time_module.data_state import DataFeaturing, EFeatureType
from Users.project.my_utils import extract_track_from_path
folder_access = AzureStorageAccess()

In [3]:
def timedelta_to_seconds(td):
    if isinstance(td, str):
        td = pd.to_timedelta(td)
    else:
        return None
    return td.total_seconds()

def create_dataset_from_dataframe(data_frame: pd.DataFrame, featuring: DataFeaturing) -> tuple[list, list]:
    if len(data_frame) <= 5:
        return None, None
    rpm = []
    speed = []
    gear = []
    throttle = []
    brake = []
    drs = []

    x_data = []
    y_data = []

    prev_lap_number = 1.0
    prev_lap_time = 0.0

    sum_of_all_speed = 0
    for row in data_frame.itertuples():
        rpm.append(row.RPM)
        speed.append(row.Speed)
        sum_of_all_speed += row.Speed
        gear.append(row.nGear)
        t = row.Throttle
        if t > 100:
            t = 100
        throttle.append(t)
        brake.append(row.Brake)
        drs.append(row.DRS)
        current_lap_number = row.LapNumber
        current_lap_time = row.Time
        
        if prev_lap_number != current_lap_number:
            # 한 랩이 끝났을 때 저장해둔 모든 데이터로 피쳐만들고 라벨(시간)만들기

            feature = []
            label = timedelta_to_seconds(prev_lap_time)
            if label == None:
                return x_data, y_data
            
            # 기어는 basic넣는게 더 나을거같은데
            feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.ZeroRatio)                            
            feature += featuring.feature_by_list(rpm, EFeatureType.Basic | EFeatureType.ZeroRatio)                               
            feature += featuring.feature_by_list(gear, EFeatureType.Basic | EFeatureType.Change | EFeatureType.Boolean)       
            feature += featuring.feature_by_list(throttle, EFeatureType.Change | EFeatureType.Boolean)    
            feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                          
            feature += featuring.feature_by_list(drs, EFeatureType.Change | EFeatureType.ZeroRatio | EFeatureType.Boolean)         
            feature += [featuring.trend_feature(speed).item()]    
            feature += [featuring.trend_feature(rpm).item()]       
            feature += [featuring.trend_feature(throttle).item()] 
            rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()

            x_data.append(feature)
            y_data.append(label)
                        
        prev_lap_number = current_lap_number
        prev_lap_time = current_lap_time
    if sum_of_all_speed == 0:
        return None, None
    feature = []
    label = timedelta_to_seconds(prev_lap_time)
    if label == None:
        return x_data, y_data
    
    # 기어는 basic넣는게 더 나을거같은데
    feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.ZeroRatio)                            
    feature += featuring.feature_by_list(rpm, EFeatureType.Basic | EFeatureType.ZeroRatio)                               
    feature += featuring.feature_by_list(gear, EFeatureType.Basic | EFeatureType.Change | EFeatureType.Boolean)       
    feature += featuring.feature_by_list(throttle, EFeatureType.Change | EFeatureType.Boolean)    
    feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                          
    feature += featuring.feature_by_list(drs, EFeatureType.Change | EFeatureType.ZeroRatio | EFeatureType.Boolean)         
    feature += [featuring.trend_feature(speed).item()]    
    feature += [featuring.trend_feature(rpm).item()]       
    feature += [featuring.trend_feature(throttle).item()] 
    rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()
    x_data.append(feature)
    y_data.append(label)
    return x_data, y_data


print(data_count)

48


In [4]:
def train_regression_model(x_data: list, y_data: list,
                          model: nn.Module, device: torch.device,
                          loss_func, optimizer, epochs: int,
                          batch_size: int = 32, verbose: bool = True):
    model.to(device)
    model.train()

    # 데이터를 텐서로 변환 (한번만)
    x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
    
    dataset_size = len(x_data)
    
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        
        # 배치 단위로 학습
        for i in range(0, dataset_size, batch_size):
            end_idx = min(i + batch_size, dataset_size)
            
            x_batch = x_tensor[i:end_idx]
            y_batch = y_tensor[i:end_idx]
            
            # Forward pass
            predictions = model(x_batch)
            loss = loss_func(predictions, y_batch)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            continue  # 다음 epoch으로

        avg_loss = total_loss / num_batches
        
        if verbose and (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Average Loss: {avg_loss:.4f}")

def predict_model(x_data: list, y_data: list,
                 model: nn.Module, device: torch.device,
                 return_predictions: bool = False) -> list[float]:
    """
    모델 예측 함수 (배치 처리 개선 버전)
    
    Args:
        x_data: 입력 데이터 리스트
        y_data: 실제 타겟 데이터 리스트
        model: 학습된 PyTorch 모델
        device: 연산 장치
        return_predictions: 예측값 리스트 반환 여부
    
    Returns:
        예측값 리스트 (return_predictions=True인 경우)
    """
    model.to(device)
    model.eval()
    
    predictions = []
    
    with torch.no_grad():
        # 전체 데이터를 한번에 텐서로 변환
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).to(device)

        # 배치로 예측 (더 효율적)
        pred_tensor = model(x_tensor).squeeze()
        predictions = pred_tensor.cpu().numpy().tolist()
        
        # 결과 출력
        for i in range(len(x_data)):
            actual = y_tensor[i].item()
            predicted = predictions[i] if isinstance(predictions, list) else predictions
            print(f"샘플 {i+1:>2}: 실제 = {actual:.2f}, 예측 = {predicted:.2f}, "
                  f"오차 = {abs(actual - predicted):.2f}")
    
    if return_predictions:
        return predictions

def evaluate_model(x_data: list, y_data: list,
                  model: nn.Module, device: torch.device) -> tuple[float, float]:
    """
    모델 성능 평가 함수
    
    Returns:
        MSE, MAE 튜플
    """
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
        
        predictions = model(x_tensor)
        
        mse = nn.MSELoss()(predictions, y_tensor).item()
        mae = nn.L1Loss()(predictions, y_tensor).item()
        
    return mse, mae

In [5]:
%load_ext autoreload
%autoreload 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class ImprovedNN(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 128),
            nn.ReLU(),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 16),
            nn.ReLU(),

            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.net(x)

model = ImprovedNN(data_count).to(device)

loss_func = nn.MSELoss()   
o = optim.Adam(model.parameters(), lr = 0.001)

count = 500
for file in folder_access.get_all_file():
    if ".csv" not in file.name: 
        continue

    track_name = extract_track_from_path(file.name)

    if track_name != "Miami_Grand_Prix":
        continue

    if "car_data_all.csv" not in file.name:
        continue

    data_frame = folder_access.read_csv_by_data_frame(file.name)
    x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
    if x_data == None or y_data == None:
        print(file.name)
        continue
    print(count)
    train_regression_model(x_data, y_data, model, device, loss_func, o, 100)
    count -= 1

    if count == 0:
        break  

500
Epoch [10/100], Average Loss: 53876.6250
Epoch [20/100], Average Loss: 38005.8672
Epoch [30/100], Average Loss: 21377.5215
Epoch [40/100], Average Loss: 8912.2070
Epoch [50/100], Average Loss: 5873.3213
Epoch [60/100], Average Loss: 3923.5181
Epoch [70/100], Average Loss: 2322.1685
Epoch [80/100], Average Loss: 1064.6709
Epoch [90/100], Average Loss: 402.4658
Epoch [100/100], Average Loss: 166.3875
499
Epoch [10/100], Average Loss: 251.1160
Epoch [20/100], Average Loss: 152.6628
Epoch [30/100], Average Loss: 73.7335
Epoch [40/100], Average Loss: 38.8601
Epoch [50/100], Average Loss: 37.2829
Epoch [60/100], Average Loss: 32.8121
Epoch [70/100], Average Loss: 32.6313
Epoch [80/100], Average Loss: 32.0438
Epoch [90/100], Average Loss: 31.8062
Epoch [100/100], Average Loss: 31.4478
498
Epoch [10/100], Average Loss: 347.0385
Epoch [20/100], Average Loss: 221.3673
Epoch [30/100], Average Loss: 56.6618
Epoch [40/100], Average Loss: 59.4401
Epoch [50/100], Average Loss: 34.6244
Epoch [60/1

In [6]:
for file in folder_access.get_all_file():
    if ".csv" not in file.name: 
        continue

    track_name = extract_track_from_path(file.name)

    if track_name != "Miami_Grand_Prix":
        continue

    if "car_data_all.csv" not in file.name:
        continue

    data_frame = folder_access.read_csv_by_data_frame(file.name)
    x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
    if x_data == None or y_data == None:
        continue
    if len(x_data) == 0:
        continue
    print(file.name)
    x_data, y_data = evaluate_model(x_data, y_data, model, device)
    print(x_data, y_data)

    if count == 0:
        break  

2022/2022_Miami_Grand_Prix_FP1/ALB/car_data_all.csv
53.09661865234375 5.294585704803467
2022/2022_Miami_Grand_Prix_FP1/ALO/car_data_all.csv
45.51087188720703 3.3658883571624756
2022/2022_Miami_Grand_Prix_FP1/BOT/car_data_all.csv
796.8827514648438 10.310826301574707
2022/2022_Miami_Grand_Prix_FP1/GAS/car_data_all.csv
13.644977569580078 3.2241358757019043
2022/2022_Miami_Grand_Prix_FP1/HAM/car_data_all.csv
65.38178253173828 5.4413743019104
2022/2022_Miami_Grand_Prix_FP1/LAT/car_data_all.csv
29.335556030273438 3.4449596405029297
2022/2022_Miami_Grand_Prix_FP1/LEC/car_data_all.csv
81.35282897949219 5.256961345672607
2022/2022_Miami_Grand_Prix_FP1/MAG/car_data_all.csv
100.8864974975586 5.244745254516602
2022/2022_Miami_Grand_Prix_FP1/MSC/car_data_all.csv
48.45464324951172 4.001832962036133
2022/2022_Miami_Grand_Prix_FP1/NOR/car_data_all.csv
50.71266555786133 3.5177547931671143
2022/2022_Miami_Grand_Prix_FP1/OCO/car_data_all.csv
41.757911682128906 3.914416551589966
2022/2022_Miami_Grand_Prix

In [7]:
# blob_name = "2018/2018_Australian_Grand_Prix_R/ALO/car_data_all.csv"
# blob_name = "2019/2019_Australian_Grand_Prix_Q/PER/car_data_all.csv"
# blob_name = "2018/2018_Australian_Grand_Prix_FP3/PER/car_data_all.csv"
# blob_name = "2023/2023_Australian_Grand_Prix_R/ZHO/car_data_all.csv"
# #blob_name = "2023/2023_Australian_Grand_Prix_R/LEC/car_data_all.csv"
# data_frame = folder_access.read_csv_by_data_frame(blob_name)
# #data_frame.to_csv("./car_data_all-warst.csv", index=False, encoding="utf-8")
# x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
# predict_model(x_data, y_data, model, device)
# x_data, y_data = evaluate_model(x_data, y_data, model, device)
# print(x_data, y_data)



In [8]:
# blob_name = "2023/2023_Miami_Grand_Prix_R/ZHO/car_data_all.csv"

# data_frame = folder_access.read_csv_by_data_frame(blob_name)

# rpm = []
# speed = []
# gear = []
# throttle = []
# brake = []
# drs = []

# prev_lap_number = 1.0
# prev_lap_time = 0.0

# for row in data_frame.itertuples():
#     rpm.append(row.RPM)
#     speed.append(row.Speed)
#     gear.append(row.nGear)
#     throttle.append(row.Throttle)
#     brake.append(row.Brake)
#     drs.append(row.DRS)
#     current_lap_number = row.LapNumber
#     current_lap_time = row.Time
    
#     if prev_lap_number != current_lap_number:
#         label = timedelta_to_seconds(prev_lap_time)
#         if label >= 800.0:
#             # print("rpm: ", rpm)
#             # print("speed: ", speed)
#             # print("gear: ", gear)
#             # print("throttle: ", throttle)
#             # print("brake: ", brake)
#             # print("drs: ", drs)
#             # print("------------------------")
#             feature = []
#             label = timedelta_to_seconds(prev_lap_time)
            
#             #feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.Change | EFeatureType.ZeroRatio)                             # 5
#             feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                               # 5

#         rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()
        
                    
#     prev_lap_number = current_lap_number
#     prev_lap_time = current_lap_time

