In [71]:
from Users.project.data_container.data_container import AzureStorageAccess
import torch
import numpy as np
import pandas as pd
from torch import optim
from Users.project.loss_func import ILossFunc
from Users.project.model import ModelController
from Users.project.optimizer import IOptimizer
from Users.project.predict_lab_time_module.data_state import DataFeaturing, EFeatureType
from Users.project.my_utils import extract_track_from_path
from torch import nn
folder_access = AzureStorageAccess()

In [72]:
def timedelta_to_seconds(td):
    if isinstance(td, str):
        td = pd.to_timedelta(td)
    else:
        return None
    return td.total_seconds()

def create_dataset_from_dataframe(data_frame: pd.DataFrame, featuring: DataFeaturing) -> tuple[list, list]:
    if len(data_frame) <= 5:
        return None, None
    rpm = []
    speed = []
    gear = []
    throttle = []
    brake = []
    drs = []

    x_data = []
    y_data = []

    prev_lap_number = 1.0
    prev_lap_time = 0.0

    sum_of_all_speed = 0
    for row in data_frame.itertuples():
        rpm.append(row.RPM)
        speed.append(row.Speed)
        sum_of_all_speed += row.Speed
        gear.append(row.nGear)
        t = row.Throttle
        if t > 100:
            t = 100
        throttle.append(t)
        brake.append(row.Brake)
        drs.append(row.DRS)
        current_lap_number = row.LapNumber
        current_lap_time = row.Time
        
        if prev_lap_number != current_lap_number:
            # 한 랩이 끝났을 때 저장해둔 모든 데이터로 피쳐만들고 라벨(시간)만들기

            feature = []
            label = timedelta_to_seconds(prev_lap_time)
            if label == None:
                return x_data, y_data
            
            # 기어는 basic넣는게 더 나을거같은데
            feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.ZeroRatio)                            
            feature += featuring.feature_by_list(rpm, EFeatureType.Basic | EFeatureType.ZeroRatio)                               
            feature += featuring.feature_by_list(gear, EFeatureType.Basic | EFeatureType.Change | EFeatureType.Boolean)       
            feature += featuring.feature_by_list(throttle, EFeatureType.Change | EFeatureType.Boolean)    
            feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                          
            feature += featuring.feature_by_list(drs, EFeatureType.Change | EFeatureType.ZeroRatio | EFeatureType.Boolean)         
            feature += [featuring.trend_feature(speed).item()]    
            feature += [featuring.trend_feature(rpm).item()]       
            feature += [featuring.trend_feature(throttle).item()] 
            rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()

            x_data.append(feature)
            y_data.append(label)
                        
        prev_lap_number = current_lap_number
        prev_lap_time = current_lap_time
    if sum_of_all_speed == 0:
        return None, None
    feature = []
    label = timedelta_to_seconds(prev_lap_time)
    if label == None:
        return x_data, y_data
    
    # 기어는 basic넣는게 더 나을거같은데
    feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.ZeroRatio)                            
    feature += featuring.feature_by_list(rpm, EFeatureType.Basic | EFeatureType.ZeroRatio)                               
    feature += featuring.feature_by_list(gear, EFeatureType.Basic | EFeatureType.Change | EFeatureType.Boolean)       
    feature += featuring.feature_by_list(throttle, EFeatureType.Change | EFeatureType.Boolean)    
    feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                          
    feature += featuring.feature_by_list(drs, EFeatureType.Change | EFeatureType.ZeroRatio | EFeatureType.Boolean)         
    feature += [featuring.trend_feature(speed).item()]    
    feature += [featuring.trend_feature(rpm).item()]       
    feature += [featuring.trend_feature(throttle).item()] 
    rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()
    x_data.append(feature)
    y_data.append(label)
    return x_data, y_data

featuring = DataFeaturing()
column_names = ["Date",	"RPM",	"Speed",	"nGear",	"Throttle",	"Brake",	"DRS",	"Source",	"Time",	"SessionTime",	"Distance",	"LapNumber"]

test_data = [
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	1],
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	1],
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	1],
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	2],
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	2],
    ["03:18.8",	11009,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	2],
    ["03:18.8",	11008,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	3],
    ["03:18.8",	11005,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	3],
    ["03:18.8",	11005,	2,	1,	29,	True,	1,	"car",	"0 days 00:00:00.145000",	"0 days 01:02:17.008000",	0,	3],
]
df = pd.DataFrame(test_data, columns=column_names)
test_x, test_y = create_dataset_from_dataframe(df, featuring)
data_count = len(test_x[0])
print(data_count)

48


In [73]:
def train_regression_model(x_data: list, y_data: list,
                          model: nn.Module, device: torch.device,
                          loss_func, optimizer, epochs: int,
                          batch_size: int = 32, verbose: bool = True):
    model.to(device)
    model.train()

    # 데이터를 텐서로 변환 (한번만)
    x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
    y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
    
    dataset_size = len(x_data)
    
    for epoch in range(epochs):
        total_loss = 0.0
        num_batches = 0
        
        # 배치 단위로 학습
        for i in range(0, dataset_size, batch_size):
            end_idx = min(i + batch_size, dataset_size)
            
            x_batch = x_tensor[i:end_idx]
            y_batch = y_tensor[i:end_idx]
            
            # Forward pass
            predictions = model(x_batch)
            loss = loss_func(predictions, y_batch)
            
            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            total_loss += loss.item()
            num_batches += 1

        if num_batches == 0:
            continue  # 다음 epoch으로

        avg_loss = total_loss / num_batches
        
        if verbose and (epoch + 1) % 10 == 0:
            print(f"Epoch [{epoch+1}/{epochs}], Average Loss: {avg_loss:.4f}")

def predict_model(x_data: list, y_data: list,
                 model: nn.Module, device: torch.device,
                 return_predictions: bool = False) -> list[float]:
    """
    모델 예측 함수 (배치 처리 개선 버전)
    
    Args:
        x_data: 입력 데이터 리스트
        y_data: 실제 타겟 데이터 리스트
        model: 학습된 PyTorch 모델
        device: 연산 장치
        return_predictions: 예측값 리스트 반환 여부
    
    Returns:
        예측값 리스트 (return_predictions=True인 경우)
    """
    model.to(device)
    model.eval()
    
    predictions = []
    
    with torch.no_grad():
        # 전체 데이터를 한번에 텐서로 변환
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).to(device)

        # 배치로 예측 (더 효율적)
        pred_tensor = model(x_tensor).squeeze()
        predictions = pred_tensor.cpu().numpy().tolist()
        
        # 결과 출력
        for i in range(len(x_data)):
            actual = y_tensor[i].item()
            predicted = predictions[i] if isinstance(predictions, list) else predictions
            print(f"샘플 {i+1:>2}: 실제 = {actual:.2f}, 예측 = {predicted:.2f}, "
                  f"오차 = {abs(actual - predicted):.2f}")
    
    if return_predictions:
        return predictions

def evaluate_model(x_data: list, y_data: list,
                  model: nn.Module, device: torch.device) -> tuple[float, float]:
    """
    모델 성능 평가 함수
    
    Returns:
        MSE, MAE 튜플
    """
    model.to(device)
    model.eval()
    
    with torch.no_grad():
        x_tensor = torch.tensor(x_data, dtype=torch.float32).to(device)
        y_tensor = torch.tensor(y_data, dtype=torch.float32).unsqueeze(1).to(device)
        
        predictions = model(x_tensor)
        
        mse = nn.MSELoss()(predictions, y_tensor).item()
        mae = nn.L1Loss()(predictions, y_tensor).item()
        
    return mse, mae

In [74]:
%load_ext autoreload
%autoreload 2
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

class ImprovedNN(nn.Module):
    def __init__(self, in_dim):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(in_dim, 128),
            nn.ReLU(),

            nn.Linear(128, 64),
            nn.ReLU(),

            nn.Linear(64, 16),
            nn.ReLU(),

            nn.Linear(16, 1)
        )

    def forward(self, x):
        return self.net(x)

model = ImprovedNN(data_count).to(device)

loss_func = nn.MSELoss()   
o = optim.Adam(model.parameters(), lr = 0.001)

count = 500
for file in folder_access.get_all_file():
    if ".csv" not in file.name: 
        continue

    track_name = extract_track_from_path(file.name)

    if track_name != "Australian_Grand_Prix":
        continue

    if "car_data_all.csv" not in file.name:
        continue

    data_frame = folder_access.read_csv_by_data_frame(file.name)
    x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
    if x_data == None or y_data == None:
        print(file.name)
        continue
    print(count)
    train_regression_model(x_data, y_data, model, device, loss_func, o, 100)
    count -= 1

    if count == 0:
        break  

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


500
Epoch [10/100], Average Loss: 389913.0938
Epoch [20/100], Average Loss: 170682.5000
Epoch [30/100], Average Loss: 31475.2402
Epoch [40/100], Average Loss: 6628.1685
Epoch [50/100], Average Loss: 4347.4448
Epoch [60/100], Average Loss: 3268.2974
Epoch [70/100], Average Loss: 1949.5591
Epoch [80/100], Average Loss: 1426.5343
Epoch [90/100], Average Loss: 1042.9690
Epoch [100/100], Average Loss: 734.4777
499
Epoch [10/100], Average Loss: 1480.2008
Epoch [20/100], Average Loss: 366.0725
Epoch [30/100], Average Loss: 387.8093
Epoch [40/100], Average Loss: 276.1802
Epoch [50/100], Average Loss: 169.5810
Epoch [60/100], Average Loss: 110.5021
Epoch [70/100], Average Loss: 80.6629
Epoch [80/100], Average Loss: 65.2195
Epoch [90/100], Average Loss: 56.0882
Epoch [100/100], Average Loss: 49.7706
498
Epoch [10/100], Average Loss: 69.8651
Epoch [20/100], Average Loss: 47.7888
Epoch [30/100], Average Loss: 35.5498
Epoch [40/100], Average Loss: 28.0974
Epoch [50/100], Average Loss: 24.3108
Epoch

In [75]:
# for file in folder_access.get_all_file():
#     if ".csv" not in file.name: 
#         continue

#     track_name = extract_track_from_path(file.name)

#     if track_name != "Australian_Grand_Prix":
#         continue

#     if "car_data_all.csv" not in file.name:
#         continue

#     data_frame = folder_access.read_csv_by_data_frame(file.name)
#     x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
#     if x_data == None or y_data == None:
#         continue
#     if len(x_data) == 0:
#         continue
#     print(file.name)
#     x_data, y_data = evaluate_model(x_data, y_data, model, device)
#     print(x_data, y_data)

#     if count == 0:
#         break  

In [76]:
blob_name = "2018/2018_Australian_Grand_Prix_R/ALO/car_data_all.csv"
blob_name = "2019/2019_Australian_Grand_Prix_Q/PER/car_data_all.csv"
blob_name = "2018/2018_Australian_Grand_Prix_FP3/PER/car_data_all.csv"
blob_name = "2023/2023_Australian_Grand_Prix_R/ZHO/car_data_all.csv"
#blob_name = "2023/2023_Australian_Grand_Prix_R/LEC/car_data_all.csv"
data_frame = folder_access.read_csv_by_data_frame(blob_name)
#data_frame.to_csv("./car_data_all-warst.csv", index=False, encoding="utf-8")
x_data, y_data = create_dataset_from_dataframe(data_frame, featuring)
predict_model(x_data, y_data, model, device)
x_data, y_data = evaluate_model(x_data, y_data, model, device)
print(x_data, y_data)



샘플  1: 실제 = 129.83, 예측 = 161.14, 오차 = 31.32
샘플  2: 실제 = 119.66, 예측 = 117.63, 오차 = 2.03
샘플  3: 실제 = 123.60, 예측 = 129.00, 오차 = 5.40
샘플  4: 실제 = 84.77, 예측 = 86.02, 오차 = 1.25
샘플  5: 실제 = 84.73, 예측 = 89.48, 오차 = 4.75
샘플  6: 실제 = 84.56, 예측 = 85.04, 오차 = 0.48
샘플  7: 실제 = 101.18, 예측 = 96.73, 오차 = 4.45
샘플  8: 실제 = 1106.04, 예측 = 931.38, 오차 = 174.66
샘플  9: 실제 = 232.65, 예측 = 230.98, 오차 = 1.67
샘플 10: 실제 = 91.34, 예측 = 93.85, 오차 = 2.51
샘플 11: 실제 = 85.08, 예측 = 88.93, 오차 = 3.85
샘플 12: 실제 = 83.57, 예측 = 83.10, 오차 = 0.47
샘플 13: 실제 = 83.30, 예측 = 84.77, 오차 = 1.47
샘플 14: 실제 = 83.25, 예측 = 86.50, 오차 = 3.24
샘플 15: 실제 = 84.87, 예측 = 86.94, 오차 = 2.06
샘플 16: 실제 = 85.36, 예측 = 86.32, 오차 = 0.96
샘플 17: 실제 = 83.06, 예측 = 83.62, 오차 = 0.56
샘플 18: 실제 = 107.49, 예측 = 105.00, 오차 = 2.49
샘플 19: 실제 = 97.60, 예측 = 99.92, 오차 = 2.32
샘플 20: 실제 = 83.60, 예측 = 85.13, 오차 = 1.54
샘플 21: 실제 = 83.05, 예측 = 83.43, 오차 = 0.38
샘플 22: 실제 = 83.06, 예측 = 85.54, 오차 = 2.47
샘플 23: 실제 = 83.38, 예측 = 84.60, 오차 = 1.22
샘플 24: 실제 = 82.96, 예측 = 84.77, 오차 = 1.81

In [77]:
blob_name = "2023/2023_Australian_Grand_Prix_R/ZHO/car_data_all.csv"

data_frame = folder_access.read_csv_by_data_frame(blob_name)

rpm = []
speed = []
gear = []
throttle = []
brake = []
drs = []

prev_lap_number = 1.0
prev_lap_time = 0.0

for row in data_frame.itertuples():
    rpm.append(row.RPM)
    speed.append(row.Speed)
    gear.append(row.nGear)
    throttle.append(row.Throttle)
    brake.append(row.Brake)
    drs.append(row.DRS)
    current_lap_number = row.LapNumber
    current_lap_time = row.Time
    
    if prev_lap_number != current_lap_number:
        label = timedelta_to_seconds(prev_lap_time)
        if label >= 800.0:
            # print("rpm: ", rpm)
            # print("speed: ", speed)
            # print("gear: ", gear)
            # print("throttle: ", throttle)
            # print("brake: ", brake)
            # print("drs: ", drs)
            # print("------------------------")
            feature = []
            label = timedelta_to_seconds(prev_lap_time)
            
            #feature += featuring.feature_by_list(speed, EFeatureType.Basic | EFeatureType.Change | EFeatureType.ZeroRatio)                             # 5
            feature += featuring.feature_by_list(brake, EFeatureType.Boolean)                               # 5

        rpm.clear(); speed.clear(); gear.clear(); throttle.clear(); brake.clear(); drs.clear()
        
                    
    prev_lap_number = current_lap_number
    prev_lap_time = current_lap_time

