In [52]:
import pandas as pd
from pathlib import Path
import re

In [89]:
DATA_DIR = Path("../new_data/datasets_circle")  
PATTERN   = "dataset_raw*.csv" 

names = ["t", "ax", "ay", "yawRate", "steer", "gas"]

def natural_key(p):
    parts = re.split(r"(\d+)", p.stem)
    return [int(s) if s.isdigit() else s for s in parts]

dfs = []
for f in sorted(DATA_DIR.glob(PATTERN), key=natural_key):
    df = pd.read_csv(
        f,
        names=names,      
        engine="python"    
    )
    dfs.append(df[1:])


In [90]:
len(dfs), dfs[0]

(60,
         t       ax       ay   yawRate steer     gas
 1    1713  -0.0244  -0.0005   -0.0305  3208  3600.0
 2    1767  -0.0645   0.0006    1.9512  3224  3600.0
 3    1946  -0.0253   0.0321   -0.4268  3540  2922.0
 4    2666  -0.0480  -0.0012    0.0915  2984  2874.0
 5    3386  -0.0499   0.0268   -0.1220  3770  2886.0
 6    4106  -0.0639  -0.0111    0.0915  3078  3028.0
 7    4826  -0.0664   0.0194   -0.3049  3272  3460.0
 8    5545  -0.0474   0.0404   -0.2134  3568  3660.0
 9    6248  -0.0336   0.0644   -0.1220  3706  3982.0
 10   6607  -0.4937  -0.6208   72.9268  3706  3988.0
 11   6788  -0.4745  -3.1374  109.3598  3776  3988.0
 12   6968  -0.9571  -4.0245  150.9451  3846  4002.0
 13   7507  -1.1631  -4.1170  283.2927  4002  4002.0
 14   7688  -4.0291  -5.3282  318.1708  4002  4002.0
 15   7867  -4.5482  -4.3425  299.6037  4002  4002.0
 16   8047  -4.1428  -3.7790  260.8537  4004  4002.0
 17   8227  -3.6279  -4.9022  238.3232  4004  4002.0
 18   8587  -3.3562  -4.9273  273.5671  4

In [91]:
dfs[0].info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 21 entries, 1 to 21
Data columns (total 6 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   t        21 non-null     object 
 1   ax       21 non-null     object 
 2   ay       21 non-null     object 
 3   yawRate  21 non-null     object 
 4   steer    21 non-null     object 
 5   gas      21 non-null     float64
dtypes: float64(1), object(5)
memory usage: 1.1+ KB


In [92]:
!mkdir -p ../new_data/datasets_circle_preproc

In [93]:
for i in range(len(dfs)):
    df_i = dfs[i].astype('float64')
    df_i = df_i.dropna()
    min_t = df_i['t'].min()
    indices_to_drop = []

    for k, row in df_i.iterrows():
        if row['t'] > min_t:
            indices_to_drop.append(k)
        elif row['t'] == min_t:
            break
    df_i = df_i.drop(index=indices_to_drop)
    df_i.to_csv(f'../new_data/datasets_circle_preproc/dataset_raw{i+1}.csv', index=False)
    print(df_i) #check

          t      ax      ay   yawRate   steer     gas
1    1713.0 -0.0244 -0.0005   -0.0305  3208.0  3600.0
2    1767.0 -0.0645  0.0006    1.9512  3224.0  3600.0
3    1946.0 -0.0253  0.0321   -0.4268  3540.0  2922.0
4    2666.0 -0.0480 -0.0012    0.0915  2984.0  2874.0
5    3386.0 -0.0499  0.0268   -0.1220  3770.0  2886.0
6    4106.0 -0.0639 -0.0111    0.0915  3078.0  3028.0
7    4826.0 -0.0664  0.0194   -0.3049  3272.0  3460.0
8    5545.0 -0.0474  0.0404   -0.2134  3568.0  3660.0
9    6248.0 -0.0336  0.0644   -0.1220  3706.0  3982.0
10   6607.0 -0.4937 -0.6208   72.9268  3706.0  3988.0
11   6788.0 -0.4745 -3.1374  109.3598  3776.0  3988.0
12   6968.0 -0.9571 -4.0245  150.9451  3846.0  4002.0
13   7507.0 -1.1631 -4.1170  283.2927  4002.0  4002.0
14   7688.0 -4.0291 -5.3282  318.1708  4002.0  4002.0
15   7867.0 -4.5482 -4.3425  299.6037  4002.0  4002.0
16   8047.0 -4.1428 -3.7790  260.8537  4004.0  4002.0
17   8227.0 -3.6279 -4.9022  238.3232  4004.0  4002.0
18   8587.0 -3.3562 -4.9273 