In [18]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
from scipy.stats import zscore

csv_path = "./data/Landslide2.csv"
device = "101"
save_path = f"./data/dev{device}_resample1T.csv"
#save_path = f"data/devAll2_prepared.csv"

df = pd.read_csv(csv_path)

for col in ['soil', 'rain', 'temp', 'humi', 'geo', 'lat', 'lng']:
    df[col] = pd.to_numeric(df[col], errors='coerce')
df

#df = df[~df['timestamp'].str.contains(r'\(ICT\)', na=False)]
#df['timestamp'] = df['timestamp'].str.replace(r'\(ICT\)', '', regex=True).str.strip()

def clean_and_format_timestamp(val):
    if pd.isnull(val):
        return None
    val = str(val).replace('(ICT)', '').strip()
    try:
        dt = pd.to_datetime(val)
        return dt.isoformat()
    except Exception:
        return None  

df['timestamp'] = df['timestamp'].apply(clean_and_format_timestamp)

df.columns = df.columns.str.strip()
df['timestamp'] = pd.to_datetime(df['timestamp'], errors='coerce')
df = df.dropna(subset=['timestamp', 'geo'])  # ต้องมี timestamp และ geo


df = df.drop(columns=['unixt', 'gateway time', 'old time', 'risk_level', 'confidence'], errors='ignore')
df = df[df['devID'] != 218499049.0]
df = df[(df['lat'] > 1.0) & (df['lat'] < 8000.0)]
df = df[df['lng'] > 15.0]
df = df[df['soil'] != 0.0]
df = df[df['rain'] != -1.0]

df = df[df['devID'] == int(device)]

  df = pd.read_csv(csv_path)


In [19]:
start_date = "2025-05-02"
end_date = "2025-05-04"

df = df[(df['timestamp'] >= start_date) & (df['timestamp'] <= end_date)]
print(df['timestamp'].min())
print(df['timestamp'].max())

2025-05-02 11:42:06.635276
2025-05-03 23:59:53.332911


In [20]:
df.describe()

Unnamed: 0,timestamp,devID,soil,rain,temp,humi,geo,lat,lng
count,10398,10398.0,10398.0,10398.0,10398.0,10398.0,10398.0,10398.0,10398.0
mean,2025-05-03 04:42:16.320503552,101.0,41.105963,1.132889,30.087685,76.296469,10.921122,8.637987,99.899006
min,2025-05-02 11:42:06.635276,101.0,27.299999,0.0,25.048067,59.367744,0.515554,8.637954,99.898979
25%,2025-05-02 19:45:04.132482304,101.0,27.6,0.0,27.574196,70.535915,7.35409,8.637979,99.898994
50%,2025-05-03 04:17:49.963717632,101.0,46.5,0.0,29.406044,79.375183,9.867025,8.637985,99.89901
75%,2025-05-03 13:21:17.633472256,101.0,50.5,0.0,33.016327,80.785213,14.596468,8.637994,99.899017
max,2025-05-03 23:59:53.332911,101.0,55.799999,41.6306,35.665295,87.270775,32.059425,8.638022,99.899033
std,,0.0,11.11297,4.406632,2.839717,6.679876,5.374799,1.3e-05,1.2e-05


In [21]:
# เรียงลำดับตาม devID และ timestamp
df = df.sort_values('timestamp').reset_index(drop=True)
#df = df.sort_values(['devID', 'timestamp']).reset_index(drop=True)

# ตั้ง timestamp เป็น index
df = df.set_index('timestamp')
df

Unnamed: 0_level_0,devID,soil,rain,temp,humi,geo,lat,lng
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2025-05-02 11:42:06.635276,101.0,28.799999,0.0,34.290073,62.429081,14.532875,8.637999,99.899010
2025-05-02 11:42:18.056572,101.0,28.799999,0.0,34.266041,64.082787,14.673310,8.637998,99.899010
2025-05-02 11:42:28.667259,101.0,28.900000,0.0,34.250019,62.839169,15.995875,8.637998,99.899010
2025-05-02 11:42:40.117564,101.0,28.799999,0.0,34.185932,62.661785,19.812029,8.637998,99.899010
2025-05-02 11:42:50.732852,101.0,28.900000,0.0,34.175251,63.481953,8.099698,8.637998,99.899010
...,...,...,...,...,...,...,...,...
2025-05-03 23:59:06.643556,101.0,51.700001,0.0,26.847866,84.886551,16.560194,8.637994,99.899010
2025-05-03 23:59:17.595168,101.0,51.700001,0.0,27.438011,81.466621,14.813961,8.637994,99.899010
2025-05-03 23:59:29.408355,101.0,51.700001,0.0,27.798504,79.826279,17.531973,8.637995,99.899017
2025-05-03 23:59:41.144875,101.0,51.700001,0.0,26.797131,85.088737,17.377155,8.637996,99.899017


In [22]:
window = '1T'  # 10 นาที
features = ['soil', 'rain', 'temp', 'humi', 'geo', 'lat', 'lng']


df_rolling = df[features].resample(window).agg(['mean', 'std', 'min', 'max'])
df_rolling.columns = ['_'.join(col) for col in df_rolling.columns]
df_rolling['devID'] = int(device)
df_rolling = df_rolling.dropna()

df_rolling = df_rolling.reset_index()
df_rolling = df_rolling[['timestamp', 'devID', 'soil_mean', 'rain_mean', 'temp_mean', 'humi_mean', 'geo_mean']]
df_rolling

  df_rolling = df[features].resample(window).agg(['mean', 'std', 'min', 'max'])


Unnamed: 0,timestamp,devID,soil_mean,rain_mean,temp_mean,humi_mean,geo_mean
0,2025-05-02 11:42:00,101,28.839999,0.0,34.233463,63.098955,14.622758
1,2025-05-02 11:43:00,101,28.816666,0.0,34.047519,63.319192,10.538904
2,2025-05-02 11:44:00,101,28.799999,0.0,34.081788,63.662011,9.419168
3,2025-05-02 11:45:00,101,28.799999,0.0,34.297550,63.891277,9.548765
4,2025-05-02 11:46:00,101,28.799999,0.0,34.542156,63.798203,10.704843
...,...,...,...,...,...,...,...
2148,2025-05-03 23:55:00,101,51.733334,0.0,27.307165,81.915175,17.956299
2149,2025-05-03 23:56:00,101,51.740001,0.0,27.567254,80.837186,15.542307
2150,2025-05-03 23:57:00,101,51.750001,0.0,27.194343,82.946749,19.222403
2151,2025-05-03 23:58:00,101,51.700001,0.0,27.628271,81.119003,15.267865


In [23]:
df_rolling.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2153 entries, 0 to 2152
Data columns (total 7 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   timestamp  2153 non-null   datetime64[ns]
 1   devID      2153 non-null   int64         
 2   soil_mean  2153 non-null   float64       
 3   rain_mean  2153 non-null   float64       
 4   temp_mean  2153 non-null   float64       
 5   humi_mean  2153 non-null   float64       
 6   geo_mean   2153 non-null   float64       
dtypes: datetime64[ns](1), float64(5), int64(1)
memory usage: 117.9 KB


In [24]:
nan_counts = df_rolling.isna().sum()

nan_columns = nan_counts[nan_counts > 0]

print("📌 คอลัมน์ที่มีค่า NaN:")
print(nan_columns)

📌 คอลัมน์ที่มีค่า NaN:
Series([], dtype: int64)


In [25]:

df_rolling.to_csv(f"{save_path}", index=False)

print("✅ บันทึก เรียบร้อยแล้ว", save_path)

✅ บันทึก เรียบร้อยแล้ว ./data/dev101_resample1T.csv
