In [None]:
!pip install pandas

In [None]:
import pandas as pd
import os

def process_csv(file_path, output_folder, column=-3, trim_threshold=600, label_threshold=200):
    """Trim a CSV to its active span and binarise one signal column.

    The signal column is selected by position (``column``; the default ``-3``
    is the third column from the end, not the last one). Rows before the first
    and after the last reading above ``trim_threshold`` are dropped. If no
    reading exceeds the threshold, every row is kept. The signal column is
    then overwritten with 1 where the reading is below ``label_threshold`` and
    0 otherwise, and the result is written to ``output_folder`` under the
    input file's base name.

    Args:
        file_path: Path of the CSV file to read.
        output_folder: Existing directory that receives the processed CSV.
        column: Positional index of the signal column.
        trim_threshold: Readings strictly above this value delimit the rows kept.
        label_threshold: Readings strictly below this value become 1, others 0.

    Returns:
        None. Errors are reported with ``print`` rather than raised, so one
        bad file does not stop a batch run.
    """
    try:
        df = pd.read_csv(file_path)

        print(f"Processing file: {file_path}")

        # Nothing to trim or label in an empty file, and nothing is written.
        if df.empty:
            print("DataFrame is empty")
            return

        signal = df.iloc[:, column]

        # Index labels of every reading above the trim threshold (computed once).
        above = signal.index[(signal > trim_threshold).to_numpy()]

        # Keep the inclusive span between the first and the last such reading.
        if len(above) > 0:
            # Copy so the in-place write below acts on an independent frame.
            df = df.loc[above.min():above.max()].copy()

        # Replace the raw readings with a 0/1 flag.
        df.iloc[:, column] = (df.iloc[:, column] < label_threshold).astype(int)

        # Save the processed data under the same file name.
        new_file_path = os.path.join(output_folder, os.path.basename(file_path))
        df.to_csv(new_file_path, index=False)

        print(f"File has been saved to: {new_file_path}")

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")

# Dataset folders to process; the loop below reads CSVs from each one's
# 'raw_data' subfolder.
input_folders = [
    'grass_fusion',
    'grass2mat_fusion',  # was 'gras2mat_fusion'; spelled as in the later cells
    'mat_fusion',
    'water_fusion'
]

# Iterate through all folders
for input_folder in input_folders:
    # 'allcolunm' (sic) is kept as-is so the output location does not change.
    output_folder = os.path.join(input_folder, 'allcolunm')

    # Create the output folder if it does not exist yet
    os.makedirs(output_folder, exist_ok=True)

    # Iterate through all CSV files in the raw-data folder; a separate name is
    # used so the loop variable is not overwritten mid-iteration.
    raw_folder = os.path.join(input_folder, 'raw_data')
    for file in os.listdir(raw_folder):
        if file.endswith('.csv'):
            process_csv(os.path.join(raw_folder, file), output_folder)


In [2]:
import pandas as pd
import os

def process_csv(file_path, output_folder, margin=150):
    """Crop a CSV to the rows around the span where the last column equals 1.

    The first and last rows whose last-column value is 1 are located, and the
    file is cut down to that span extended by ``margin`` rows on each side
    (clamped to the file's bounds). If the last column contains no 1, every
    row is kept. The result is written to ``output_folder`` under the input
    file's base name.

    Args:
        file_path: Path of the CSV file to read.
        output_folder: Existing directory that receives the cropped CSV.
        margin: Number of extra rows kept before the first and after the last
            row flagged with 1.

    Returns:
        None. Errors are reported with ``print`` rather than raised, so one
        bad file does not stop a batch run.
    """
    try:
        # Read the CSV file
        df = pd.read_csv(file_path)

        print(f"Processing file: {file_path}")

        # Nothing to crop in an empty file, and nothing is written.
        if df.empty:
            print("DataFrame is empty")
            return

        # Row positions (not index labels) where the last column is 1.
        flagged = (df.iloc[:, -1] == 1).to_numpy().nonzero()[0]

        if flagged.size > 0:
            # Slice by position so the clamping against the row count is exact;
            # iloc's stop is exclusive, hence the +1 to keep the last margin row.
            start_row = max(0, int(flagged[0]) - margin)
            stop_row = min(len(df), int(flagged[-1]) + margin + 1)
            df = df.iloc[start_row:stop_row, :]

        # Save the processed data to the new output folder
        new_file_path = os.path.join(output_folder, os.path.basename(file_path))
        df.to_csv(new_file_path, index=False)

        print(f"File has been saved to: {new_file_path}")

    except Exception as e:
        print(f"Error processing file {file_path}: {e}")


input_folders = [
    'grass_fusion',
    'grass2mat_fusion',
    'mat_fusion',
    'water_fusion'
]

# Walk through every dataset folder
for dataset_dir in input_folders:
    trimmed_dir = os.path.join(dataset_dir, '150data')

    # Make sure the destination folder exists before writing into it
    if not os.path.exists(trimmed_dir):
        os.makedirs(trimmed_dir)

    # Process each CSV file found in the raw-data folder
    raw_dir = os.path.join(dataset_dir, 'raw_data')
    csv_names = [entry for entry in os.listdir(raw_dir) if entry.endswith('.csv')]
    for csv_name in csv_names:
        process_csv(os.path.join(raw_dir, csv_name), trimmed_dir)


Processing file: grass_fusion\raw_data\imu_data_0313-0014.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0014.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0016.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0016.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0017.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0017.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0059.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0059.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0100.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0100.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0102.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0102.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0103.csv
File has been saved to: grass_fusion\allcolunm\imu_data_0313-0103.csv
Processing file: grass_fusion\raw_data\imu_data_0313-0104.csv
File has been 

In [3]:
import pandas as pd
import numpy as np
import os

def create_dataset(folder_path, input_size=150, label_size=1, step_size=1):
    """Build feature and label arrays from every CSV file in a folder.

    Each ``.csv`` file in ``folder_path`` is read with pandas and passed to
    ``process_file``; the windows and labels it returns are concatenated
    across files.

    Args:
        folder_path: Directory to scan (not recursive) for ``.csv`` files.
        input_size: Number of rows per feature window.
        label_size: Number of rows following each window that form its label.
        step_size: Row offset between the starts of consecutive windows.

    Returns:
        A ``(features, labels)`` tuple of NumPy arrays built from the collected
        per-window lists. Both are empty arrays when no window was produced.
    """
    all_features = []
    all_labels = []

    # os.listdir returns names in arbitrary order; sorting makes the order of
    # samples in the returned arrays reproducible across runs and machines.
    for file in sorted(os.listdir(folder_path)):
        if not file.endswith('.csv'):
            continue
        file_path = os.path.join(folder_path, file)
        df = pd.read_csv(file_path)
        features, labels = process_file(df, input_size, label_size, step_size, file)
        all_features.extend(features)
        all_labels.extend(labels)

    return np.array(all_features), np.array(all_labels)

def process_file(df, input_size, label_size, step_size, file_name):
    """Cut one DataFrame into standardised feature windows with their labels.

    For each window of ``input_size`` consecutive rows, the first three
    columns are standardised with that window's own mean and standard
    deviation (pandas' default ``std``). The label is the last column's values
    for the ``label_size`` rows immediately after the window. A window in which
    any of the three columns has zero standard deviation is skipped with a
    printed notice.

    Args:
        df: Source data; columns 0-2 are the features, the last column the label.
        input_size: Number of rows per feature window.
        label_size: Number of rows following the window used as its label.
        step_size: Row offset between the starts of consecutive windows.
        file_name: Name used only in the skip notice.

    Returns:
        ``(features_list, labels_list)``: a list of ``(input_size, 3)`` arrays
        and a list of ``(label_size,)`` arrays, index-aligned. Both are empty
        when the frame is empty, has fewer than three columns, or is too short
        for a single window.
    """
    n_feature_cols = 3

    # Columns 0-2 are indexed below, so a narrower frame cannot be processed.
    if df.empty or len(df.columns) < n_feature_cols:
        return [], []

    features_list = []
    labels_list = []

    for start in range(0, len(df) - input_size - label_size + 1, step_size):
        end = start + input_size
        label_end = end + label_size

        # Statistics are taken over the feature columns only, so unrelated
        # (possibly non-numeric) columns neither cost time nor raise.
        window = df.iloc[start:end, :n_feature_cols]
        window_mean = window.mean().values
        window_std = window.std().values
        if (window_std == 0).any():
            print(f"File {file_name}: Skipping window with zero standard deviation.")
            continue

        # Per-column z-score; broadcasting applies each column's own mean/std.
        features = (window.values - window_mean) / window_std
        labels = df.iloc[end:label_end, -1].values

        features_list.append(features)
        labels_list.append(labels)

    return features_list, labels_list

input_folders = [
    # 'grass_fusion',
    # 'grass2mat_fusion',
    'mat_fusion',
    # 'water_fusion'
]

# Go through every enabled folder: build its windowed dataset and save it
for input_folder in input_folders:
    # Destination files sit directly inside the dataset folder
    features_path = os.path.join(input_folder, 'features3_150_150normalization.npy')
    labels_path = os.path.join(input_folder, 'labels3_150_150normalization.npy')

    # Windows are built from the CSV files in the '150data' subfolder
    features, labels = create_dataset(os.path.join(input_folder, '150data'))

    np.save(features_path, features)
    np.save(labels_path, labels)
