<a href="https://colab.research.google.com/github/BanerjeeProdipta/API_CRUD/blob/master/Distance_Estimation_of_Robot_Using_IMU_Data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
from google.colab import drive
drive.mount('/content/drive')

%cd /content/drive/MyDrive/Colab Notebooks
!ls

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/Colab Notebooks
 amazon.csv		  E-Commerce.ipynb			  online_shoppers_intention.csv
'amazon scrapper.ipynb'   glass.data				 'Recommender System.ipynb'
 bestbuy.csv		 'IMU dataset-20241113T171518Z-001.zip'   scaperToRecomanndation.ipynb


In [13]:
!unzip "IMU dataset-20241113T171518Z-001.zip"

Archive:  IMU dataset-20241113T171518Z-001.zip
  inflating: IMU dataset/Curve_fast-2024-10-16_21-43-07.zip  
  inflating: IMU dataset/Curve_slow-2024-10-16_21-38-29.zip  
  inflating: IMU dataset/Straight_slow-2024-10-16_21-29-36.zip  
  inflating: IMU dataset/Straight_fast-2024-10-16_21-34-59.zip  


In [14]:
%cd "IMU dataset"
!ls

/content/drive/MyDrive/Colab Notebooks/IMU dataset
Curve_fast-2024-10-16_21-43-07.zip  Straight_fast-2024-10-16_21-34-59.zip
Curve_slow-2024-10-16_21-38-29.zip  Straight_slow-2024-10-16_21-29-36.zip


In [15]:
import os
import zipfile
import shutil

# Define a list of zip files and their corresponding prefixes
zips_and_prefixes = [
    ("Curve_fast-2024-10-16_21-43-07.zip", "Curve_fast"),
    ("Curve_slow-2024-10-16_21-38-29.zip", "Curve_slow"),
    ("Straight_slow-2024-10-16_21-29-36.zip", "Straight_slow"),
    ("Straight_fast-2024-10-16_21-34-59.zip", "Straight_fast")
]

# Unzip files and rename CSVs with prefix
for zip_file, prefix in zips_and_prefixes:
    print(f"\nProcessing {zip_file}...")

    # Create a temporary directory for extraction
    extraction_dir = f"./extracted_{prefix}"
    os.makedirs(extraction_dir, exist_ok=True)

    # Unzip the file into the extraction directory
    with zipfile.ZipFile(zip_file, 'r') as zip_ref:
        zip_ref.extractall(extraction_dir)

    # Rename extracted CSV files with the given prefix
    for filename in os.listdir(extraction_dir):
        if filename.endswith('.csv'):
            new_filename = f"{prefix}_{filename}"
            os.rename(os.path.join(extraction_dir, filename), new_filename)
            print(f"Renamed: {filename} -> {new_filename}")

    # Clean up the extraction directory
    shutil.rmtree(extraction_dir)

# List all renamed files to verify
print("\nFinal list of renamed files:")
!ls *.csv



Processing Curve_fast-2024-10-16_21-43-07.zip...
Renamed: Metadata.csv -> Curve_fast_Metadata.csv
Renamed: Accelerometer.csv -> Curve_fast_Accelerometer.csv
Renamed: Gravity.csv -> Curve_fast_Gravity.csv
Renamed: Gyroscope.csv -> Curve_fast_Gyroscope.csv
Renamed: Orientation.csv -> Curve_fast_Orientation.csv
Renamed: Annotation.csv -> Curve_fast_Annotation.csv
Renamed: TotalAcceleration.csv -> Curve_fast_TotalAcceleration.csv

Processing Curve_slow-2024-10-16_21-38-29.zip...
Renamed: Metadata.csv -> Curve_slow_Metadata.csv
Renamed: Accelerometer.csv -> Curve_slow_Accelerometer.csv
Renamed: Gravity.csv -> Curve_slow_Gravity.csv
Renamed: Gyroscope.csv -> Curve_slow_Gyroscope.csv
Renamed: Orientation.csv -> Curve_slow_Orientation.csv
Renamed: Annotation.csv -> Curve_slow_Annotation.csv
Renamed: TotalAcceleration.csv -> Curve_slow_TotalAcceleration.csv

Processing Straight_slow-2024-10-16_21-29-36.zip...
Renamed: Metadata.csv -> Straight_slow_Metadata.csv
Renamed: Accelerometer.csv -> Str

In [20]:
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler

def normalize_data(df):
    """Normalize numeric sensor data columns to [0, 1] range."""
    scaler = MinMaxScaler()
    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
    df[numeric_cols] = scaler.fit_transform(df[numeric_cols])
    return df

def smooth_data(df, window_size=5):
    """Apply a moving average filter to smooth the sensor data."""
    numeric_cols = df.select_dtypes(include=['float64', 'int64']).columns
    df[numeric_cols] = df[numeric_cols].rolling(window=window_size, min_periods=1).mean()
    return df

def resample_data(df, sampling_rate='100ms'):
    """Resample data to a uniform sampling rate."""
    if 'time' in df.columns:
        df['time'] = pd.to_datetime(df['time'])
        df.set_index('time', inplace=True)
        df = df.resample(sampling_rate).mean().interpolate()
        df.reset_index(inplace=True)
    return df

# Process all CSV files
csv_files = [file for file in os.listdir() if file.endswith('.csv')]

for csv_file in csv_files:
    print(f"\nProcessing {csv_file}...")

    # Step 1: Check if the file is empty or has issues
    try:
        # Load the CSV file
        df = pd.read_csv(csv_file)

        # Skip if the DataFrame is empty
        if df.empty:
            print(f"Skipping {csv_file}: file is empty.")
            continue

    except pd.errors.EmptyDataError:
        print(f"Skipping {csv_file}: No columns to parse.")
        continue
    except Exception as e:
        print(f"Skipping {csv_file}: Error encountered - {e}")
        continue

    # Step 2: Normalize the data
    print(f"Before normalization:\n{df.head()}")
    df = normalize_data(df)
    print(f"After normalization:\n{df.head()}")  # Print the first few rows after normalization

    # Step 3: Apply noise reduction (smoothing)
    print(f"Before smoothing:\n{df.head()}")
    df = smooth_data(df, window_size=5)
    print(f"After smoothing:\n{df.head()}")  # Print the first few rows after smoothing

    # Step 4: Resample data (if a timestamp column is present)
    print(f"Before resampling:\n{df.head()}")
    df = resample_data(df, sampling_rate='100ms')
    print(f"After resampling:\n{df.head()}")  # Print the first few rows after resampling

    # Save the preprocessed data to a new CSV file
    preprocessed_filename = f"preprocessed_{csv_file}"
    df.to_csv(preprocessed_filename, index=False)
    print(f"Saved preprocessed data to {preprocessed_filename}")

# List the preprocessed files to verify
print("\nPreprocessed files:")
!ls preprocessed_*.csv



Processing Curve_fast_Metadata.csv...
Before normalization:
   version device name  recording epoch time       recording time  \
0        3  M2102J20SG         1729114987922  2024-10-16_21-43-07   

  recording timezone platform appVersion  \
0    America/Toronto  android     1.38.0   

                              device id  \
0  1b41dcc6-5daf-4176-b6a9-76ec6f76b7a7   

                                             sensors  \
0  Accelerometer|Gravity|Gyroscope|Orientation|An...   

                                        sampleRateMs  standardisation  
0  33.333333333333336|33.333333333333336|33.33333...            False  
After normalization:
   version device name  recording epoch time       recording time  \
0      0.0  M2102J20SG                   0.0  2024-10-16_21-43-07   

  recording timezone platform appVersion  \
0    America/Toronto  android     1.38.0   

                              device id  \
0  1b41dcc6-5daf-4176-b6a9-76ec6f76b7a7   

                               

In [None]:
import pandas as pd

# Assuming `sensor_data` is your DataFrame after normalization

# Step 1: Ensure 'time' is in datetime format if not already
sensor_data['time'] = pd.to_datetime(sensor_data['time'], unit='ns')  # Adjust 'unit' as per your data if needed

# Step 2: Set 'time' as the index
sensor_data.set_index('time', inplace=True)

# Step 3: Resample the data at a specific frequency (e.g., 100ms)
freq = '100ms'  # Change this to your desired frequency (e.g., '1s' for 1 second intervals)
sensor_data_resampled = sensor_data.resample(freq).interpolate(method='linear')

# Step 4: Check the data after resampling
print(f"Resampled data sample:\n{sensor_data_resampled.head()}")


Resampled data sample:
                            x     y     z
time                                     
2024-10-16 00:00:00.000  0.00  0.00  0.00
2024-10-16 00:00:00.100  0.25  0.25  0.25
2024-10-16 00:00:00.200  0.50  0.50  0.50
2024-10-16 00:00:00.300  0.75  0.75  0.75
2024-10-16 00:00:00.400  1.00  1.00  1.00


In [None]:
import pandas as pd
import numpy as np

# Assuming `sensor_data_resampled` is your DataFrame after resampling

# Step 1: Apply a low-pass filter (Moving Average) to the sensor data
window_size = 5  # Adjust this window size based on the level of smoothing required
sensor_data_filtered = sensor_data_resampled.rolling(window=window_size, min_periods=1).mean()

# Alternative: Exponential Moving Average (smoothing)
# alpha = 0.1  # smoothing factor (0 < alpha < 1)
# sensor_data_filtered = sensor_data_resampled.ewm(alpha=alpha, adjust=False).mean()

# Step 2: Check the data after noise reduction
print(f"Data after noise reduction:\n{sensor_data_filtered.head()}")


Data after noise reduction:
                             x      y      z
time                                        
2024-10-16 00:00:00.000  0.000  0.000  0.000
2024-10-16 00:00:00.100  0.125  0.125  0.125
2024-10-16 00:00:00.200  0.250  0.250  0.250
2024-10-16 00:00:00.300  0.375  0.375  0.375
2024-10-16 00:00:00.400  0.500  0.500  0.500


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis

# Assuming sensor_data_filtered is the data after noise reduction

def extract_features(sensor_data):
    features = {}

    # Time-domain features
    features['mean_x'] = sensor_data['x'].mean()
    features['mean_y'] = sensor_data['y'].mean()
    features['mean_z'] = sensor_data['z'].mean()

    features['std_x'] = sensor_data['x'].std()
    features['std_y'] = sensor_data['y'].std()
    features['std_z'] = sensor_data['z'].std()

    features['max_x'] = sensor_data['x'].max()
    features['max_y'] = sensor_data['y'].max()
    features['max_z'] = sensor_data['z'].max()

    features['min_x'] = sensor_data['x'].min()
    features['min_y'] = sensor_data['y'].min()
    features['min_z'] = sensor_data['z'].min()

    features['range_x'] = features['max_x'] - features['min_x']
    features['range_y'] = features['max_y'] - features['min_y']
    features['range_z'] = features['max_z'] - features['min_z']

    features['skew_x'] = skew(sensor_data['x'])
    features['skew_y'] = skew(sensor_data['y'])
    features['skew_z'] = skew(sensor_data['z'])

    features['kurtosis_x'] = kurtosis(sensor_data['x'])
    features['kurtosis_y'] = kurtosis(sensor_data['y'])
    features['kurtosis_z'] = kurtosis(sensor_data['z'])

    # RMS (Root Mean Square)
    features['rms_x'] = np.sqrt(np.mean(np.square(sensor_data['x'])))
    features['rms_y'] = np.sqrt(np.mean(np.square(sensor_data['y'])))
    features['rms_z'] = np.sqrt(np.mean(np.square(sensor_data['z'])))

    # Velocity (Cumulative Sum of Acceleration)
    features['velocity_x'] = np.cumsum(sensor_data['x'])
    features['velocity_y'] = np.cumsum(sensor_data['y'])
    features['velocity_z'] = np.cumsum(sensor_data['z'])

    # Jerk (Rate of change of acceleration)
    jerk_x = np.diff(sensor_data['x'], n=1)
    jerk_y = np.diff(sensor_data['y'], n=1)
    jerk_z = np.diff(sensor_data['z'], n=1)
    features['jerk_x'] = np.mean(np.abs(jerk_x))
    features['jerk_y'] = np.mean(np.abs(jerk_y))
    features['jerk_z'] = np.mean(np.abs(jerk_z))

    return features

# Apply feature extraction
extracted_features = extract_features(sensor_data_filtered)

# Convert extracted features into a DataFrame
features_df = pd.DataFrame([extracted_features])

# Show extracted features
print(f"Extracted Features:\n{features_df}")


Extracted Features:
   mean_x  mean_y  mean_z     std_x     std_y     std_z  max_x  max_y  max_z  \
0    0.25    0.25    0.25  0.197642  0.197642  0.197642    0.5    0.5    0.5   

   min_x  ...  kurtosis_z     rms_x     rms_y     rms_z  \
0    0.0  ...        -1.3  0.306186  0.306186  0.306186   

                                          velocity_x  \
0  time
2024-10-16 00:00:00.000    0.000
2024-10-...   

                                          velocity_y  \
0  time
2024-10-16 00:00:00.000    0.000
2024-10-...   

                                          velocity_z  jerk_x  jerk_y  jerk_z  
0  time
2024-10-16 00:00:00.000    0.000
2024-10-...   0.125   0.125   0.125  

[1 rows x 30 columns]


In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis

# Assuming sensor_data_filtered is the data after noise reduction

def extract_features(sensor_data):
    features = {}

    # Time-domain features
    features['mean_x'] = sensor_data['x'].mean()
    features['mean_y'] = sensor_data['y'].mean()
    features['mean_z'] = sensor_data['z'].mean()

    features['std_x'] = sensor_data['x'].std()
    features['std_y'] = sensor_data['y'].std()
    features['std_z'] = sensor_data['z'].std()

    features['max_x'] = sensor_data['x'].max()
    features['max_y'] = sensor_data['y'].max()
    features['max_z'] = sensor_data['z'].max()

    features['min_x'] = sensor_data['x'].min()
    features['min_y'] = sensor_data['y'].min()
    features['min_z'] = sensor_data['z'].min()

    features['range_x'] = features['max_x'] - features['min_x']
    features['range_y'] = features['max_y'] - features['min_y']
    features['range_z'] = features['max_z'] - features['min_z']

    features['skew_x'] = skew(sensor_data['x'])
    features['skew_y'] = skew(sensor_data['y'])
    features['skew_z'] = skew(sensor_data['z'])

    features['kurtosis_x'] = kurtosis(sensor_data['x'])
    features['kurtosis_y'] = kurtosis(sensor_data['y'])
    features['kurtosis_z'] = kurtosis(sensor_data['z'])

    # RMS (Root Mean Square)
    features['rms_x'] = np.sqrt(np.mean(np.square(sensor_data['x'])))
    features['rms_y'] = np.sqrt(np.mean(np.square(sensor_data['y'])))
    features['rms_z'] = np.sqrt(np.mean(np.square(sensor_data['z'])))

    # Velocity (Cumulative Sum of Acceleration)
    features['velocity_x'] = np.cumsum(sensor_data['x'])
    features['velocity_y'] = np.cumsum(sensor_data['y'])
    features['velocity_z'] = np.cumsum(sensor_data['z'])

    # Jerk (Rate of change of acceleration)
    jerk_x = np.diff(sensor_data['x'], n=1)
    jerk_y = np.diff(sensor_data['y'], n=1)
    jerk_z = np.diff(sensor_data['z'], n=1)
    features['jerk_x'] = np.mean(np.abs(jerk_x))
    features['jerk_y'] = np.mean(np.abs(jerk_y))
    features['jerk_z'] = np.mean(np.abs(jerk_z))

    # Trajectory Analysis
    # Total displacement (Euclidean distance between consecutive points)
    displacement = np.sqrt(np.diff(sensor_data['x'])**2 + np.diff(sensor_data['y'])**2 + np.diff(sensor_data['z'])**2)
    features['total_displacement'] = np.sum(displacement)

    # Mean displacement (average distance between consecutive points)
    features['mean_displacement'] = np.mean(displacement)

    # Total distance traveled (Cumulative sum of displacement)
    features['total_distance'] = np.sum(displacement)

    return features

# Apply feature extraction
extracted_features = extract_features(sensor_data_filtered)

# Convert extracted features into a DataFrame
features_df = pd.DataFrame([extracted_features])

# Show extracted features
print(f"Extracted Features:\n{features_df}")


Extracted Features:
   mean_x  mean_y  mean_z     std_x     std_y     std_z  max_x  max_y  max_z  \
0    0.25    0.25    0.25  0.197642  0.197642  0.197642    0.5    0.5    0.5   

   min_x  ...     rms_z                                         velocity_x  \
0    0.0  ...  0.306186  time
2024-10-16 00:00:00.000    0.000
2024-10-...   

                                          velocity_y  \
0  time
2024-10-16 00:00:00.000    0.000
2024-10-...   

                                          velocity_z  jerk_x  jerk_y  jerk_z  \
0  time
2024-10-16 00:00:00.000    0.000
2024-10-...   0.125   0.125   0.125   

   total_displacement  mean_displacement  total_distance  
0            0.866025           0.216506        0.866025  

[1 rows x 33 columns]


In [None]:
import numpy as np
import pandas as pd

# Assuming sensor_data_filtered is your original data

def jittering(sensor_data, noise_factor=0.01):
    """ Add small random noise to the data to simulate sensor variations. """
    noise = np.random.normal(0, noise_factor, sensor_data.shape)  # Create random noise
    augmented_data = sensor_data + noise
    return augmented_data

def scaling(sensor_data, scale_factor=1.1):
    """ Adjust the amplitude of the sensor readings to simulate speed variations. """
    augmented_data = sensor_data * scale_factor  # Scale the sensor data
    return augmented_data

def time_warping(sensor_data, warp_factor=0.2):
    """ Stretch or compress the time axis to simulate different motion patterns. """
    # Create a time warp factor for each axis
    time_warp_x = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['x'])
    time_warp_y = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['y'])
    time_warp_z = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['z'])

    # Create new DataFrame with warped time data
    augmented_data = pd.DataFrame({
        'x': time_warp_x,
        'y': time_warp_y,
        'z': time_warp_z
    })
    return augmented_data

def augment_data(sensor_data, jitter=True, scale=True, time_warp=True):
    """ Apply all augmentation techniques and return augmented dataset. """
    augmented_data = sensor_data.copy()

    if jitter:
        augmented_data = jittering(augmented_data)

    if scale:
        augmented_data = scaling(augmented_data)

    if time_warp:
        augmented_data = time_warping(augmented_data)

    return augmented_data

# Example: Apply data augmentation to your filtered sensor data
augmented_sensor_data = augment_data(sensor_data_filtered)

# Display the augmented data (optional)
print(augmented_sensor_data.head())


          x         y         z
0 -0.012872  0.001852 -0.002581
1  0.020010  0.025585  0.026646
2  0.052891  0.049319  0.055873
3  0.085773  0.073052  0.085099
4  0.118654  0.096786  0.114326


In [None]:
import numpy as np
import pandas as pd

# Assuming your original sensor data looks like this
sensor_data = pd.DataFrame({
    'x': [-0.012872, 0.020010, 0.052891, 0.085773, 0.118654],
    'y': [0.001852, 0.025585, 0.049319, 0.073052, 0.096786],
    'z': [-0.002581, 0.026646, 0.055873, 0.085099, 0.114326]
})

def jittering(sensor_data, noise_factor=0.01):
    """ Add small random noise to the data to simulate sensor variations. """
    noise = np.random.normal(0, noise_factor, sensor_data.shape)  # Create random noise
    augmented_data = sensor_data + noise
    return augmented_data

def scaling(sensor_data, scale_factor=1.1):
    """ Adjust the amplitude of the sensor readings to simulate speed variations. """
    augmented_data = sensor_data * scale_factor  # Scale the sensor data
    return augmented_data

def time_warping(sensor_data, warp_factor=0.2):
    """ Stretch or compress the time axis to simulate different motion patterns. """
    # Create a time warp factor for each axis
    time_warp_x = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['x'])
    time_warp_y = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['y'])
    time_warp_z = np.interp(np.arange(0, len(sensor_data), warp_factor), np.arange(0, len(sensor_data)), sensor_data['z'])

    # Create new DataFrame with warped time data
    augmented_data = pd.DataFrame({
        'x': time_warp_x,
        'y': time_warp_y,
        'z': time_warp_z
    })
    return augmented_data

def augment_data(sensor_data, jitter=True, scale=True, time_warp=True):
    """ Apply all augmentation techniques and return augmented dataset. """
    augmented_data = sensor_data.copy()

    if jitter:
        augmented_data = jittering(augmented_data)

    if scale:
        augmented_data = scaling(augmented_data)

    if time_warp:
        augmented_data = time_warping(augmented_data)

    return augmented_data

# Example: Apply data augmentation to your original sensor data
augmented_sensor_data = augment_data(sensor_data)

# Display the augmented data (optional)
print("Original Data:\n", sensor_data)
print("\nAugmented Data:\n", augmented_sensor_data)


Original Data:
           x         y         z
0 -0.012872  0.001852 -0.002581
1  0.020010  0.025585  0.026646
2  0.052891  0.049319  0.055873
3  0.085773  0.073052  0.085099
4  0.118654  0.096786  0.114326

Augmented Data:
            x         y         z
0  -0.008943  0.008494 -0.002401
1  -0.002353  0.014367  0.004700
2   0.004237  0.020240  0.011801
3   0.010828  0.026113  0.018901
4   0.017418  0.031986  0.026002
5   0.024008  0.037859  0.033103
6   0.028411  0.039688  0.039378
7   0.032813  0.041517  0.045652
8   0.037215  0.043346  0.051927
9   0.041618  0.045175  0.058202
10  0.046020  0.047004  0.064476
11  0.056758  0.054599  0.074513
12  0.067496  0.062194  0.084550
13  0.078234  0.069789  0.094587
14  0.088972  0.077384  0.104624
15  0.099711  0.084979  0.114661
16  0.106954  0.086788  0.118695
17  0.114197  0.088597  0.122729
18  0.121441  0.090406  0.126763
19  0.128684  0.092215  0.130797
20  0.135927  0.094025  0.134831
21  0.135927  0.094025  0.134831
22  0.135927  0

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Feature extraction function (same as before)
def extract_features(sensor_data):
    features = {}
    features['mean_x'] = sensor_data['x'].mean()
    features['mean_y'] = sensor_data['y'].mean()
    features['mean_z'] = sensor_data['z'].mean()

    features['std_x'] = sensor_data['x'].std()
    features['std_y'] = sensor_data['y'].std()
    features['std_z'] = sensor_data['z'].std()

    features['rms_x'] = np.sqrt(np.mean(np.square(sensor_data['x'])))
    features['rms_y'] = np.sqrt(np.mean(np.square(sensor_data['y'])))
    features['rms_z'] = np.sqrt(np.mean(np.square(sensor_data['z'])))

    features['skew_x'] = skew(sensor_data['x'])
    features['skew_y'] = skew(sensor_data['y'])
    features['skew_z'] = skew(sensor_data['z'])

    features['kurtosis_x'] = kurtosis(sensor_data['x'])
    features['kurtosis_y'] = kurtosis(sensor_data['y'])
    features['kurtosis_z'] = kurtosis(sensor_data['z'])

    return features

# Assuming sensor_data is a DataFrame containing 'x', 'y', 'z', and 'time'
# Example data
sensor_data = pd.DataFrame({
    'x': [-0.012872, 0.020010, 0.052891, 0.085773, 0.118654],
    'y': [0.001852, 0.025585, 0.049319, 0.073052, 0.096786],
    'z': [-0.002581, 0.026646, 0.055873, 0.085099, 0.114326],
    'time': [0, 1, 2, 3, 4]  # assuming 1-second intervals
})

# Extract features
features = extract_features(sensor_data)
features_df = pd.DataFrame([features])

# Assume we have corresponding ground truth distance data (for training)
# Example ground truth distances (in meters)
ground_truth_distance = [0.01, 0.05, 0.15, 0.35, 0.60]  # These should be actual measurements

# Create training data
X = features_df  # Input features
y = ground_truth_distance  # Ground truth distances

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'Predicted Distances: {y_pred}')
print(f'Actual Distances: {y_test}')


ValueError: Found input variables with inconsistent numbers of samples: [1, 5]

In [None]:
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Feature extraction function (same as before)
def extract_features(sensor_data):
    features = {}
    features['mean_x'] = sensor_data['x'].mean()
    features['mean_y'] = sensor_data['y'].mean()
    features['mean_z'] = sensor_data['z'].mean()

    features['std_x'] = sensor_data['x'].std()
    features['std_y'] = sensor_data['y'].std()
    features['std_z'] = sensor_data['z'].std()

    features['rms_x'] = np.sqrt(np.mean(np.square(sensor_data['x'])))
    features['rms_y'] = np.sqrt(np.mean(np.square(sensor_data['y'])))
    features['rms_z'] = np.sqrt(np.mean(np.square(sensor_data['z'])))

    features['skew_x'] = skew(sensor_data['x'])
    features['skew_y'] = skew(sensor_data['y'])
    features['skew_z'] = skew(sensor_data['z'])

    features['kurtosis_x'] = kurtosis(sensor_data['x'])
    features['kurtosis_y'] = kurtosis(sensor_data['y'])
    features['kurtosis_z'] = kurtosis(sensor_data['z'])

    return features

# Example sensor data: splitting it into windows (for illustration)
sensor_data = pd.DataFrame({
    'x': [-0.012872, 0.020010, 0.052891, 0.085773, 0.118654, 0.15, 0.18, 0.25],
    'y': [0.001852, 0.025585, 0.049319, 0.073052, 0.096786, 0.12, 0.15, 0.18],
    'z': [-0.002581, 0.026646, 0.055873, 0.085099, 0.114326, 0.13, 0.16, 0.2],
})

# Let's simulate a few more data points and calculate the features over 2-second windows
window_size = 2  # You can adjust this window size
num_windows = len(sensor_data) // window_size

# Prepare the feature matrix (X) and target (y)
X = []
y = []

for i in range(num_windows):
    start = i * window_size
    end = start + window_size
    window_data = sensor_data[start:end]
    features = extract_features(window_data)
    X.append(features)

    # Assuming you have corresponding distance values (replace with actual ground truth)
    distance = 0.1 * (i + 1)  # Dummy distances (in meters)
    y.append(distance)

# Convert X and y to DataFrames
X = pd.DataFrame(X)
y = np.array(y)

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'Predicted Distances: {y_pred}')
print(f'Actual Distances: {y_test}')


Mean Squared Error: 0.00025600000000000313
Predicted Distances: [0.216]
Actual Distances: [0.2]


In [None]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# Example: Already extracted features from the sensor data
# Assume that sensor_data_filtered is already processed and features have been extracted
# For illustration, here's a simulated feature DataFrame (replace with your actual extracted features)
X = pd.DataFrame({
    'mean_x': [-0.01, 0.02, 0.05, 0.08, 0.12],
    'mean_y': [0.001, 0.025, 0.049, 0.073, 0.096],
    'mean_z': [-0.002, 0.026, 0.055, 0.085, 0.114],
    'std_x': [0.03, 0.02, 0.025, 0.035, 0.04],
    'std_y': [0.01, 0.02, 0.015, 0.02, 0.025],
    'std_z': [0.005, 0.01, 0.02, 0.01, 0.015],
    'rms_x': [0.04, 0.05, 0.06, 0.07, 0.08],
    'rms_y': [0.01, 0.015, 0.02, 0.025, 0.03],
    'rms_z': [0.02, 0.025, 0.03, 0.035, 0.04],
    'skew_x': [0.1, 0.2, 0.1, 0.3, 0.4],
    'skew_y': [0.05, 0.1, 0.15, 0.2, 0.25],
    'skew_z': [0.08, 0.12, 0.18, 0.14, 0.3],
    'kurtosis_x': [3, 2.8, 3.2, 3.5, 3.6],
    'kurtosis_y': [3.1, 2.9, 3.3, 3.4, 3.7],
    'kurtosis_z': [3.2, 3, 3.4, 3.6, 3.8]
})

# Corresponding distance values (y) for each feature set in X
# These should correspond to each window/segment from the sensor data
y = np.array([0.1, 0.2, 0.3, 0.4, 0.5])  # Replace with actual distance ground truth

# Split into train and test datasets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train a Random Forest Regressor
model = RandomForestRegressor(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Evaluate the model's performance
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'Predicted Distances: {y_pred}')
print(f'Actual Distances: {y_test}')


Mean Squared Error: 0.002025000000000006
Predicted Distances: [0.245]
Actual Distances: [0.2]


In [None]:
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error

# Train an SVR model
svr_model = SVR(kernel='rbf')  # Radial Basis Function kernel, you can try 'linear', 'poly', etc.
svr_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_svr = svr_model.predict(X_test)

# Evaluate the model's performance
mse_svr = mean_squared_error(y_test, y_pred_svr)
print(f'Mean Squared Error (SVR): {mse_svr}')
print(f'Predicted Distances (SVR): {y_pred_svr}')
print(f'Actual Distances: {y_test}')


Mean Squared Error (SVR): 0.001857059946457109
Predicted Distances (SVR): [0.24309362]
Actual Distances: [0.2]


In [None]:
import xgboost as xgb
from sklearn.metrics import mean_squared_error

# Train an XGBoost model
xgboost_model = xgb.XGBRegressor(objective ='reg:squarederror', n_estimators=100, random_state=42)
xgboost_model.fit(X_train, y_train)

# Make predictions on the test set
y_pred_xgb = xgboost_model.predict(X_test)

# Evaluate the model's performance
mse_xgb = mean_squared_error(y_test, y_pred_xgb)
print(f'Mean Squared Error (XGBoost): {mse_xgb}')
print(f'Predicted Distances (XGBoost): {y_pred_xgb}')
print(f'Actual Distances: {y_test}')


Mean Squared Error (XGBoost): 0.009796267385560697
Predicted Distances (XGBoost): [0.1010239]
Actual Distances: [0.2]
