<a href="https://colab.research.google.com/github/Samplergithub769/mtu-xsens-imu-Sensor/blob/main/mtu_xsens_imu_Sensor.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import Libraries**

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns



**Load Datasets**

In [None]:
# Every recording is read the same way: the first 12 lines of the file are
# skipped and the next line is parsed as the header row.
_recording_paths = (
    "/content/25hz.csv",
    "/content/50hz.csv",
    "/content/100hz.csv",
    "/content/Noise_25HZ.csv",
    "/content/Noise_50hz.csv",
    "/content/Noise_100hz.csv",
)

# Unpack in the same order as the path table above.
(
    df_25hz,
    df_50hz,
    df_100hz,
    df_noise_25hz,
    df_noise_50hz,
    df_noise_100hz,
) = (pd.read_csv(csv_path, skiprows=12) for csv_path in _recording_paths)


**Display the first few rows and information for each DataFrame**

In [None]:
# (heading, frame) pairs in display order; every heading after the first
# starts with a newline so the previews are separated by a blank line.
_head_previews = (
    ("25hz.csv:", df_25hz),
    ("\n50hz.csv:", df_50hz),
    ("\n100hz.csv:", df_100hz),
    ("\nNoise_25HZ.csv:", df_noise_25hz),
    ("\nNoise_50hz.csv:", df_noise_50hz),
    ("\nNoise_100hz.csv:", df_noise_100hz),
)

for heading, frame in _head_previews:
    print(heading)
    print(frame.head())

In [None]:
# (info heading, missing-values heading, frame) for each recording, in the
# order the reports are printed.
_info_reports = (
    ("Info for df_25hz:", "\nMissing values in df_25hz:", df_25hz),
    ("\nInfo for df_50hz:", "\nMissing values in df_50hz:", df_50hz),
    ("\nInfo for df_100hz:", "\nMissing values in df_100hz:", df_100hz),
    ("\nInfo for df_noise_25hz:", "\nMissing values in df_noise_25hz:", df_noise_25hz),
    ("\nInfo for df_noise_50hz:", "\nMissing values in df_noise_50hz:", df_noise_50hz),
    ("\nInfo for df_noise_100hz:", "\nMissing values in df_noise_100hz:", df_noise_100hz),
)

for info_heading, nulls_heading, frame in _info_reports:
    print(info_heading)
    frame.info()  # info() writes its report itself, so it is not wrapped in print()
    print(nulls_heading)
    print(frame.isnull().sum())

In [None]:
# Columns analysed by the histogram cell: the three AccHR axes.
acc_cols = ['AccHR_X', 'AccHR_Y', 'AccHR_Z']


def plot_histograms(clean_df, noisy_df, label, cols=None, bins=100):
    """Draw clean-vs-noisy histograms side by side, one row per column.

    Parameters
    ----------
    clean_df, noisy_df : pandas.DataFrame
        Frames that both contain every column named in ``cols``.
    label : str
        Text appended to the panel titles and legend entries (e.g. "25Hz").
    cols : sequence of str, optional
        Columns to plot. Defaults to the module-level ``acc_cols`` so existing
        three-argument calls behave as before.
    bins : int, optional
        Number of histogram bins (default 100).

    Notes
    -----
    Missing values are dropped before binning, as the box-plot cell already
    does; a histogram bin range cannot be derived from data containing NaN.
    """
    cols = acc_cols if cols is None else list(cols)

    # squeeze=False keeps the axes grid two-dimensional even for one column.
    fig, grid = plt.subplots(len(cols), 2, figsize=(14, 10), squeeze=False)

    for row, col in zip(grid, cols):
        # Left panel shows the clean recording, right panel the noisy one.
        panels = (
            (row[0], clean_df, 'blue', f'Clean {label}'),
            (row[1], noisy_df, 'red', f'Noisy {label}'),
        )
        for ax, frame, color, legend_text in panels:
            ax.hist(frame[col].dropna(), bins=bins, alpha=0.7, color=color, label=legend_text)
            ax.set_title(f'{col} - {legend_text}')
            ax.grid(True)
            ax.legend()

    fig.tight_layout()
    plt.show()


# Use the function for each frequency
plot_histograms(df_25hz, df_noise_25hz, "25Hz")
plot_histograms(df_50hz, df_noise_50hz, "50Hz")
plot_histograms(df_100hz, df_noise_100hz, "100Hz")




**Box Plots**

In [None]:
# Columns drawn as box plots (one figure per column and frequency).
axes = ['AccHR_X', 'AccHR_Y', 'AccHR_Z']

# Prepare data for box plots: {frequency label: {'Clean' | 'Noisy': AccHR columns}}
data = {
    '25Hz': {'Clean': df_25hz[axes], 'Noisy': df_noise_25hz[axes]},
    '50Hz': {'Clean': df_50hz[axes], 'Noisy': df_noise_50hz[axes]},
    '100Hz': {'Clean': df_100hz[axes], 'Noisy': df_noise_100hz[axes]},
}

# Plot boxplots using loop
for freq, sets in data.items():
    for axis in axes:
        plt.figure(figsize=(12, 6))
        # NaNs are dropped so they do not distort the box statistics.
        plt.boxplot([
            sets['Clean'][axis].dropna(),
            sets['Noisy'][axis].dropna(),
        ])
        # Name the two boxes through xticks instead of boxplot's `labels=`
        # keyword: Matplotlib 3.9 deprecated `labels=` in favour of
        # `tick_labels=`, whereas xticks works on old and new releases alike.
        # boxplot places the boxes at x = 1, 2 by default.
        plt.xticks([1, 2], [f'Clean {freq}', f'Noisy {freq}'])
        plt.title(f'Box Plot of {axis} ({freq})')
        plt.ylabel('Acceleration')
        plt.grid(True)
        plt.tight_layout()
        plt.show()


**Create plots for Temperature vs SampleTimeFine for each dataset**

In [None]:
# One panel per recording, laid out row by row on a 3x2 grid:
# clean recording on the left, its noisy counterpart on the right.
# Each entry is (frame, line colour, text shown in the panel title).
temperature_panels = [
    (df_25hz, 'blue', '25Hz Clean'),
    (df_noise_25hz, 'red', '25Hz Noisy'),
    (df_50hz, 'purple', '50Hz Clean'),
    (df_noise_50hz, 'brown', '50Hz Noisy'),
    (df_100hz, 'green', '100Hz Clean'),
    (df_noise_100hz, 'orange', '100Hz Noisy'),
]

plt.figure(figsize=(18, 15))

for position, (frame, color, description) in enumerate(temperature_panels, start=1):
    plt.subplot(3, 2, position)
    plt.plot(frame['SampleTimeFine'], frame['Temperature'], color=color)
    plt.title(f'Temperature vs SampleTimeFine ({description})')
    plt.xlabel('SampleTimeFine')
    plt.ylabel('Temperature')
    plt.grid(True)

# Finish the figure the same way the other plotting cells do, so panel titles
# and axis labels do not overlap and the figure is rendered explicitly.
plt.tight_layout()
plt.show()

In [None]:
def plot_rpy(df, title_prefix, filename_prefix=None):
    """Plot Roll, Pitch and Yaw against SampleTimeFine in three side-by-side panels.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing 'SampleTimeFine', 'Roll', 'Pitch' and 'Yaw' columns.
    title_prefix : str
        Text placed before " - Roll" / " - Pitch" / " - Yaw" in the panel titles.
    filename_prefix : str, optional
        Not used by this function: nothing is written to disk. The parameter
        is kept, now optional, so existing three-argument calls still work.
    """
    # (column, extra line styling); Roll keeps Matplotlib's default colour.
    angle_styles = (
        ('Roll', {}),
        ('Pitch', {'color': 'orange'}),
        ('Yaw', {'color': 'green'}),
    )

    plt.figure(figsize=(15, 5))
    for position, (angle, line_style) in enumerate(angle_styles, start=1):
        plt.subplot(1, 3, position)
        plt.plot(df['SampleTimeFine'], df[angle], label=angle, **line_style)
        plt.title(f'{title_prefix} - {angle}')
        plt.xlabel('SampleTimeFine')
        plt.ylabel(angle)
        plt.legend()

    # Lay out and render each figure explicitly, like the other plotting cells.
    plt.tight_layout()
    plt.show()


# Plot each clean recording followed by its noisy counterpart
plot_rpy(df_25hz, 'Clean 25Hz Data', 'clean_25hz')
plot_rpy(df_noise_25hz, 'Noisy 25Hz Data', 'noisy_25hz')
plot_rpy(df_50hz, 'Clean 50Hz Data', 'clean_50hz')
plot_rpy(df_noise_50hz, 'Noisy 50Hz Data', 'noisy_50hz')
plot_rpy(df_100hz, 'Clean 100Hz Data', 'clean_100hz')
plot_rpy(df_noise_100hz, 'Noisy 100Hz Data', 'noisy_100hz')


**Calculate and print summary statistics for 'AccHR_X', 'AccHR_Y', 'AccHR_Z'**

In [None]:
# Columns summarised for every recording.
_summary_cols = ['AccHR_X', 'AccHR_Y', 'AccHR_Z']

# (heading, frame) pairs in print order; every heading after the first starts
# with a newline so the tables are separated by a blank line.
_summary_reports = (
    ("Summary Statistics for 25Hz Clean Data:", df_25hz),
    ("\nSummary Statistics for 25Hz Noisy Data:", df_noise_25hz),
    ("\nSummary Statistics for 50Hz Clean Data:", df_50hz),
    ("\nSummary Statistics for 50Hz Noisy Data:", df_noise_50hz),
    ("\nSummary Statistics for 100Hz Clean Data:", df_100hz),
    ("\nSummary Statistics for 100Hz Noisy Data:", df_noise_100hz),
)

for heading, frame in _summary_reports:
    print(heading)
    print(frame[_summary_cols].describe())

**Data Cleaning**

**Moving Average Filter**

In [None]:
# Columns that receive the moving-average filter
acc_cols = ['AccHR_X', 'AccHR_Y', 'AccHR_Z']

# Rolling-window length, in samples, for each noisy recording
window_size_25hz = 100
window_size_50hz = 200
window_size_100hz = 400


def _with_rolling_mean(noisy_df, window):
    """Return a copy of ``noisy_df`` with ``acc_cols`` replaced by their rolling mean.

    The window covers the current row and the ``window - 1`` rows before it;
    rows that do not yet have a full window come out as NaN. All other columns
    are copied unchanged.
    """
    smoothed = noisy_df.copy()
    smoothed[acc_cols] = noisy_df[acc_cols].rolling(window=window).mean()
    return smoothed


df_noise_25hz_cleaned = _with_rolling_mean(df_noise_25hz, window_size_25hz)
df_noise_50hz_cleaned = _with_rolling_mean(df_noise_50hz, window_size_50hz)
df_noise_100hz_cleaned = _with_rolling_mean(df_noise_100hz, window_size_100hz)





In [None]:
# Preview the 25 Hz frame right after smoothing; the leading AccHR rows are
# NaN because the rolling window has not filled yet.
df_noise_25hz_cleaned


In [None]:
# Preview the 50 Hz frame right after smoothing; the leading AccHR rows are
# NaN because the rolling window has not filled yet.
df_noise_50hz_cleaned

In [None]:
# Preview the 100 Hz frame right after smoothing; the leading AccHR rows are
# NaN because the rolling window has not filled yet.
df_noise_100hz_cleaned

In [None]:
def _drop_incomplete_rows(frame):
    """Return ``frame`` without rows that contain any NaN, re-indexed from 0.

    NOTE(review): dropna() is called without ``subset``, so a row is removed
    when ANY column is NaN, not only the smoothed AccHR columns. Confirm that
    this is intended.
    """
    complete_rows = frame.dropna()
    return complete_rows.reset_index(drop=True)


df_noise_25hz_cleaned = _drop_incomplete_rows(df_noise_25hz_cleaned)
df_noise_50hz_cleaned = _drop_incomplete_rows(df_noise_50hz_cleaned)
df_noise_100hz_cleaned = _drop_incomplete_rows(df_noise_100hz_cleaned)


In [None]:
# Re-inspect the 25 Hz frame after rows containing NaN were dropped and the
# index was reset.
df_noise_25hz_cleaned

In [None]:
# Re-inspect the 50 Hz frame after rows containing NaN were dropped and the
# index was reset.
df_noise_50hz_cleaned

In [None]:
# Re-inspect the 100 Hz frame after rows containing NaN were dropped and the
# index was reset.
df_noise_100hz_cleaned

In [None]:
# 25 Hz: raw noisy signal against its moving-average version, one panel per
# accelerometer column (the smoothed values live in the same column names).
fig, panels = plt.subplots(3, 1, figsize=(15, 15))

for panel, col in zip(panels, acc_cols):
    # Raw signal, drawn semi-transparent so the smoothed line stays visible
    panel.plot(
        df_noise_25hz['SampleTimeFine'],
        df_noise_25hz[col],
        label=f'Original {col} (Noisy)',
        alpha=0.5,
    )

    # Moving-average result
    panel.plot(
        df_noise_25hz_cleaned['SampleTimeFine'],
        df_noise_25hz_cleaned[col],
        label=f'Smoothed {col} (Window={window_size_25hz})',
        color='green',
    )

    panel.set_title(f'{col}: Original Noisy vs. Smoothed (25Hz)')
    panel.set_xlabel('SampleTimeFine')
    panel.set_ylabel('Acceleration')
    panel.legend()
    panel.grid(True)

fig.tight_layout()
plt.show()


In [None]:
# 50 Hz: raw noisy signal against its moving-average version, one panel per
# accelerometer column (the smoothed values live in the same column names).
fig, panels = plt.subplots(3, 1, figsize=(15, 15))

for panel, col in zip(panels, acc_cols):
    # Raw signal, drawn semi-transparent so the smoothed line stays visible
    panel.plot(
        df_noise_50hz['SampleTimeFine'],
        df_noise_50hz[col],
        label=f'Original {col} (Noisy)',
        alpha=0.5,
    )

    # Moving-average result
    panel.plot(
        df_noise_50hz_cleaned['SampleTimeFine'],
        df_noise_50hz_cleaned[col],
        label=f'Smoothed {col} (Window={window_size_50hz})',
        color='green',
    )

    panel.set_title(f'{col}: Original Noisy vs. Smoothed (50Hz)')
    panel.set_xlabel('SampleTimeFine')
    panel.set_ylabel('Acceleration')
    panel.legend()
    panel.grid(True)

fig.tight_layout()
plt.show()


In [None]:
# 100 Hz: raw noisy signal against its moving-average version, one panel per
# accelerometer column (the smoothed values live in the same column names).
fig, panels = plt.subplots(3, 1, figsize=(15, 15))

for panel, col in zip(panels, acc_cols):
    # Raw signal, drawn semi-transparent so the smoothed line stays visible
    panel.plot(
        df_noise_100hz['SampleTimeFine'],
        df_noise_100hz[col],
        label=f'Original {col} (Noisy)',
        alpha=0.5,
    )

    # Moving-average result
    panel.plot(
        df_noise_100hz_cleaned['SampleTimeFine'],
        df_noise_100hz_cleaned[col],
        label=f'Smoothed {col} (Window={window_size_100hz})',
        color='green',
    )

    panel.set_title(f'{col}: Original Noisy vs. Smoothed (100Hz)')
    panel.set_xlabel('SampleTimeFine')
    panel.set_ylabel('Acceleration')
    panel.legend()
    panel.grid(True)

fig.tight_layout()
plt.show()


In [None]:
# Acceleration columns to analyze
acc_cols = ['AccHR_X', 'AccHR_Y', 'AccHR_Z']

# Smoothed noisy recordings keyed by their frequency label
datasets = {
    "25Hz": df_noise_25hz_cleaned,
    "50Hz": df_noise_50hz_cleaned,
    "100Hz": df_noise_100hz_cleaned,
}

# Print describe() for the smoothed accelerometer columns of each recording
for label in datasets:
    print(f"\nSummary Statistics for Smoothed Data ({label}):")
    print(datasets[label][acc_cols].describe())


In [None]:
# Noisy recordings before and after smoothing, keyed by frequency label
original_dataframes = {
    "25Hz": df_noise_25hz,
    "50Hz": df_noise_50hz,
    "100Hz": df_noise_100hz,
}

smoothed_dataframes = {
    "25Hz": df_noise_25hz_cleaned,
    "50Hz": df_noise_50hz_cleaned,
    "100Hz": df_noise_100hz_cleaned,
}

# Noise Reduction Summary: per-column standard deviation before and after
# smoothing, and the relative drop between the two.
for name, original_df in original_dataframes.items():
    smoothed_df = smoothed_dataframes[name]

    print(f"\nNoise Reduction Assessment for {name}")

    original_std = original_df[acc_cols].std()
    smoothed_std = smoothed_df[acc_cols].std()

    # (original - smoothed) / original, expressed in percent
    percentage_reduction = (original_std - smoothed_std).div(original_std).mul(100)

    report_sections = (
        ("\nStandard Deviation - Original (Noisy):", original_std.round(4)),
        ("\nStandard Deviation - Smoothed:", smoothed_std.round(4)),
        ("\nPercentage Reduction in Noise (Standard Deviation):", percentage_reduction.round(2)),
    )
    for heading, values in report_sections:
        print(heading)
        print(values)
    print("-" * 60)




**Goal:** build a self-learning machine learning model that can be trained on the sensor datasets and that:

- achieves low prediction error,

- achieves higher precision, and

- continuously learns and adapts from incoming data.

The final model will be applied to predict the following sensor-based targets:

Temperature

Magnetic Field (Mag_X, Mag_Y, Mag_Z)




In [None]:
# Combine clean and smoothed noisy data with labels and frequency information.
# Each entry is (frame, label, Freq): label is 0 for a clean recording and 1
# for a smoothed noisy one; Freq is the frequency in the recording's name.
# The frames are tagged in place and concatenated in this order.
_tagged_frames = [
    (df_25hz, 0, 25),
    (df_noise_25hz_cleaned, 1, 25),
    (df_50hz, 0, 50),
    (df_noise_50hz_cleaned, 1, 50),
    (df_100hz, 0, 100),
    (df_noise_100hz_cleaned, 1, 100),
]

for frame, noisy_flag, rate_hz in _tagged_frames:
    frame['label'] = noisy_flag
    frame['Freq'] = rate_hz

# Stack all six frames under one fresh 0..n-1 index
combined_df = pd.concat(
    [entry[0] for entry in _tagged_frames],
    ignore_index=True,
)

# First rows of the combined frame
display(combined_df.head())

# Structure and missing-value counts of the combined frame
print("\nInfo for combined_df:")
combined_df.info()

print("\nMissing values in combined_df:")
print(combined_df.isnull().sum())

In [None]:
# Predictors for the temperature model, in column order:
#   - accelerometer and orientation readings
#   - magnetic field, used here to help predict temperature
#   - Freq, the sampling frequency tag added when the frames were combined
_motion_features = ['AccHR_X', 'AccHR_Y', 'AccHR_Z', 'Roll', 'Pitch', 'Yaw']
_magnetic_features = ['Mag_X', 'Mag_Y', 'Mag_Z']
features = _motion_features + _magnetic_features + ['Freq']

# Feature matrix and temperature target taken from the combined frame
X = combined_df.loc[:, features]
y_temp = combined_df.loc[:, 'Temperature']

In [None]:
from sklearn.model_selection import train_test_split, KFold, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline
from sklearn.metrics import mean_squared_error
# Temperature model: standardise the features, then fit a 100-tree random
# forest (fixed seed, all CPU cores).
_temp_forest = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
temp_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', _temp_forest),
])

In [None]:
# Splitter reused by both models: 10 shuffled folds with a fixed seed, so
# fold membership is repeatable between runs.
cv = KFold(n_splits=10, shuffle=True, random_state=42)

In [None]:
# Score the temperature pipeline on every fold. The scorer reports negated
# MSE, so each value is <= 0 and is sign-flipped before taking the root later.
# NOTE(review): the folds are shuffled, so rows that are adjacent in
# SampleTimeFine can land on both sides of a split. Confirm that this is
# acceptable for time-ordered sensor data.
scores = cross_val_score(temp_pipeline, X, y_temp, cv=cv, scoring='neg_mean_squared_error')


In [None]:
# The cross-validation scores are negated MSE values: flip the sign and take
# the square root to get one RMSE per fold.
rmse_scores = np.sqrt(np.negative(scores))

fold_rmse = np.round(rmse_scores, 4)
mean_rmse = rmse_scores.mean()
rmse_spread = rmse_scores.std()

print(" Temperature Model Cross-Validation Results")
print(f"Scores for each of the 10 folds (RMSE): {fold_rmse}")
print(f"\nAverage RMSE: {mean_rmse:.4f}")
print(f"Standard Deviation of RMSE: {rmse_spread:.4f}")


In [None]:
from sklearn.metrics import mean_absolute_error, r2_score
from sklearn.model_selection import cross_val_predict

# Predictions
# cross_val_score only fits clones of the estimator, so temp_pipeline itself
# is still unfitted at this point and temp_pipeline.predict(X) would raise
# NotFittedError. cross_val_predict returns out-of-fold predictions from the
# same splitter: every row is predicted by a model that did not train on it,
# which keeps these metrics comparable with the cross-validated RMSE.
y_temp_pred = cross_val_predict(temp_pipeline, X, y_temp, cv=cv)

# Evaluate
mae_temp = mean_absolute_error(y_temp, y_temp_pred)
r2_temp = r2_score(y_temp, y_temp_pred)

print("\nTemperature Model Evaluation:")
print(f"MAE: {mae_temp:.4f}")
print(f"R² Score: {r2_temp:.4f}")


In [None]:
# Magnetic Field Model
from sklearn.multioutput import MultiOutputRegressor

# Predictors: accelerometer and orientation readings, plus Temperature (used
# here to help predict the magnetic field) and the Freq tag.
mag_features = ['AccHR_X', 'AccHR_Y', 'AccHR_Z', 'Roll', 'Pitch', 'Yaw', 'Temperature', 'Freq']
X_mag = combined_df.loc[:, mag_features]

# Three targets, one per magnetic axis
y_mag = combined_df.loc[:, ['Mag_X', 'Mag_Y', 'Mag_Z']]

# One random forest per target, wrapped by MultiOutputRegressor, behind a scaler
_mag_forest = RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1)
mag_pipeline = Pipeline(steps=[
    ('scaler', StandardScaler()),
    ('model', MultiOutputRegressor(_mag_forest)),
])

# Cross-validate with the shared splitter; the scores are negated MSE values
mag_scores = cross_val_score(mag_pipeline, X_mag, y_mag, cv=cv, scoring='neg_mean_squared_error')

# Per-fold RMSE, then its mean and spread across folds
_mag_fold_rmse = np.sqrt(-mag_scores)
avg_mag_rmse = _mag_fold_rmse.mean()
std_mag_rmse = _mag_fold_rmse.std()

print("\nMagnetic Field Model Cross-Validation Results ")
print(f"Average RMSE: {avg_mag_rmse:.4f}")
print(f"Standard Deviation of RMSE: {std_mag_rmse:.4f}")

In [None]:
from sklearn.model_selection import cross_val_predict

# Predictions
# cross_val_score only fits clones of the estimator, so mag_pipeline itself is
# still unfitted at this point and mag_pipeline.predict(X_mag) would raise
# NotFittedError. cross_val_predict returns out-of-fold predictions from the
# same splitter, one column per magnetic axis.
y_mag_pred = cross_val_predict(mag_pipeline, X_mag, y_mag, cv=cv)

# Overall scores, averaged uniformly over the three axes
mae_mag = mean_absolute_error(y_mag, y_mag_pred)
r2_mag = r2_score(y_mag, y_mag_pred)

# Evaluate each magnetic axis separately
mae_per_axis = mean_absolute_error(y_mag, y_mag_pred, multioutput='raw_values')
r2_per_axis = r2_score(y_mag, y_mag_pred, multioutput='raw_values')

print("\nMagnetic Field Model Evaluation:")
print(f"MAE: {mae_mag:.4f}")
print(f"R² Score: {r2_mag:.4f}")
for axis_name, axis_mae, axis_r2 in zip(y_mag.columns, mae_per_axis, r2_per_axis):
    print(f"{axis_name}: MAE = {axis_mae:.4f}, R² = {axis_r2:.4f}")


In [None]:
import pickle

# Fit both final pipelines on the complete dataset: temperature first, then
# magnetic field.
temp_pipeline.fit(X, y_temp)
mag_pipeline.fit(X_mag, y_mag)

# Persist each fitted pipeline to its own pickle file in the working directory
_pickle_targets = (
    ('robust_temp_pipeline.pkl', temp_pipeline),
    ('robust_mag_pipeline.pkl', mag_pipeline),
)
for pkl_path, fitted_pipeline in _pickle_targets:
    with open(pkl_path, 'wb') as sink:
        pickle.dump(fitted_pipeline, sink)

print("\nFinal, robust pipelines have been trained and saved.")