In [29]:
import pandas as pd
import numpy as np

In [30]:
def modified_z_score(series):
    median = series.median()
    mad = np.median(np.abs(series - median))
    modified_z_scores = 0.6745 * (series - median) / mad
    return modified_z_scores

def remove_outliers_and_apply_thresholds(df, columns, thresholds, z_threshold=3.5):
    for col in columns:
        # Remove outliers using Modified Z-Score
        modified_z_scores = modified_z_score(df[col])
        df = df[np.abs(modified_z_scores) <= z_threshold]
        
        # Apply thresholds
        if col in thresholds:
            lower, upper = thresholds[col]
            df = df[(df[col] >= lower) & (df[col] <= upper)]
    
    return df

# Define thresholds for specific columns
thresholds = {
    "latitude": (-90, 90),
    "longitude": (-180, 180),
    "altitude": (-500, 12000),
    "course": (0, 360),
    "hacc": (0, 100),  # Assuming max horizontal accuracy of 100 meters
    "speed": (0, 300)  # Assuming max speed of 300 m/s
}



In [31]:
def merge_and_average(df):
    # Convert Timestamp to datetime and remove milliseconds
    df["Timestamp"] = pd.to_datetime(df["Timestamp"]).dt.floor("S")

    # Keep the relevant columns
    # Including extra columns: 'altitude', 'course', 'hacc', 'latitude', 'longitude', 'speed'
    df = df[["Timestamp", "X", "Y", "Z", "Type", "altitude", "course", "hacc", "latitude", "longitude", "speed"]].copy()

    # Create columns for each type
    types = df["Type"].unique()
    for t in types:
        for col in ["X", "Y", "Z"]:
            df.loc[:, f"{t}_{col}"] = df.apply(
                lambda row: row[col] if row["Type"] == t else None, axis=1
            )

    # Group by Timestamp and calculate mean for numeric columns
    df_grouped = df.groupby("Timestamp").mean(numeric_only=True).reset_index()

    # Explicitly list columns to retain
    columns_to_keep = ["Timestamp", "altitude", "course", "hacc", "latitude", "longitude", "speed"] + [
        f"{t}_{col}"
        for t in types
        for col in ["X", "Y", "Z"]
        if f"{t}_{col}" in df_grouped.columns
    ]
    df_grouped = df_grouped[columns_to_keep]

    # Round the numeric columns to the desired decimal places
    # Uncomment and modify the line below to set the desired decimal places
    # df_grouped = df_grouped.round(3)

    return df_grouped

In [32]:
data=pd.read_csv('sensorData.csv')
columns_to_check = ["X", "Y", "Z", "altitude", "course", "hacc", "latitude", "longitude", "speed"]
cleaned_data = remove_outliers_and_apply_thresholds(data, columns_to_check, thresholds)
# Apply the function to the data
cleaned_merged_data = merge_and_average(data)
cleaned_merged_data = cleaned_merged_data.drop(['Position_X', 'Position_Y', 'Position_Z'], axis=1)
cleaned_merged_data = cleaned_merged_data.bfill().interpolate()
cleaned_merged_data.head()

  return _methods._mean(a, axis=axis, dtype=dtype,
  ret = ret.dtype.type(ret / rcount)


Unnamed: 0,Timestamp,altitude,course,hacc,latitude,longitude,speed,Orientation_X,Orientation_Y,Orientation_Z,Acceleration_X,Acceleration_Y,Acceleration_Z,AngularVelocity_X,AngularVelocity_Y,AngularVelocity_Z,MagneticField_X,MagneticField_Y,MagneticField_Z
0,2024-06-05 17:31:10,74.205,0.0,11.163,52.383291,4.846156,0.0,103.679619,-50.106435,-68.627135,5.922696,7.574695,2.289901,-0.138972,0.14586,0.020374,-33.297117,-39.835098,2.285697
1,2024-06-05 17:31:11,74.205,0.0,11.163,52.383291,4.846156,0.0,107.826813,-46.96734,-63.829794,6.003333,7.196856,2.966087,0.162069,0.043381,0.000794,-34.748439,-39.003564,-6.358125
2,2024-06-05 17:31:12,67.854,0.0,9.538,52.383274,4.84614,0.0,90.708181,-45.332142,-81.253669,6.734438,7.058159,0.903,0.070753,-0.232262,0.161804,-36.535689,-37.356002,1.454625
3,2024-06-05 17:31:13,64.293,0.0,8.87,52.383275,4.846141,0.0,80.833309,-40.510232,-92.878237,7.500184,6.43596,-0.381709,0.073356,-0.040203,0.011388,-40.775627,-31.031064,10.796625
4,2024-06-05 17:31:14,64.651,0.0,7.744,52.383271,4.84612,0.0,88.275178,-40.379573,-78.754876,7.095991,6.39692,1.34039,-0.338543,0.342033,-0.080732,-40.767752,-30.455439,11.748563
