In [1]:
from fastcore.basics import Path, AttrDict

config = AttrDict(
    challenge_data_raw_dir = Path('../../data/1_dataset_raw/'),
    challenge_data_clean_dir = Path('../../data/2_dataset_clean/'),
    challenge_data_processed_dir = Path('../../data/3_dataset_processed/'),
    challenge_data_features_dir = Path('../../data/4_dataset_features/'),
    df_merged_small = Path('../../data/2_dataset_clean/df_merged_small.csv'),
    df_merged_medium = Path('../../data/2_dataset_clean/df_merged_medium.csv'),
    df_merged_large = Path('../../data/2_dataset_clean/df_merged_large.csv'),
    df_merged_full = Path('../../data/2_dataset_clean/df_merged_full.csv'),
    valid_ratio = 0.1,
    lag_steps = 5,
    tolerance= 6, # Default evaluation tolerance
)

# read df_merged_small
import pandas as pd
df_merged = pd.read_csv(config.df_merged_small)

df_merged

Unnamed: 0,Timestamp,Eccentricity,Semimajor Axis (m),Inclination (deg),RAAN (deg),Argument of Periapsis (deg),True Anomaly (deg),Latitude (deg),Longitude (deg),Altitude (m),X (m),Y (m),Z (m),Vx (m/s),Vy (m/s),Vz (m/s),ObjectID,TimeIndex,EW,NS
0,2022-09-01 00:00:00.000000Z,0.000127,4.216594e+07,0.118065,89.710324,202.940616,216.180960,-0.004704,168.980008,3.579214e+07,-3.608303e+07,2.182526e+07,74580.151140,-1590.894015,-2630.634553,3.250794,545,0,SS-NK,SS-NK
1,2022-09-01 02:00:00.000000Z,0.005056,4.238466e+07,0.118038,89.650304,60.229587,29.090881,-0.006828,169.036746,3.581902e+07,-4.219026e+07,7.579635e+05,86926.450274,-62.865297,-3079.607106,0.090788,545,1,,
2,2022-09-01 04:00:00.000000Z,0.005062,4.238501e+07,0.118114,89.518050,60.424355,59.091844,-0.007215,169.018089,3.589587e+07,-3.696135e+07,-2.051694e+07,75836.682875,1480.580493,-2694.723791,-3.098809,545,2,,
3,2022-09-01 06:00:00.000000Z,0.005054,4.238422e+07,0.118364,89.395612,60.350090,89.219001,-0.005842,168.866409,3.600208e+07,-2.184975e+07,-3.631348e+07,44344.181267,2619.892981,-1594.478385,-5.446713,545,3,,
4,2022-09-01 08:00:00.000000Z,0.005044,4.238313e+07,0.118658,89.353924,60.060612,119.326647,-0.003103,168.560755,3.610888e+07,-9.333845e+05,-4.247677e+07,940.970165,3058.128946,-80.689078,-6.334788,545,4,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1086955,2023-02-28 14:00:00.000000Z,0.000097,4.216560e+07,0.202295,76.581292,105.475645,54.784362,-0.003213,-130.995683,3.578511e+07,-2.306152e+07,-3.529735e+07,50279.886539,2573.928194,-1681.980257,-10.217899,724,2167,,
1086956,2023-02-28 16:00:00.000000Z,0.000086,4.216585e+07,0.202724,76.572324,112.477268,77.874419,-0.045222,-130.995132,3.578695e+07,-2.262640e+06,-4.210432e+07,-26807.586644,3070.192414,-165.240527,-10.701817,724,2168,,
1086957,2023-02-28 18:00:00.000000Z,0.000074,4.216590e+07,0.201503,76.418511,113.179844,107.405821,-0.074780,-130.997097,3.578869e+07,1.914591e+07,-3.756949e+07,-96478.786471,2739.408169,1395.816744,-8.212063,724,2169,,
1086958,2023-02-28 20:00:00.000000Z,0.000068,4.216563e+07,0.201734,76.512042,102.841679,147.728412,-0.083991,-131.001247,3.578992e+07,3.539775e+07,-2.291559e+07,-140014.872786,1670.844246,2580.772766,-3.601286,724,2170,,


In [2]:
list_object_id = list(df_merged['ObjectID'].unique())

In [3]:
def get_df_merged_query(df_merged, list_object_id, position):
    # Retrieve the ObjectID from the list based on the given position
    object_id = list_object_id[position]
    print("Object id is : {}".format(object_id))
    
    # Query df_merged where ObjectID matches the retrieved object_id
    # Note: @object_id inside the query string is used to reference the variable object_id
    return df_merged.query('ObjectID == @object_id')

df_merged_query = get_df_merged_query(df_merged, list_object_id, position = 10)


Object id is : 592


In [None]:
df_merged_query

In [None]:
df_merged_query.to_csv("df_merged_query.csv")

In [None]:
df_merged_query

# Plot other features

In [None]:
import pandas as pd
import plotly.graph_objects as go


def plot_orbital_parameter_with_events(df, parameter_name):
    # Ensure 'Timestamp' is in datetime format
    if not pd.api.types.is_datetime64_any_dtype(df['Timestamp']):
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])

    # Extract the unique ObjectID (assuming all rows have the same ObjectID)
    object_id = df['ObjectID'].iloc[0]
    
    # Create the title with ObjectID and the parameter being plotted
    title = f'{parameter_name} Evolution for Object ID: {object_id} with NS and EW Events'

    # Create the base figure
    fig = go.Figure()

    # Add line for the specified orbital parameter
    fig.add_trace(go.Scatter(
        x=df['Timestamp'],
        y=df[parameter_name],
        mode='lines+markers',
        name=parameter_name,
        line=dict(color='blue'),
        marker=dict(size=4)  # Default marker size for the line plot
    ))
    
    # Add markers for NS events, ignoring NaN values, with increased size and a distinctive color
    for event in df['NS'].dropna().unique():
        df_event = df[df['NS'] == event]
        fig.add_trace(go.Scatter(
            x=df_event['Timestamp'],
            y=df_event[parameter_name],
            mode='markers',
            name=f'NS: {event}',
            marker=dict(symbol='circle', size=10, color='yellow')  # Larger, yellow markers for NS events
        ))
    
    # Add markers for EW events, ignoring NaN values, with increased size and a distinctive color
    for event in df['EW'].dropna().unique():
        df_event = df[df['EW'] == event]
        fig.add_trace(go.Scatter(
            x=df_event['Timestamp'],
            y=df_event[parameter_name],
            mode='markers',
            name=f'EW: {event}',
            marker=dict(symbol='x', size=10, color='magenta')  # Larger, magenta markers for EW events
        ))
    
    # Update plot layout
    fig.update_layout(
        title=title,
        xaxis_title='Timestamp',
        yaxis_title=parameter_name,
        legend_title='Event Type',
        legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)  # Adjust legend position
    )

    fig.show()

In [None]:


plot_orbital_parameter_with_events(df_merged_query, 'Eccentricity')
plot_orbital_parameter_with_events(df_merged_query, 'Semimajor Axis (m)')
plot_orbital_parameter_with_events(df_merged_query, 'Inclination (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'RAAN (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'Argument of Periapsis (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'True Anomaly (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'Latitude (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'Longitude (deg)')
plot_orbital_parameter_with_events(df_merged_query, 'Altitude (m)')
plot_orbital_parameter_with_events(df_merged_query, 'X (m)')
plot_orbital_parameter_with_events(df_merged_query, 'Y (m)')
plot_orbital_parameter_with_events(df_merged_query, 'Z (m)')
plot_orbital_parameter_with_events(df_merged_query, 'Vx (m/s)')
plot_orbital_parameter_with_events(df_merged_query, 'Vy (m/s)')
plot_orbital_parameter_with_events(df_merged_query, 'Vz (m/s)')


#

# Plot with isolation of feature

## General function to plot features

In [5]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

def plot_data_with_feature(df, feature_name_to_observe):
    # Ensure 'Timestamp' is in datetime format
    if not pd.api.types.is_datetime64_any_dtype(df['Timestamp']):
        df['Timestamp'] = pd.to_datetime(df['Timestamp'])
        
    # Find all columns that contain "Eccentricity" in the column name
    feature_columns = [col for col in df.columns if "feature" in col]
    print("feature_columns", feature_columns)

    # Extract the unique ObjectID (assuming all rows have the same ObjectID)
    object_id = df['ObjectID'].iloc[0]
    
    # Create the base figure
    fig = go.Figure()

    fig.add_trace(go.Scatter(
        x=df['Timestamp'],
        y=df[feature_name_to_observe],
        mode='lines+markers',
        name=feature_name_to_observe,
        line=dict(color='blue'),
        marker=dict(size=4)  # Default marker size for the line plot
    ))

    for parameter_name in feature_columns:
        # Create the title with ObjectID and the parameter being plotted
        title = f'{parameter_name} Evolution for Object ID: {object_id} with NS and EW Events'

        # Add line for the current eccentricity parameter
        fig.add_trace(go.Scatter(
            x=df['Timestamp'],
            y=df[parameter_name],
            mode='lines+markers',
            name=parameter_name,
            line=dict(color='red'),
            marker=dict(size=4)  # Default marker size for the line plot
        ))

    # Add markers for NS events, ignoring NaN values, with increased size and a distinctive color
    for event in df['NS'].dropna().unique():
            df_event = df[df['NS'] == event]
            fig.add_trace(go.Scatter(
                x=df_event['Timestamp'],
                y=df_event[feature_name_to_observe],
                mode='markers',
                name=f'NS: {event}',
                marker=dict(symbol='circle', size=10, color='yellow')  # Larger, yellow markers for NS events
            ))
        
    # Add markers for EW events, ignoring NaN values, with increased size and a distinctive color
    for event in df['EW'].dropna().unique():
        df_event = df[df['EW'] == event]
        fig.add_trace(go.Scatter(
            x=df_event['Timestamp'],
            y=df_event[feature_name_to_observe],
            mode='markers',
            name=f'EW: {event}',
            marker=dict(symbol='x', size=10, color='magenta')  # Larger, magenta markers for EW events
            ))
    
        # Update plot layout for the last plotted parameter (consider adjusting if needed)
        fig.update_layout(
            title=title,
            xaxis_title='Timestamp',
            yaxis_title=parameter_name,
            legend_title='Event Type',
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01)  # Adjust legend position
        )

    fig.show()


### Eccentricity

In [None]:
df_merged.columns.tolist()

In [None]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

data_name = "Eccentricity"

for object_id in list_of_20_object_id:
    df_merged_query = df_merged.query("ObjectID == @object_id")
    
   # Create the new features
    df_merged_query['difference_feature'] = df_merged_query['Eccentricity'].diff()
    df_merged_query['rolling_max_feature'] = df_merged_query['Eccentricity'].rolling(12).max()
    df_merged_query['rolling_min_feature'] = df_merged_query['Eccentricity'].rolling(12).min()
    df_merged_query['envelope_height_feature'] = df_merged_query['rolling_max_feature'] - df_merged_query['rolling_min_feature']
    df_merged_query['envelope_height_variation_feature'] = df_merged_query['envelope_height_feature'].diff()
    df_merged_query['envelope_diff_feature'] = df_merged_query['rolling_max_feature'].diff()
    df_merged_query['envelope_accel_feature'] = df_merged_query['envelope_diff_feature'].diff()
    
    
    
    plot_data_with_feature(df = df_merged_query, feature_name_to_observe=data_name)

In [None]:
df_merged_query = df_merged.query("ObjectID == 1760")
df_merged_query.to_csv("df_merged_query.csv")

In [None]:
from statsmodels.tsa.seasonal import seasonal_decompose
import matplotlib.pyplot as plt

# Convert the "Timestamp" column to datetime and set it as the index
df_merged_query['Timestamp'] = pd.to_datetime(df_merged_query['Timestamp'])
df_merged_query.set_index('Timestamp', inplace=True)

# Ensure the dataset has a consistent frequency
df_merged_query = df_merged_query.asfreq(pd.infer_freq(df_merged_query.index))

# Perform seasonal decomposition to identify the trend, seasonal, and residual components
decomposition = seasonal_decompose(df_merged_query['Eccentricity'], model='additive', period=12)  # Adjust the period according to your data's seasonality

# Plot the decomposed components
decomposition_plot = decomposition.plot()
plt.show()


In [None]:
# Remove the seasonality from the "Eccentricity" data
eccentricity_deseasonalized = df_merged_query['Eccentricity'] - decomposition.seasonal - decomposition.trend

eccentricity_deseasonalized.plot()

# Vx (m/s)

In [None]:
df_merged_query.columns.tolist()

In [None]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

data_name = "Vx (m/s)"

for object_id in list_of_20_object_id:
    df_merged_query = df_merged.query("ObjectID == @object_id")
    
   # Create the new features
    # df_merged_query['vx_diff_feature'] = df_merged_query[data_name].diff()
    # df_merged_query['vx_accel_feature'] = df_merged_query['vx_diff_feature'].diff()
    # df_merged_query['daily_mean_feature'] = df_merged_query[data_name].rolling(12).mean()
    # df_merged_query['weekly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7).mean()
    # df_merged_query['monthly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).mean()
    
    # df_merged_query['daily_std_feature'] = df_merged_query[data_name].rolling(12).std()
    # df_merged_query['weekly_std_feature'] = df_merged_query[data_name].rolling(12 * 7).std()
    # df_merged_query['monthly_std_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).std()
    
    df_merged_query['rolling_max_feature'] = df_merged_query[data_name].rolling(12).max()
    df_merged_query['rolling_min_feature'] = df_merged_query[data_name].rolling(12).min()
    df_merged_query['envelope_height_feature'] = df_merged_query['rolling_max_feature'] - df_merged_query['rolling_min_feature']
    df_merged_query['envelope_height_variation_feature'] = df_merged_query['envelope_height_feature'].diff()
    df_merged_query['envelope_max_daily_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12).std()
    df_merged_query['envelope_max_weekly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7).std()
    df_merged_query['envelope_max_monthly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7*30).std()
    # df_merged_query['envelope_diff_feature'] = df_merged_query['rolling_max_feature'].diff()
    # df_merged_query['envelope_accel_feature'] = df_merged_query['envelope_diff_feature'].diff()
    
    
    
    plot_data_with_feature(df = df_merged_query, feature_name_to_observe=data_name)

## Periapsis

In [None]:
df_merged_query.columns.tolist()

In [None]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

data_name = "Argument of Periapsis (deg)"

for object_id in list_of_20_object_id:
    df_merged_query = df_merged.query("ObjectID == @object_id")
    
   # Create the new features
    # df_merged_query['vx_diff_feature'] = df_merged_query[data_name].diff()
    # df_merged_query['vx_accel_feature'] = df_merged_query['vx_diff_feature'].diff()
    # df_merged_query['daily_mean_feature'] = df_merged_query[data_name].rolling(12).mean()
    # df_merged_query['weekly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7).mean()
    # df_merged_query['monthly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).mean()
    
    df_merged_query['daily_std_feature'] = df_merged_query[data_name].rolling(12).std()
    df_merged_query['weekly_std_feature'] = df_merged_query[data_name].rolling(12 * 7).std()
    
    df_merged_query['daily_std_diff_feature'] = df_merged_query['daily_std_feature'].diff()
    
    # smoothed version
    df_merged_query['smoothed_daily_std_feature'] = df_merged_query['daily_std_feature'].rolling(12).mean()
    df_merged_query['daily_std_diff_smoothed_feature'] = df_merged_query['smoothed_daily_std_feature'].diff()
    
    # df_merged_query['monthly_std_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).std()
    
    # df_merged_query['rolling_max_feature'] = df_merged_query[data_name].rolling(12).max()
    # df_merged_query['rolling_min_feature'] = df_merged_query[data_name].rolling(12).min()
    # df_merged_query['envelope_height_feature'] = df_merged_query['rolling_max_feature'] - df_merged_query['rolling_min_feature']
    # df_merged_query['envelope_height_variation_feature'] = df_merged_query['envelope_height_feature'].diff()
    # df_merged_query['envelope_max_daily_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12).std()
    # df_merged_query['envelope_max_weekly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7).std()
    # df_merged_query['envelope_max_monthly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7*30).std()
    # df_merged_query['envelope_diff_feature'] = df_merged_query['rolling_max_feature'].diff()
    # df_merged_query['envelope_accel_feature'] = df_merged_query['envelope_diff_feature'].diff()
    
    
    
    plot_data_with_feature(df = df_merged_query, feature_name_to_observe=data_name)

# Latitude

# Latitude in degrees

In [None]:
df_merged_query.columns.tolist()

# Altitude

In [6]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

data_name = "Altitude (m)"

for object_id in list_of_20_object_id:
    df_merged_query = df_merged.query("ObjectID == @object_id")
    
   # Create the new features
    # df_merged_query['vx_diff_feature'] = df_merged_query[data_name].diff()
    # df_merged_query['vx_accel_feature'] = df_merged_query['vx_diff_feature'].diff()
    # df_merged_query['daily_mean_feature'] = df_merged_query[data_name].rolling(12).mean()
    # df_merged_query['weekly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7).mean()
    # df_merged_query['monthly_mean_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).mean()
    
    df_merged_query['daily_std_feature'] = df_merged_query[data_name].rolling(12).std()
    # df_merged_query['weekly_std_feature'] = df_merged_query[data_name].rolling(12 * 7).std()
    
    # df_merged_query['daily_std_diff_feature'] = df_merged_query['daily_std_feature'].diff()
    
    # # smoothed version
    # df_merged_query['smoothed_daily_std_feature'] = df_merged_query['daily_std_feature'].rolling(12).mean()
    # df_merged_query['daily_std_diff_smoothed_feature'] = df_merged_query['smoothed_daily_std_feature'].diff()
    
    # df_merged_query['monthly_std_feature'] = df_merged_query[data_name].rolling(12 * 7 * 30).std()
    
    # df_merged_query['rolling_max_feature'] = df_merged_query[data_name].rolling(12).max()
    # df_merged_query['rolling_min_feature'] = df_merged_query[data_name].rolling(12).min()
    # df_merged_query['envelope_height_feature'] = df_merged_query['rolling_max_feature'] - df_merged_query['rolling_min_feature']
    # df_merged_query['envelope_height_variation_feature'] = df_merged_query['envelope_height_feature'].diff()
    # df_merged_query['envelope_max_daily_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12).std()
    # df_merged_query['envelope_max_weekly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7).std()
    # df_merged_query['envelope_max_monthly_std_feature'] = df_merged_query['rolling_max_feature'].rolling(12*7*30).std()
    # df_merged_query['envelope_diff_feature'] = df_merged_query['rolling_max_feature'].diff()
    # df_merged_query['envelope_accel_feature'] = df_merged_query['envelope_diff_feature'].diff()
    
    
    
    plot_data_with_feature(df = df_merged_query, feature_name_to_observe=data_name)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_merged_query['daily_std_feature'] = df_merged_query[data_name].rolling(12).std()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Timestamp'] = pd.to_datetime(df['Timestamp'])


feature_columns ['daily_std_feature']


feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy





A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



feature_columns ['daily_std_feature']


feature_columns ['daily_std_feature']




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



# Latitude

In [None]:

list_of_20_object_id = list(df_merged['ObjectID'].unique())[:10]
list_of_column_names = df_merged.columns.tolist()
import pandas as pd
import plotly.graph_objects as go

data_name = "Latitude (deg)"

for object_id in list_of_20_object_id:
    df_merged_query = df_merged.query("ObjectID == @object_id")
    
   # Create the new features
    df_merged_query['latitude_diff_feature'] = df_merged_query[data_name].diff()
    df_merged_query['rolling_max_feature'] = df_merged_query[data_name].rolling(12).max()
    df_merged_query['rolling_min_feature'] = df_merged_query[data_name].rolling(12).min()
    df_merged_query['envelope_height_feature'] = df_merged_query['rolling_max_feature'] - df_merged_query['rolling_min_feature']
    df_merged_query['envelope_height_variation_feature'] = df_merged_query['envelope_height_feature'].diff()
    df_merged_query['envelope_diff_feature'] = df_merged_query['rolling_max_feature'].diff()
    df_merged_query['envelope_accel_feature'] = df_merged_query['envelope_diff_feature'].diff()
    
    
    
    plot_data_with_feature(df = df_merged_query, feature_name_to_observe=data_name)

# Vx (m/s)

In [None]:
df_merged_query.columns.tolist()