In [1]:
import plotly.express as px
import os
import pandas as pd
import numpy as np
import plotly.graph_objects as go
import altair as alt
alt.data_transformers.enable("vegafusion")
import math


#import csv
pd.set_option('display.max_rows', None)  # Display all rows

In [2]:
day = "24"
month = "08"
dataset = "ble"

base = -1
base_person = -1

date = day + "_" + month #change date accordingly
date_year = "2023-" + month + "-" + day
if date == "11_10":
    base = 1
elif date == "13_10":
    base = 2
elif date == "24_08":
    base = 0
    base_person = 1
else:
    base = 0

if base == 1 or base ==2:
    folder_path = dataset + '_base/'
elif base_person:
    folder_path = dataset + '_base_person/'
else:
    folder_path = dataset +'_'+ date + '/'

def process_file(file_number, date):
    file_path = os.path.join(folder_path, f'{dataset}_{date}_{file_number}.csv')

    df = pd.read_csv(file_path, parse_dates=['ts'])

    # Remove first empty column if it exists
    if df.shape[1] == 4:

        df = df.drop(df.columns[0], axis=1)
    # Remove entries with 'gabarit' equal to 1 or 2
    df_filt = df[(df['id'] == 2) | (df['id'] == 1)]

    # Write the updated DataFrame back to the CSV file
    df.to_csv(file_path, index = False)
    # Reset the index of the DataFrame
    df_filt = df_filt.reset_index(drop = True)

    return df_filt

df1 = process_file(2,date)
df2 = process_file(1,date)
df3 = process_file(3,date)


In [3]:
def plot_ble(df,station):
    # Plotly line graph
    color_discrete= {'1': 'tomato', '2': 'conrflowerblue'}

    fig = px.line(df, x='ts', y='rssi', color='id', color_discrete_map=color_discrete,
                  title=f'Line Graph for Station {station}')
    fig.update_layout(xaxis_title='Timestamp', yaxis_title='RSSI', legend_title='ID')
    fig.show()
plot_ble(df1,station =1)
plot_ble(df2, station =2)
plot_ble(df3, station = 3)



  v = v.dt.to_pydatetime()



The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result




The behavior of DatetimeProperties.to_pydatetime is deprecated, in a future version this will return a Series containing python datetime objects instead of an ndarray. To retain the old behavior, call `np.array` on the result



In [4]:
def analyse_df(df1, df2, df3, beacon):
    # Filter DataFrames for the given id value and remove rows with RSSI of -100
    df1_id = df1[(df1['id'] == beacon)].reset_index(drop=True)
    df2_id = df2[(df2['id'] == beacon)].reset_index(drop=True)
    df3_id = df3[(df3['id'] == beacon)].reset_index(drop=True)


    # Combine dataframes with an outer merge
    merged_df = df1_id.merge(df2_id, on='ts', how='outer',suffixes = ['1','2']).merge(df3_id, on='ts', how='outer',suffixes = ['','3'])

    merged_df.drop(columns=['id1', 'id2', 'id'], inplace=True)
    merged_df.rename(columns={'rssi': 'rssi3'}, inplace=True)

    merged_df = merged_df.sort_values(by='ts')

    merged_df = merged_df.ffill()
    merged_df = merged_df.reset_index(drop=True)
    

    return merged_df


df_rssi1 = analyse_df(df1,df2,df3, beacon = 1) #beacon 1

df_rssi2 = analyse_df(df1,df2,df3, beacon = 2) #beacon 2
#print(df_rssi2)


In [5]:
# FIND BASE VALUES
def find_ref_ble_1_2(df_rssi1, df_rssi2):

    df_rssi1 = df_rssi1[(df_rssi1['rssi1'] != -100) & (df_rssi1['rssi2'] != -100) & (df_rssi1['rssi3'] != -100)]
    df_rssi2 = df_rssi2[(df_rssi2['rssi1'] != -100) & (df_rssi2['rssi2'] != -100) & (df_rssi2['rssi3'] != -100)]

    ref_values = np.zeros((3,3))
    #############################################################
    #                              station 1 station 2 station 3
    #beacon placed in station 1     X         X         X
    #beacon placed in station 2     X         X         X
    #beacon placed in station 3     X         X         X

    #beacon 2 placed in beggining (1)
    #beacon 1 placed in end on production line (2)
    med1_1 = df_rssi2['rssi1'].median()
    av1_1 = df_rssi2['rssi1'].mean()

    med2_1 = df_rssi2['rssi2'].median()
    av2_1 = df_rssi2['rssi2'].mean()

    med3_1 = df_rssi2['rssi3'].median()
    av_3_1 = df_rssi2['rssi3'].mean()

    print(med1_1, med2_1, med3_1)
    ref_values[0,:] = [av1_1,av2_1,av_3_1]

    med1_2 = df_rssi1['rssi1'].median()
    av1_2 = df_rssi1['rssi1'].mean()

    med2_2 = df_rssi1['rssi2'].median()
    av2_2 = df_rssi1['rssi2'].mean()

    med3_2 = df_rssi1['rssi3'].median()
    av_3_2 = df_rssi1['rssi3'].mean()

    #print(med1_2, med2_2, med3_2)
    ref_values[1,:] = [av1_2,av2_2,av_3_2]


    return ref_values

def find_ref_3(df_rssi1):
    df_rssi1 = df_rssi1[(df_rssi1['rssi1'] != -100) & (df_rssi1['rssi2'] != -100) & (df_rssi1['rssi3'] != -100)]

    # Filter DataFrame based on the time range
    med1_1 = df_rssi1['rssi1'].median()
    av1_1 = df_rssi1['rssi1'].mean()

    med2_1 = df_rssi1['rssi2'].median()
    av2_1 = df_rssi1['rssi2'].mean()

    med3_1 = df_rssi1['rssi3'].median()
    av_3_1 = df_rssi1['rssi3'].mean()

    #print(med1_1, med2_1, med3_1)
    ref_values[2,:] = [av1_1,av2_1,av_3_1]

    return ref_values

#if date == '11_10':
#    ref_values = find_ref_ble_1_2(df_rssi1, df_rssi2)
#    print(ref_values)
#elif date == '13_10':
#    ref_values = find_ref_3(df_rssi1)
#    print(ref_values)
#else:
ref_values=np.array(
[[-66.44549266, -74.26545582, -73.88842253],
 [-60.86128482, -43.43985976, -65.93448369],
 [-68.73783186, -56.43292456, -61.49354318]])




In [6]:
def state_df_person(df):
    #1 fim 18:02:00 -> 18:06:00
    #2 inicio 17:57:30 -> 18:01:00
    #3 monta cargas 18:07:00 -> 18:10:00
    df['real station'] = int(0)

    # Define the time frame
    start_time = pd.to_datetime('2023-08-24 17:57:35')
    end_time = pd.to_datetime('2023-08-24 18:00:40')

    # Set 'real station' to 2 for rows within the specified time frame
    df.loc[(df['ts'] >= start_time) & (df['ts'] <= end_time), 'real station'] = 1


    # Define the time frame
    start_time = pd.to_datetime('2023-08-24 18:02:00')
    end_time = pd.to_datetime('2023-08-24 18:05:20')

    # Set 'real station' to 1 for rows within the specified time frame
    df.loc[(df['ts'] >= start_time) & (df['ts'] <= end_time), 'real station'] = 2

    # Define the time frame
    start_time = pd.to_datetime('2023-08-24 18:07:00')
    end_time = pd.to_datetime('2023-08-24 18:10:00')

    # Set 'real station' to 3 for rows within the specified time frame
    df.loc[(df['ts'] >= start_time) & (df['ts'] <= end_time), 'real station'] = 3

    return df


if date == "24_08":
    #print(df_rssi2)
    df_rssi2 = state_df_person(df_rssi2)

if date == '11_10':
    df_rssi2['real station'] = 1
    df_rssi1['real station'] = 2
    
if date == '13_10':
    df_rssi2['real station'] = 3

print(ref_values)


[[-66.44549266 -74.26545582 -73.88842253]
 [-60.86128482 -43.43985976 -65.93448369]
 [-68.73783186 -56.43292456 -61.49354318]]


In [15]:
def calculate_rms(rssi, rssi_ref):
    # Calculate RMS difference for each receiver   
    rms_diff = np.sqrt(((rssi) - rssi_ref)**2)
    if math.isnan(rssi) or rssi == -100:
        rms_diff = 0

    return rms_diff

def define_weights(df_rssi, ref_values, iterations):
    # Create a DataFrame to store the results of different weight combinations
    results = pd.DataFrame(columns=['weight_station1', 'weight_station2', 'weight_station3', 'weight_rssi', 'error'])
    best_mean_accuracy  = 0 
    best_weights = None 

    # Iterate through weight combinations
    for w_station1 in np.linspace(0, 1, iterations):
        for w_station2 in np.linspace(0, 1, iterations):
            for w_station3 in np.linspace(0, 1, iterations):
                for w_rssi in np.linspace(0, 1, iterations):
                    

                    if w_station1 + w_station2 + w_station3 + w_rssi == 1:
              

                        print("weights:", w_station1, w_station2, w_station3, w_rssi)
                        accuracies = []
                        for index, row in df_rssi.iterrows():
                            if row['real station'] == 0:
                                continue  # Skip rows with real station 0

                            # Calculate weighted RMS values for each station using reference values
                            rms_station = np.zeros(3)

                            for station in range(3):
                                rssi_values = [row['rssi1'], row['rssi2'], row['rssi3']]
                                
                                # Check for -100 or NaN in the rssi values
                                #if rssi_values[station] != -100 and not np.isnan(rssi_values[station]):
                                rms_station[station] = (w_station1 * calculate_rms(rssi_values[0], ref_values[station, 0]) + 
                                                        w_station2 * calculate_rms(rssi_values[1], ref_values[station, 1]) +
                                                        w_station3 * calculate_rms(rssi_values[2], ref_values[station, 2]))
                                print("rms: ",rms_station)

                            # Determine the station with the minimum weighted RMS value
                            station_min_ref = np.argmin(rms_station) + 1

                            # Calculate the station determined by the smallest RSSI
                            station_min_rssi = np.argmin([row['rssi1'], row['rssi2'], row['rssi3']]) + 1

                            # Calculate the weighted combination of station predictions
                            station_pred = int((w_station1 + w_station2 + w_station3) * station_min_ref + w_rssi * station_min_rssi)

                            if station_pred == row['real station']:
                                accuracies.append(1)
                            else:
                                accuracies.append(0)

                        mean_accuracy = sum(accuracies) / len(accuracies)

                        print("accuracy: ", mean_accuracy)
                        if mean_accuracy > best_mean_accuracy:
                            best_mean_accuracy = mean_accuracy
                            best_weights = {
                                'weight_station1': w_station1,
                                'weight_station2': w_station2,
                                'weight_station3': w_station3,
                                'weight_rssi': w_rssi,
                            }
                    else:
                        continue
    #best_weights = results.loc[results['error'].idxmin()]
 
    print(best_weights)
    return best_weights


def classify_points(df_rssi, best_weights, ref_values):

    for index, row in df_rssi.iterrows():
        # Calculate weighted RMS values for each station using reference values
        
        rms_station = np.zeros(3)

        #if index in range(0,10):
        #    print(f"i: {index}")

        for station in range(3):
            rms_station[station] = (
                best_weights['weight_station1'] * calculate_rms(row['rssi1'], ref_values[station, 0]) +
                best_weights['weight_station2'] * calculate_rms(row['rssi2'], ref_values[station, 1]) +
                best_weights['weight_station3'] * calculate_rms(row['rssi3'], ref_values[station, 2])
            )
            """
            if index in range(0,10):
                print(f"station: {station+1}")
                print(f"rssi1: {row['rssi1']}")
                print(calculate_rms(row['rssi1'], ref_values[station, 0]))
                print(f"rssi2: {row['rssi2']}")
                print(calculate_rms(row['rssi2'], ref_values[station, 1]))
                print(f"rssi3: {row['rssi3']}")
                print(calculate_rms(row['rssi3'], ref_values[station, 2]))
                print()"""

        #FALTA RMSS MIN
        #print(f"rms station: {rms_station}")
        # Determine the station with the minimum weighted RMS value
        station_min_ref = np.argmin(rms_station) + 1

        
        station_min_rssi = np.argmin([row['rssi1'], row['rssi2'], row['rssi3']]) + 1

        # Calculate the weighted combination of station predictions

        #print(f"min rssi: {min_rssi_station}")

        station_pred = station_min_rssi * best_weights['weight_rssi'] + station_min_ref * (1 - best_weights['weight_rssi'])
        station_pred = (round(station_pred))

        #if index in range(0,10):
        #    print(station_pred)

        #print()
        #print()
        df_rssi.loc[index, 'station'] = station_pred
        #int(station_pred)
     
    return df_rssi

def calculate_accuracy(df_rssi):
    accuracy = np.zeros(3)  # Initialize an array to store accuracy values for each station

    for station in range(1, 4):
        total_points = 0
        total_correct = 0

        for index, row in df_rssi.iterrows():
            if row['real station'] == 0:
                continue  # Skip rows with real station 0

            if row['real station'] == station:
                total_points += 1
                if row['station'] == row['real station']:
                    total_correct += 1

        # Calculate accuracy for the current station
        accuracy[station - 1] = total_correct / total_points * 100 if total_points > 0 else 0
    print("Accuracy: ",accuracy)
    return #accuracy


if date == '24_08':
    #print(df_rssi2)
    #df_rssi2['real station'] = df_rssi2['real station'].replace(3, 2)
    best_weights = define_weights(df_rssi2.loc[:200], ref_values, iterations=3)
    df_rssi2_ = classify_points(df_rssi2.loc[:200],best_weights,ref_values)
    #df_rssi2['station'] = df_rssi2['station'].replace(3, 2)
    calculate_accuracy(df_rssi2)
    #print(df_rssi2)
else:
    best_weights = {
    'weight_station1': 0.2,
    'weight_station2': 0.2,
    'weight_station3': 0.2,
    'weight_rssi': 0.4,
    'error': 1.01
}

if date == '11_10':
    #df_rssi2 = classify_points(df_rssi2,best_weights,ref_values)
    #calculate_accuracy(df_rssi2)

    df_rssi1 = classify_points(df_rssi1,best_weights,ref_values)
    calculate_accuracy(df_rssi1)
    #print(df_rssi1)

elif date == '13_10':
    df_rssi2 = classify_points(df_rssi2,best_weights,ref_values)
    calculate_accuracy(df_rssi2)

elif date != '24_08':
    print("Beacon 1")
    df_rssi1 = classify_points(df_rssi1,best_weights,ref_values)
    display(df_rssi1.loc[:200])
    #display(df_rssi1)

    print("Beacon 2")
    df_rssi2 = classify_points(df_rssi2,best_weights,ref_values)
    display(df_rssi2.loc[:50])



#print(ref_values)
display(df_rssi2_)



#[[-66.44549266 -74.26545582 -73.88842253]
# [-60.86128482 -43.43985976 -65.93448369]
# [-68.73783186 -56.43292456 -61.49354318]]
 

weights: 0.0 0.0 0.0 1.0
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  [0. 0. 0.]
rms:  



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



KeyError: 'station'

In [12]:
def remove_outliers(df):
    # Iterate through the DataFrame and update values
    for i in range(1, len(df) - 1):
        if df.at[i - 1, 'station'] == df.at[i + 1, 'station'] and df.at[i, 'station'] != df.at[i - 1, 'station']:
            df.at[i, 'station'] = df.at[i - 1, 'station']

    return df

def  remove_outliers2(df):
    # Iterate through the DataFrame and update values
    for i in range(1, len(df) - 2):
        if df.at[i - 1, 'station'] == df.at[i + 2, 'station'] and df.at[i, 'station'] == df.at[i + 1, 'station'] and df.at[i, 'station'] != df.at[i - 1, 'station']:
            df.at[i, 'station'] = df.at[i - 1, 'station']
            df.at[i + 1, 'station'] = df.at[i - 1, 'station']

    return df


#eliminiate_outliers(df_rssi1)
#print(df_rssi1)
#df_rssi2 = remove_outliers(df_rssi2)
#df_rssi2 = remove_outliers2(df_rssi2)
df_rssi1 = remove_outliers(df_rssi1)
df_rssi1 = remove_outliers2(df_rssi1)

#calculate_accuracy(df_rssi1)
calculate_accuracy(df_rssi1)


Accuracy:  [  0. 100.   0.]


In [13]:

def time_in_station_ble(df_rssi):
    # Initialize time variables for each station
    time_in_1 = pd.to_timedelta('0 seconds')
    time_in_2 = pd.to_timedelta('0 seconds')
    time_in_3 = pd.to_timedelta('0 seconds')

    for index, row in df_rssi.iterrows():
        # Calculate time spent in each station based on station assignments
        if row['station'] == 1:
            if index == 0:
                continue
            time_in_1 += df_rssi.loc[index, 'ts'] - df_rssi.loc[index - 1, 'ts']

        if row['station'] == 2:
            if index == 0:
                continue
            time_in_2 += df_rssi.loc[index, 'ts'] - df_rssi.loc[index - 1, 'ts']

        if row['station'] == 3:
            if index == 0:
                continue
            time_in_3 += df_rssi.loc[index, 'ts'] - df_rssi.loc[index - 1, 'ts']

    # Calculate total time and percentages for each station
    total_time = time_in_1 + time_in_2 + time_in_3
    perc1 = time_in_1 / total_time * 100
    perc1 = round(perc1, 2)
    perc2 = time_in_2 / total_time * 100
    perc2 = round(perc2, 2)
    perc3 = time_in_3 / total_time * 100
    perc3 = round(perc3, 2)
    print(time_in_1,time_in_2,time_in_3)
    print(time_in_1+time_in_2+time_in_3)
    print(f'station1: {perc1}%,    station2: {perc2}%,    station3: {perc3}%')
    return


print("Beacon 1")
#print(df_rssi1)
time_in_station_ble(df_rssi1)
display(df_rssi1.loc[:200])
#display(df_rssi1)

print("Beacon 2")
time_in_station_ble(df_rssi2)
display(df_rssi2.loc[:50])




Beacon 1
0 days 00:00:00 0 days 11:59:08 0 days 00:00:00
0 days 11:59:08
station1: 0.0%,    station2: 100.0%,    station3: 0.0%


Unnamed: 0,ts,rssi1,rssi2,rssi3,real station,station
0,2023-10-11 19:00:38,,,-76.0,2,2.0
1,2023-10-11 19:01:05,,-40.0,-76.0,2,2.0
2,2023-10-11 19:02:02,,-40.0,-62.0,2,2.0
3,2023-10-11 19:02:44,,-40.0,-75.0,2,2.0
4,2023-10-11 19:02:56,,-40.0,-57.0,2,2.0
5,2023-10-11 19:03:26,,-40.0,-75.0,2,2.0
6,2023-10-11 19:03:33,,-40.0,-61.0,2,2.0
7,2023-10-11 19:04:00,,-46.0,-61.0,2,2.0
8,2023-10-11 19:04:06,,-40.0,-61.0,2,2.0
9,2023-10-11 19:04:15,,-40.0,-76.0,2,2.0


Beacon 2


KeyError: 'station'

In [10]:
# Resampling with forward fill
def resample_df(df, freq):
    # Set the 'Timestamp' column as the DataFrame index
    df.set_index('ts', inplace=True)

    # Resample the DataFrame with a desired frequency (e.g., 10 minutes) and forward fill
    resampled_df = df.resample(freq).ffill()

    # Reset the index to have the timestamp as a column again
    resampled_df = resampled_df.reset_index()

    return resampled_df

#display(df1.loc[:50])
#df_rssi1_ = resample_df(df_rssi1,freq = '5S')
df_rssi1_ = resample_df(df_rssi1_,freq = '5S')
df_rssi2_ = resample_df(df_rssi2_,freq = '5S')

In [13]:
    
def plot_ble_state(df_rssi,beacon):
 
    x = np.array(df_rssi['ts'])
    y = np.array(df_rssi['station'])
    fig1 = go.Figure()
    fig1.add_trace(go.Scatter(x=x, y=y, name="hv",
                    line_shape='hv'))
    fig1.show()


plot = 0

if plot:
    plot_ble_state(df_rssi1_,beacon = 1)
    plot_ble_state(df_rssi2_, beacon = 2)



   

In [22]:
def strip_plot(df, beacon,include_3):
    df['ts'] = df['ts'].astype(str)  
    if include_3:
        y_scale = alt.Scale(domain=[1, 2, 3])  # Set the y-axis range from 1 to 3
        # Define color encoding based on the station value
        color = alt.Color('station:N', scale=alt.Scale(
            domain=[1, 2, 3], range=['cornflowerblue', 'darkorange', 'violet']
        ))
    else:
        y_scale = alt.Scale(domain=[1, 2])  # Set the y-axis range from 1 to 3
        color = alt.Color('station:N', scale=alt.Scale(
        domain=[1, 2], range=['cornflowerblue', 'darkorange']))


    x_domain = [pd.to_datetime(f'{date_year} 17:57:00'), pd.to_datetime(f'{date_year} 18:10:00')]
    #1 inicio 17:57:30 -> 18:01:00
    # fim 18:02:00 -> 18:06:00
    #3 monta cargas 18:07:00 -> 18:10:00
    domain = [(f'{date_year} 17:57:00'),(f'{date_year} 18:10:00')]

    chart = alt.Chart(df).mark_tick().encode(
        x=alt.X('ts:T', title='Timestamp'), #scale=alt.Scale(domain=domain)),
        y=alt.Y('station:O', title='Station'),#, scale=y_scale),
        color=color  # Apply the color encoding
    )
    #.properties(
    #    title=f'Classified station for beacon {beacon}',
    #    width=300  # Set the width to 600 pixels (adjust as needed)
        #height=150  # Set the height to 400 pixels (adjust as needed)
    #)

    return chart



    
#chart1 = strip_plot(df_rssi1_,beacon=1)
chart2 = strip_plot(df_rssi2_,beacon=1,include_3=0)

#chart1 | 
chart2

MaxRowsError: The number of rows in your dataset is greater than the maximum allowed (5000).

Try enabling the VegaFusion data transformer which raises this limit by pre-evaluating data
transformations in Python.
    >> import altair as alt
    >> alt.data_transformers.enable("vegafusion")

Or, see https://altair-viz.github.io/user_guide/large_datasets.html for additional information
on how to plot large datasets.

alt.Chart(...)