In [None]:
import os
import pandas as pd
from Helper_functions import load_and_combine_parquet_files, average_the_values, apply_kalman_filter, parallel_kalman_filter
from Ploting_functions import all_plot
# Base directory for every data path built below: the current working
# directory at the moment this cell runs (not necessarily the notebook's folder).
directory_path = os.getcwd()

## 1. Data

### 1.1 Loading partial Parquet files and combining the data

In [None]:
# Combine the partial parquet files found under <directory_path>/data.
# The helper's result is indexed by name below to obtain the four tables.
combined_results = load_and_combine_parquet_files(f"{directory_path}/data")

# "napetost" / "tok" tables (presumably voltage / current — confirm),
# each available in an "all" and a "wavelet" variant.
(
    napetost_all_df,
    napetost_wavelet_df,
    tok_all_df,
    tok_wavelet_df,
) = (
    combined_results[key]
    for key in ("napetost_all", "napetost_wavelet", "tok_all", "tok_wavelet")
)

### 1.2 Loading the already combined data (with Kalman filtering)

In [None]:
# Pre-joined table stored as a single parquet file inside the data folder.
joined_napetost_path = f"{directory_path}/data/joined_napetost_wavelet_df.parquet"
napetost_all_df_pre = pd.read_parquet(joined_napetost_path)

### 1.3 Loading the metadata

In [None]:
# Metadata spreadsheet that lives next to the parquet files.
metadata_path = f"{directory_path}/data/metadata.xlsx"
metadata = pd.read_excel(metadata_path)

## 2. Analysis

### 2.1 Extracting frequency of stimulation from data

In [None]:
# Divisor used to turn a row count into a sampling frequency.
# NOTE(review): 900 is presumably the recording length in seconds (15 min) — confirm.
RECORDING_DURATION = 900

# Number of rows per group of the outermost index level (one group per file).
data_points_per_file = napetost_wavelet_df.groupby(level=0).size()

# Rows per unit of recording time for each file.
sampling_frequency_per_file = data_points_per_file / RECORDING_DURATION

# Broadcast each file's value back onto all of that file's rows.
napetost_wavelet_df['sampling frequency'] = sampling_frequency_per_file.reindex(napetost_wavelet_df.index, level=0)

# Distinct sampling frequencies, labelled by the first row on which each occurs.
unique_sampling_frequencies = napetost_wavelet_df['sampling frequency'].drop_duplicates()

# Keep one representative row (the first occurrence) per distinct frequency.
# Filtering with `.isin(unique_sampling_frequencies)` would keep every row,
# because each value of a column is always a member of that column's own
# set of unique values.
unique_frequency_df = napetost_wavelet_df.loc[
    ~napetost_wavelet_df['sampling frequency'].duplicated()
]

# # Display the full rows for unique sampling frequencies
# print(unique_sampling_frequencies.to_markdown())

### 2.2 Applying the Kalman filter to the averaged values

In [None]:
# Average napetost_y against napetost_x, with interval_duration_seconds=5.
average_napetost_all_df = average_the_values(
    napetost_all_df,
    x_col="napetost_x",
    y_col="napetost_y",
    interval_duration_seconds=5,
)

# Run the Kalman filter on the "averaged" column. As the last expression of the
# cell, whatever the helper returns is what the notebook displays.
# NOTE(review): the plotting cells read a 'kalman_filtered' column from
# average_napetost_all_df, so the helper presumably adds it in place — confirm.
apply_kalman_filter(average_napetost_all_df, x_col="x_values", y_col="averaged")

### 2.3 Extracting protocols for voltage/current measurements

In [None]:
# Move the index into a regular column; it is read below as 'index'.
df = napetost_all_df_pre.reset_index()

# One row per distinct value of the 'index' column (the file names), plus the
# timestamp embedded in each name, parsed as day_month_year_hour_minute.
unique_files = pd.DataFrame({'file_name': df['index'].unique()}).assign(
    datetime=lambda files: pd.to_datetime(
        files['file_name'].str.extract(r'(\d{2}_\d{2}_\d{2}_\d{2}_\d{2})')[0],
        format='%d_%m_%y_%H_%M',
    )
)

In [None]:
# Work on an explicit copy: adding a column to a bare column-slice of `metadata`
# raises pandas' SettingWithCopyWarning and may not behave consistently.
Relevant_data = metadata[["ime", "protokol", "dat3", "interval"]].copy()

# Parse the day_month_year_hour_minute stamp embedded in 'dat3'.
Relevant_data['datetime'] = pd.to_datetime(
    Relevant_data['dat3'].str.extract(r'(\d{2}_\d{2}_\d{2}_\d{2}_\d{2})')[0],
    format='%d_%m_%y_%H_%M'
)

# Outer join on the timestamp, so metadata rows without a matching file
# (and files without metadata) stay visible in the printed table.
merged_df = Relevant_data.merge(unique_files, on='datetime', how='outer')

print(merged_df.to_markdown())

### 2.4 Extracting visit number

In [None]:
Relevant_data = Relevant_data.sort_values(by=['ime', 'datetime'])

# Visit number = dense rank of the timestamp within each 'ime' group, so rows
# sharing a timestamp share a visit number and the numbers have no gaps.
# Rows with a missing 'datetime' or 'ime' receive no rank (NaN); the nullable
# Int64 dtype keeps them as <NA> instead of raising on the integer cast.
Relevant_data['visit_number'] = (
    Relevant_data.groupby('ime')['datetime'].rank(method='dense').astype('Int64')
)

Protokol_visit_number = Relevant_data[['ime', 'datetime', 'visit_number', 'interval', 'protokol']]

# NOTE(review): machine-specific absolute path — this cell fails on any other
# machine. Left unchanged because the consumer of this CSV is not visible here;
# consider deriving it from a configurable base directory.
Protokol_visit_number.to_csv("C:/Users/User/Documents/local_repo/MEM_1/NOTEBOOKS/Non-visuazlization_notebooks/R_skripte/LMM_basic/Protokol_visit_number.csv")

## 3. Plot

In [None]:
# Plot the 'averaged' column against 'interval_bin'.
all_plot(
    df=average_napetost_all_df,
    x_val_col='interval_bin',
    y_val_col='averaged',
)

In [None]:
# Plot the 'kalman_filtered' column against 'interval_bin'.
all_plot(
    df=average_napetost_all_df,
    x_val_col='interval_bin',
    y_val_col='kalman_filtered',
)