In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn

In [2]:
filtered_columns = ['recording_time', 'dateDay-1', 'dateMonth-1', 'dateYear-1', 'phaseOfFlight-1',
                 'message0418DAA-1','message0422DAA-1','amscHprsovDrivF-1a', 'amscHprsovDrivF-1b',
                 'amscHprsovDrivF-2b', 'amscPrsovDrivF-1a',
                 'amscPrsovDrivF-1b', 'amscPrsovDrivF-2b',
                 'basBleedLowPressF-1a', 'basBleedLowPressF-2b',
                 'basBleedLowTempF-1a', 'basBleedLowTempF-2b',
                 'basBleedOverPressF-1a', 'basBleedOverPressF-2b',
                 'basBleedOverTempF-1a', 'basBleedOverTempF-2b',
                 'bleedFavTmCmd-1a', 'bleedFavTmCmd-1b',
                 'bleedFavTmCmd-2a', 'bleedFavTmCmd-2b', 'bleedFavTmFbk-1a',
                 'bleedFavTmFbk-1b', 'bleedFavTmFbk-2b', 'bleedHprsovCmdStatus-1a',
                 'bleedHprsovCmdStatus-1b', 'bleedHprsovCmdStatus-2a',
                 'bleedHprsovCmdStatus-2b', 'bleedHprsovOpPosStatus-1a',
                 'bleedHprsovOpPosStatus-1b', 'bleedHprsovOpPosStatus-2a',
                 'bleedHprsovOpPosStatus-2b', 'bleedMonPress-1a',
                 'bleedMonPress-1b', 'bleedMonPress-2a', 'bleedMonPress-2b',
                 'bleedOnStatus-1a', 'bleedOnStatus-1b', 'bleedOnStatus-2b',
                 'bleedOverpressCas-2a', 'bleedOverpressCas-2b',
                 'bleedPrecoolDiffPress-1a', 'bleedPrecoolDiffPress-1b',
                 'bleedPrecoolDiffPress-2a', 'bleedPrecoolDiffPress-2b',
                 'bleedPrsovClPosStatus-1a', 'bleedPrsovClPosStatus-2a',
                 'bleedPrsovFbk-1a']

In [3]:
def convert_columns(df_filtered):
    for coluna in df_filtered.columns:
        if df_filtered[coluna].dtype == 'float64':
            df_filtered[coluna] = df_filtered[coluna].astype('float32')

        if df_filtered[coluna].dtype == 'int64':
            df_filtered[coluna] = df_filtered[coluna].astype('int32')

    return df_filtered

In [4]:
folder_path = "airplane_data_089"

columns_to_check = ['message0418DAA-1', 'message0422DAA-1']

files_with_variation = []

parquet_files = [file for file in os.listdir(folder_path) if file.endswith('.parquet')]

for file in parquet_files:
    file_path = os.path.join(folder_path, file)
    df = pd.read_parquet(file_path)
    
    variation_detected = any(df[column].nunique() > 1 for column in columns_to_check if not df[column].isin([0, np.nan]).all())
    
    if variation_detected:
        files_with_variation.append(file)

print("Arquivos com variação nas colunas:", files_with_variation)

Arquivos com variação nas colunas: ['TCRF_ARCHIVE_06120089_20221212212511.parquet', 'TCRF_ARCHIVE_06120089_20221212212512.parquet']


In [5]:
def read_filtered_columns_from_parquet(file_path, filtered_columns):
    df = pd.read_parquet(file_path, columns=filtered_columns)
    return df

def process_files_incrementally(folder_path, selected_files=None,filtered_columns=None):
    parquet_files = [file for file in os.listdir(folder_path) if file.endswith('.parquet')]

    if selected_files:
        parquet_files = [file for file in parquet_files if file in selected_files]

    if not parquet_files:
        print("No Parquet files found in the specified folder.")
        return None
    
    dataframes = []
    
    for file in parquet_files:
        file_path = os.path.join(folder_path, file)
        processed_df = read_filtered_columns_from_parquet(file_path, filtered_columns)
        dataframes.append(processed_df)
    
    combined_df = pd.concat(dataframes)
    
    return combined_df

folder_name = "airplane_data_089"
selected_files = ['TCRF_ARCHIVE_06120089_20221212212511.parquet', 'TCRF_ARCHIVE_06120089_20221212212512.parquet']

flights_combined_data_fail_089 = process_files_incrementally(folder_name, selected_files, filtered_columns)

In [6]:
flights_combined_data_fail_089

Unnamed: 0,recording_time,dateDay-1,dateMonth-1,dateYear-1,phaseOfFlight-1,message0418DAA-1,message0422DAA-1,amscHprsovDrivF-1a,amscHprsovDrivF-1b,amscHprsovDrivF-2b,...,bleedOnStatus-2b,bleedOverpressCas-2a,bleedOverpressCas-2b,bleedPrecoolDiffPress-1a,bleedPrecoolDiffPress-1b,bleedPrecoolDiffPress-2a,bleedPrecoolDiffPress-2b,bleedPrsovClPosStatus-1a,bleedPrsovClPosStatus-2a,bleedPrsovFbk-1a
0,0,,,,0.0,,,,,,...,0.0,,,,,,,1.0,1.0,
1,50,,,,,,,0.0,0.0,0.0,...,,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,,,0.875
2,100,,,,0.0,,,,,,...,0.0,,,,,,,1.0,1.0,
3,150,9.0,12.0,2022.0,,0.0,0.0,,,,...,,,,,,,,,,
4,200,,,,0.0,,,,,,...,0.0,,,,,,,1.0,1.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13976,698800,,,,0.0,,,,,,...,0.0,,,,,,,1.0,1.0,
13977,698850,,,,,,,,,,...,,,,,,,,,,
13978,698900,,,,0.0,,,,,,...,0.0,,,,,,,1.0,1.0,
13979,698950,,,,,,,0.0,0.0,0.0,...,,0.0,0.0,-0.03125,-0.09375,0.00000,0.00000,,,0.750


In [7]:
for column in flights_combined_data_fail_089.columns:
    nan_start = True
    for i in range(len(flights_combined_data_fail_089)):
        if nan_start and not pd.isna(flights_combined_data_fail_089[column].iloc[i]):
            nan_start = False
            fill_value = flights_combined_data_fail_089[column].iloc[i]
        if nan_start:
            flights_combined_data_fail_089[column].iloc[i] = 0
        else:
            if pd.isna(flights_combined_data_fail_089[column].iloc[i]):
                flights_combined_data_fail_089[column].iloc[i] = fill_value

print(flights_combined_data_fail_089)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flights_combined_data_fail_089[column].iloc[i] = 0
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  flights_combined_data_fail_089[column].iloc[i] = fill_value


       recording_time  dateDay-1  dateMonth-1  dateYear-1  phaseOfFlight-1  \
0                   0        0.0          0.0         0.0              0.0   
1                  50        0.0          0.0         0.0              0.0   
2                 100        0.0          0.0         0.0              0.0   
3                 150        9.0         12.0      2022.0              0.0   
4                 200        9.0         12.0      2022.0              0.0   
...               ...        ...          ...         ...              ...   
13976          698800        9.0         12.0      2022.0              0.0   
13977          698850        9.0         12.0      2022.0              0.0   
13978          698900        9.0         12.0      2022.0              0.0   
13979          698950        9.0         12.0      2022.0              0.0   
13980          699000        9.0         12.0      2022.0              0.0   

       message0418DAA-1  message0422DAA-1  amscHprsovDrivF-1a  

In [8]:
flights_combined_data_fail_089

Unnamed: 0,recording_time,dateDay-1,dateMonth-1,dateYear-1,phaseOfFlight-1,message0418DAA-1,message0422DAA-1,amscHprsovDrivF-1a,amscHprsovDrivF-1b,amscHprsovDrivF-2b,...,bleedOnStatus-2b,bleedOverpressCas-2a,bleedOverpressCas-2b,bleedPrecoolDiffPress-1a,bleedPrecoolDiffPress-1b,bleedPrecoolDiffPress-2a,bleedPrecoolDiffPress-2b,bleedPrsovClPosStatus-1a,bleedPrsovClPosStatus-2a,bleedPrsovFbk-1a
0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.00000,0.00000,0.00000,0.00000,1.0,1.0,0.000
1,50,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
2,100,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
3,150,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
4,200,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13976,698800,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
13977,698850,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
13978,698900,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.06250,0.03125,0.03125,0.03125,1.0,1.0,0.875
13979,698950,9.0,12.0,2022.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,-0.03125,-0.09375,0.00000,0.00000,1.0,1.0,0.750


In [9]:
flights_combined_data_fail_089.to_parquet("flights_combined_data_fail_089.parquet")