In [1]:
import sys
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os
import glob
import plotly.express as px
from scipy.fft import fft
%matplotlib inline

## Path

In [2]:
parent_dir = os.path.abspath(r'D:')
concatenated_yaws_dir = os.path.join(parent_dir, 'processed_data', 'Concatenated_Yaws')
#inputoutput_dir = os.path.join(parent_dir, 'processed_data', 'InputOutput')
output_dir = os.path.join(parent_dir, 'processed_data', 'ModelingData')


## New features 

In [3]:
concatenated_yaws_dir = os.path.join(parent_dir, 'processed_data', 'Concatenated_Yaws')

all_files = [file for file in os.listdir(concatenated_yaws_dir) if file.endswith('.csv')]

if len(all_files) == 0:
    print("No CSV files found in the directory.")
else:
    for file in all_files:
        file_path = os.path.join(concatenated_yaws_dir, file)

        df = pd.read_csv(file_path)

        if 'Turbulence' in df.columns:
            print("Turbulence feature already added in", file)
        else:
            df['Turbulence'] = df['wind_speed_std'] / df['wind_speed_mean']
            df.to_csv(file_path, index=False)
            print("Turbulence feature added to", file)


Turbulence feature added to Yaw01_combined_data.csv
Turbulence feature added to Yaw02_combined_data.csv
Turbulence feature added to Yaw03_combined_data.csv
Turbulence feature added to Yaw04_combined_data.csv
Turbulence feature added to Yaw05_combined_data.csv
Turbulence feature added to Yaw-11_combined_data.csv
Turbulence feature added to Yaw-12_combined_data.csv
Turbulence feature added to Yaw-13_combined_data.csv
Turbulence feature added to Yaw-14_combined_data.csv
Turbulence feature added to Yaw-15_combined_data.csv
Turbulence feature added to Yaw-16_combined_data.csv
Turbulence feature added to Yaw-17_combined_data.csv
Turbulence feature added to Yaw-18_combined_data.csv
Turbulence feature added to Yaw-19_combined_data.csv
Turbulence feature added to Yaw-20_combined_data.csv
Turbulence feature added to Yaw06_combined_data.csv
Turbulence feature added to Yaw07_combined_data.csv
Turbulence feature added to Yaw08_combined_data.csv
Turbulence feature added to Yaw09_combined_data.csv
Tu

## List of all Yaws

In [4]:

concatenated_yaws_dir = os.path.join(parent_dir, 'processed_data', 'Concatenated_Yaws')

os.chdir(concatenated_yaws_dir) 
csv_files = glob.glob('*.csv')

concatenated_dataframes = []  

for csv_file in csv_files:
    df = pd.read_csv(csv_file)
    concatenated_dataframes.append(df)

concatenated_df = pd.concat(concatenated_dataframes)

print(concatenated_df)


      acceleration_CROSS_max  acceleration_CROSS_mean  \
0                   0.082824                -0.000035   
1                   0.079123                -0.000025   
2                   0.083427                -0.000021   
3                   0.071739                -0.000006   
4                   0.090891                 0.000002   
...                      ...                      ...   
1887                1.020887                 0.000085   
1888                1.214840                -0.000119   
1889                1.198685                 0.000035   
1890                1.464307                 0.000241   
1891                1.276531                -0.000316   

      acceleration_CROSS_median  acceleration_CROSS_min  \
0                     -0.000076               -0.081768   
1                      0.000207               -0.081799   
2                      0.000065               -0.088581   
3                      0.000142               -0.073519   
4                   

## Preparing dataset for modeling

In [5]:

big_train_df = pd.DataFrame()
big_test_df = pd.DataFrame()

for df in concatenated_dataframes:
    train_df = df.sample(frac=0.8, random_state=42)
    test_df = df.drop(train_df.index)

    big_train_df = pd.concat([big_train_df, train_df])
    big_test_df = pd.concat([big_test_df, test_df])

print("Big Training Dataset Shape:", big_train_df.shape)
print("Big Testing Dataset Shape:", big_test_df.shape)


Big Training Dataset Shape: (62074, 141)
Big Testing Dataset Shape: (15498, 141)


## Exporting required dataset

In [6]:

os.makedirs(output_dir, exist_ok=True)

train_output_path = os.path.join(output_dir, 'big_train.parquet')
test_output_path = os.path.join(output_dir, 'big_test.parquet')

big_train_df.to_parquet(train_output_path)
big_test_df.to_parquet(test_output_path)
