In [1]:
from google.colab import drive
drive.mount('/content/mydrive')


Mounted at /content/mydrive


In [2]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# List of file paths
file_paths = ["/content/mydrive/MyDrive/Kaggle/Dataset/0D.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/1D.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/2D.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/3D.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/4D.csv"]

# List of target values for each file
target_values = [0, 1, 2, 3, 4]

# Number of samples to extract from the middle of each CSV
num_samples = 50000

# List to store the updated DataFrames
updated_dfs = []

# Preprocessing steps
scaler = StandardScaler()

# Loop through each file
for file_path, target_value in zip(file_paths, target_values):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Extract the desired number of samples from the middle of the DataFrame
    start_index = max(0, df.shape[0] // 2 - num_samples // 2)
    end_index = start_index + num_samples
    df = df.iloc[start_index:end_index]
    
    # Data preprocessing steps
    # Handle missing values
    df = df.interpolate()

    # Data normalization
    features = df
    normalized_features = scaler.fit_transform(features)
    df_normalized = pd.DataFrame(normalized_features, columns=features.columns)
    
    # Add a target column to the DataFrame
    df_normalized['Target'] = target_value
    
    # Append the updated DataFrame to the list
    updated_dfs.append(df_normalized)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(updated_dfs)

# Save the combined DataFrame to a CSV file
combined_df.to_csv("/content/mydrive/MyDrive/Kaggle/FT_combined_data.csv", index=False)

# Print the combined DataFrame
print(combined_df)


          V_in  Measured_RPM  Vibration_1  Vibration_2  Vibration_3  Target
0      1.54509      1.453696    -0.292070     2.254716     0.692729       0
1      1.54509      1.453696     0.224638    -2.102691    -1.919793       0
2      1.54509      1.453696    -0.585772    -1.946220    -1.288580       0
3      1.54509      1.453696     0.525003     1.828253     0.132137       0
4      1.54509      1.453696    -4.995004    -0.195169     3.100203       0
...        ...           ...          ...          ...          ...     ...
49995 -1.28523     -1.322414     0.067192    -0.000344    -0.104108       4
49996 -1.28523     -1.322414    -0.017031     0.012491    -0.071417       4
49997 -1.28523     -1.322414    -0.001922     0.033028    -0.118476       4
49998 -1.28523     -1.322414    -0.025709    -0.038015    -0.035186       4
49999 -1.28523     -1.322414     0.007841    -0.022971    -0.080995       4

[250000 rows x 6 columns]


In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# List of file paths
file_paths = ["/content/mydrive/MyDrive/Kaggle/Dataset/0E.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/1E.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/2E.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/3E.csv", "/content/mydrive/MyDrive/Kaggle/Dataset/4E.csv"]

# List of target values for each file
target_values = [0, 1, 2, 3, 4]

# Number of samples to extract from the middle of each CSV
num_samples = 5000

# List to store the updated DataFrames
updated_dfs = []

# Preprocessing steps
scaler = StandardScaler()

# Loop through each file
for file_path, target_value in zip(file_paths, target_values):
    # Read the CSV file
    df = pd.read_csv(file_path)
    
    # Extract the desired number of samples from the middle of the DataFrame
    start_index = max(0, df.shape[0] // 2 - num_samples // 2)
    end_index = start_index + num_samples
    df = df.iloc[start_index:end_index]
    
    # Data preprocessing steps
    # Handle missing values
    df = df.interpolate()

    # Data normalization
    features = df
    normalized_features = scaler.fit_transform(features)
    df_normalized = pd.DataFrame(normalized_features, columns=features.columns)
    
    # Add a target column to the DataFrame
    df_normalized['Target'] = target_value
    
    # Append the updated DataFrame to the list
    updated_dfs.append(df_normalized)

# Concatenate all DataFrames into a single DataFrame
combined_df = pd.concat(updated_dfs)

# Save the combined DataFrame to a CSV file
combined_df.to_csv("/content/mydrive/MyDrive/Kaggle/E_FT_combined_data.csv", index=False)

# Print the combined DataFrame
print(combined_df)


      V_in  Measured_RPM  Vibration_1  Vibration_2  Vibration_3  Target
0      0.0     -3.167609    -2.235252     1.974175     2.729873       0
1      0.0     -3.167609    -1.185190    -0.872208     0.887431       0
2      0.0     -3.167609    -0.270910    -0.558714     0.888415       0
3      0.0     -3.167609     0.581266    -0.151156    -1.655768       0
4      0.0     -3.167609     1.668626     1.478789    -5.452359       0
...    ...           ...          ...          ...          ...     ...
4995   0.0     -1.000000    -0.262406     0.532538     0.265492       4
4996   0.0     -1.000000     0.740770     1.486638    -0.948785       4
4997   0.0     -1.000000     0.849010    -1.761848    -0.906289       4
4998   0.0     -1.000000     0.368707    -0.942933    -0.124540       4
4999   0.0     -1.000000    -1.556322     1.918962     1.245834       4

[25000 rows x 6 columns]
