<a href="https://colab.research.google.com/github/Anwarshaikk/Velocity-Prediction/blob/main/Code_to_save_preprocessed_data.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import pandas as pd
import os
from sklearn.preprocessing import MinMaxScaler
from google.colab import files

# Input and output locations (all on the mounted Google Drive; adjust as needed).
# Path to the standard dataset workbook.
input_file_standard = '/content/drive/MyDrive/Velocity Prediction/Standard Datasets/FTP Standard.xlsx'
# Folder that holds the synthetic dataset workbooks (*.xlsx).
input_folder_synthetic = '/content/drive/MyDrive/Velocity Prediction/Synthetic Datasets/FTP Synthetic'
# Folder that receives every preprocessed workbook.
output_folder = '/content/drive/MyDrive/Velocity Prediction/Preprocessed datasets'
# The combined workbook is written inside the output folder; the .xlsx
# extension is required by the openpyxl writer used later.
combined_file_path = os.path.join(output_folder, 'Combined_Preprocessed_Dataset.xlsx')

# Create the output folder if it doesn't exist. exist_ok=True makes this a
# single idempotent call instead of a racy "check, then create" sequence.
os.makedirs(output_folder, exist_ok=True)

# Define a function to preprocess a dataset
def preprocess_dataset(file_path, scaler=None):
    """Read one Excel workbook and scale its speed column with a shared scaler.

    The workbook must contain the columns 'Test Time, secs' and
    'Target Speed, mph'. Time values are passed through unchanged; speed
    values are scaled.

    Parameters
    ----------
    file_path : str or path-like
        Workbook to read with ``pd.read_excel``.
    scaler : scaler object or None, optional
        Object exposing ``fit_transform`` and ``transform`` (e.g. a
        ``MinMaxScaler``). If None, a new ``MinMaxScaler(feature_range=(0, 1))``
        is created. An unfitted scaler is fitted on this dataset; an already
        fitted scaler is reused as-is so that every dataset processed with it
        shares the same scale.

    Returns
    -------
    (pandas.DataFrame, scaler)
        The preprocessed frame (same two column names) and the scaler that
        was used, so callers can reuse it on further datasets.

    Raises
    ------
    KeyError
        If either required column is absent from the workbook.

    Notes
    -----
    When a previously fitted scaler is reused, values outside the range it
    was fitted on map outside the scaler's feature range.
    """
    time_column = 'Test Time, secs'
    speed_column = 'Target Speed, mph'

    data = pd.read_excel(file_path)

    # Fail with a message that names the file instead of a bare column KeyError.
    missing = [col for col in (time_column, speed_column) if col not in data.columns]
    if missing:
        raise KeyError(f"{file_path} is missing required column(s): {missing}")

    time = data[time_column].values.reshape(-1, 1)
    speed = data[speed_column].values.reshape(-1, 1)

    if scaler is None:
        scaler = MinMaxScaler(feature_range=(0, 1))

    # scikit-learn convention: a fitted estimator carries attributes whose
    # names end in a single trailing underscore (data_min_, scale_, ...).
    already_fitted = any(
        name.endswith('_') and not name.startswith('__') for name in vars(scaler)
    )

    if already_fitted:
        # Reuse the existing fit. Refitting here would silently give every
        # dataset its own min/max and make the scales incomparable.
        speed = scaler.transform(speed)
    else:
        speed = scaler.fit_transform(speed)

    # Create preprocessed DataFrame
    preprocessed_data = pd.DataFrame({
        time_column: time.flatten(),
        speed_column: speed.flatten()
    })

    return preprocessed_data, scaler

# Initialize the scaler that is handed to every preprocess_dataset call below.
scaler = MinMaxScaler(feature_range=(0, 1))

# Preprocess the standard dataset first and keep the scaler it returns.
preprocessed_data, scaler = preprocess_dataset(input_file_standard, scaler)
preprocessed_file_path = os.path.join(output_folder, 'preprocessed_FTP_standard.xlsx')
preprocessed_data.to_excel(preprocessed_file_path, index=False, engine='openpyxl')
print(f"Standard dataset preprocessed and saved to {preprocessed_file_path}")

# Collect every preprocessed frame and concatenate once at the end;
# growing a DataFrame with pd.concat inside the loop copies all rows each time.
preprocessed_frames = [preprocessed_data]

# Preprocess the synthetic datasets.
# os.listdir returns entries in arbitrary order, so sort the names to make the
# row order of the combined dataset reproducible. Names starting with '~$' are
# Excel lock files, not real workbooks, and are skipped.
input_files_synthetic = [
    os.path.join(input_folder_synthetic, f)
    for f in sorted(os.listdir(input_folder_synthetic))
    if f.endswith('.xlsx') and not f.startswith('~$')
]
for file_path in input_files_synthetic:
    preprocessed_data, _ = preprocess_dataset(file_path, scaler)
    preprocessed_file_path = os.path.join(output_folder, f'preprocessed_{os.path.basename(file_path)}')
    preprocessed_data.to_excel(preprocessed_file_path, index=False, engine='openpyxl')
    print(f"Synthetic dataset preprocessed and saved to {preprocessed_file_path}")
    preprocessed_frames.append(preprocessed_data)

# Standard dataset rows first, then each synthetic dataset in file-name order.
# ignore_index=True gives the combined frame a clean 0..n-1 index.
all_preprocessed_data = pd.concat(preprocessed_frames, axis=0, ignore_index=True)

# Save the combined preprocessed dataset
all_preprocessed_data.to_excel(combined_file_path, index=False, engine='openpyxl')
print(f"Combined preprocessed dataset saved to {combined_file_path}")

# Download the combined preprocessed dataset (Colab browser download)
files.download(combined_file_path)


Standard dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP_standard.xlsx
Synthetic dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP Standard_Synthetic_v4.xlsx
Synthetic dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP Standard_Synthetic_v3.xlsx
Synthetic dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP Standard_Synthetic_v2.xlsx
Synthetic dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP Standard_Synthetic_v1.xlsx
Synthetic dataset preprocessed and saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed datasets/preprocessed_FTP Standard_Synthetic_v5.xlsx
Combined preprocessed dataset saved to /content/drive/MyDrive/Velocity Prediction/Preprocessed d

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>