In [5]:
import numpy as np
import pandas as pd
from scipy.signal import find_peaks
import os

# Load the dataset.
# The workbook is looked up in the current working directory, so the kernel
# must be started from the folder that contains it.
base_dir = os.getcwd()
drive_cycle_file_path = os.path.join(base_dir, 'Combined_Preprocessed_Dataset.xlsx')
data = pd.read_excel(drive_cycle_file_path)

# Extract relevant columns: pull the target-speed column out as an array,
# which is the input handed to segment_by_time_window further down.
speed_data = data['Target Speed, mph'].values

# Step 1: Segment data into overlapping time windows
def segment_by_time_window(speed_data, segment_length=10):
    """
    Slide a window of `segment_length` samples over `speed_data`, one step at a time.

    Returns a list holding one slice per start index, in order. The list is
    empty when the input has fewer than `segment_length` samples.
    """
    window_count = len(speed_data) - segment_length + 1
    return [
        speed_data[start:start + segment_length]
        for start in range(window_count)
    ]

# Step 2: Categorize segments
def categorize_segment(segment_speed, threshold=0.1):
    """
    Categorizes the segment as Acceleration, Deceleration, Constant Speed, or Fluctuating.

    Parameters
    ----------
    segment_speed : array-like
        Consecutive speed samples of one segment.
    threshold : float, default 0.1
        Magnitude the mean sample-to-sample change must exceed for the
        segment to count as accelerating or decelerating.

    Returns
    -------
    str
        'Acceleration', 'Deceleration', 'Constant Speed' or 'Fluctuating'.
    """
    diffs = np.diff(segment_speed)
    total_values = len(diffs)

    # Fewer than two samples means there is no sample-to-sample change at all.
    # Without this guard the mean and every ratio below is 0/0: NumPy emits
    # RuntimeWarnings and the NaN comparisons fall through to 'Fluctuating'.
    # Return that same label explicitly, without the warnings.
    if total_values == 0:
        return 'Fluctuating'

    avg_diff = np.mean(diffs)  # Average difference

    positive_counts = np.sum(diffs > 0)
    negative_counts = np.sum(diffs < 0)
    zero_counts = np.sum(diffs == 0)

    # Calculate proportions
    positive_ratio = positive_counts / total_values
    negative_ratio = negative_counts / total_values
    zero_ratio = zero_counts / total_values

    # Categorization based on avg_diff and proportions: a trend label needs
    # both a large enough mean change and a plurality of steps in that direction.
    if avg_diff > threshold and positive_ratio > max(negative_ratio, zero_ratio):
        return 'Acceleration'
    elif avg_diff < -threshold and negative_ratio > max(positive_ratio, zero_ratio):
        return 'Deceleration'
    elif abs(avg_diff) <= threshold and zero_ratio >= 0.5:
        return 'Constant Speed'
    else:
        return 'Fluctuating'

# Step 3: Calculate conditional probabilities
def calculate_conditional_probabilities(segments, categories, segment_length):
    """
    Build one record per segment with the share of rising, falling and flat steps.

    `categories[i]` is stored next to segment `i`. `segment_length` is part of
    the signature but is not read by this function.
    """
    def _share(mask, step_count):
        # Fraction of steps selected by the boolean mask.
        return np.sum(mask) / step_count

    records = []
    for index, window in enumerate(segments):
        steps = np.diff(window)
        step_count = len(steps)

        rising_share = _share(steps > 0, step_count)
        falling_share = _share(steps < 0, step_count)
        flat_share = _share(steps == 0, step_count)

        records.append({
            'Segment': ';'.join(str(value) for value in window),
            'Category': categories[index],
            'Prob_Acceleration': rising_share,
            'Prob_Deceleration': falling_share,
            'Prob_Constant_Speed': flat_share
        })

    return records

# Slide a 10-sample window across the speed trace
segments = segment_by_time_window(speed_data, segment_length=10)

# Label every window with its category (default threshold)
categories = list(map(categorize_segment, segments))

# Per-window shares of rising / falling / flat steps
probabilities = calculate_conditional_probabilities(segments, categories, segment_length=10)

# Write the records to an Excel workbook under base_dir
output_file_path = os.path.join(base_dir, 'Segmented_Probabilities.xlsx')
output_df = pd.DataFrame(probabilities)
output_df.to_excel(output_file_path, index=False)
print(f"Conditional probabilities excel file saved to {output_file_path}")

Conditional probabilities excel file saved to /Users/vivek/Velocity Prediction/Segmented_Probabilities.xlsx


In [6]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.layers import LSTM, Dense
import os

# Step 1: Load Probabilities Excel
base_dir = os.getcwd()
input_path = os.path.join(base_dir, 'Segmented_Probabilities.xlsx')
data = pd.read_excel(input_path)

# Step 2: Prepare Features and Labels
# Each row contributes its three per-segment step shares as the model input.
X = data[['Prob_Acceleration', 'Prob_Deceleration', 'Prob_Constant_Speed']].values

# Placeholder targets: uniform random values standing in for the real next
# velocities (replace with real data if available). They are drawn from a
# seeded generator so that re-running the cell reproduces the same targets.
# Keras weight initialisation and batch shuffling are not seeded by this cell.
SEED = 42
rng = np.random.default_rng(SEED)
y = rng.random(len(data))  # Placeholder for actual next velocities.

# Step 3: Train-Test Split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# Step 4: Build and Train LSTM Model
# The three feature columns are presented to the LSTM as a length-3 sequence
# with one value per step.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], 1)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# Reshape input for LSTM: (samples, features) -> (samples, features, 1)
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# Train the model
# NOTE(review): the held-out test split doubles as validation data here, so
# the loss printed in Step 5 is computed on the same rows as val_loss.
model.fit(X_train, y_train, epochs=50, batch_size=16, validation_data=(X_test, y_test))

# Step 5: Evaluate the Model
loss = model.evaluate(X_test, y_test)
print(f"Model Loss: {loss}")

# Step 6: Predict Future Velocities
future_timestamps = 100  # Number of prediction steps to generate
predicted_velocities = []

# Generate predictions step-by-step for `future_timestamps` steps
input_sequence = X_test[0]  # Initial input sequence (first test row, 3 features)
input_sequence = input_sequence.reshape(1, input_sequence.shape[0], 1)  # Reshape for LSTM input
for i in range(future_timestamps):
    # Predict next velocity
    pred_velocity = model.predict(input_sequence, verbose=0)
    predicted_velocities.append(pred_velocity[0, 0])

    # Update input sequence for the next step: drop the oldest slot and put
    # the new prediction in the last one. np.roll returns a new array, so the
    # in-place write below does not touch X_test.
    # NOTE(review): the slots initially hold step-share features, while the
    # value written back is a predicted velocity - confirm this is intended.
    input_sequence = np.roll(input_sequence, -1, axis=1)  # Roll the sequence
    input_sequence[0, -1, 0] = pred_velocity[0, 0]  # Update with predicted velocity

# Step 7: Save Predicted Velocities to Excel
timestamp_count = list(range(1, future_timestamps + 1))  # Timestamps: 1 to future_timestamps
results_df = pd.DataFrame({
    'Timestamp': timestamp_count,
    'Predicted Velocity': predicted_velocities
})

# NOTE(review): the file name says "50s" while future_timestamps is 100. The
# name is left unchanged so anything reading this file keeps working; confirm
# which horizon is intended and align the two.
output_path = os.path.join(base_dir, 'Predicted_Velocities_50s.xlsx')
results_df.to_excel(output_path, index=False)
print(f"Predicted velocities saved to {output_path}")

Epoch 1/50


  super().__init__(**kwargs)


[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 436us/step - loss: 0.0868 - val_loss: 0.0837
Epoch 2/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 435us/step - loss: 0.0835 - val_loss: 0.0836
Epoch 3/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 437us/step - loss: 0.0836 - val_loss: 0.0835
Epoch 4/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 442us/step - loss: 0.0833 - val_loss: 0.0836
Epoch 5/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 440us/step - loss: 0.0833 - val_loss: 0.0835
Epoch 6/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 441us/step - loss: 0.0834 - val_loss: 0.0835
Epoch 7/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 457us/step - loss: 0.0832 - val_loss: 0.0835
Epoch 8/50
[1m18094/18094[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 445us/step - loss: 0.0834 - val_loss: 0.0835
Epo

In [2]:
import numpy as np
import pandas as pd
from scipy.signal import find_peaks

# Load the dataset.
# The workbook is looked up in the current working directory. The path is
# built with os.path.join instead of '+' and a hard-coded '/', so the
# separator is chosen by the platform and is not doubled when the working
# directory already ends in one.
import os
base_dir = os.getcwd()
drive_cycle_file_path = os.path.join(base_dir, 'Combined_Preprocessed_Dataset.xlsx')
data = pd.read_excel(drive_cycle_file_path)

# Extract relevant columns: the target-speed column as an array
speed_data = data['Target Speed, mph'].values

# Step 1: Segment data into overlapping time windows
def segment_by_time_window(speed_data, segment_length=10):
    """
    Cut `speed_data` into overlapping windows of `segment_length` samples.

    Consecutive windows start one sample apart. The result is a list of
    slices, empty if the input is shorter than one window.
    """
    last_start = len(speed_data) - segment_length
    windows = []
    start = 0
    while start <= last_start:
        windows.append(speed_data[start:start + segment_length])
        start += 1
    return windows

# Step 2: Categorize segments
def categorize_segment(segment_speed, threshold=0.1):
    """
    Categorizes the segment as Acceleration, Deceleration, Constant Speed, or Fluctuating.

    Parameters
    ----------
    segment_speed : array-like
        Consecutive speed samples of one segment.
    threshold : float, default 0.1
        Bound on the mean sample-to-sample change: above it (in magnitude) the
        segment may be a trend, at or below it the segment may be constant.

    Returns
    -------
    str
        'Acceleration', 'Deceleration', 'Constant Speed' or 'Fluctuating'.
    """
    diffs = np.diff(segment_speed)
    total_values = len(diffs)

    # With fewer than two samples `diffs` is empty, and the mean and ratios
    # below would all be 0/0 (NaN plus RuntimeWarnings). The NaN comparisons
    # used to fall through to 'Fluctuating'; return it directly instead.
    if total_values == 0:
        return 'Fluctuating'

    avg_diff = np.mean(diffs)  # Average difference

    positive_counts = np.sum(diffs > 0)
    negative_counts = np.sum(diffs < 0)
    zero_counts = np.sum(diffs == 0)

    # Calculate proportions
    positive_ratio = positive_counts / total_values
    negative_ratio = negative_counts / total_values
    zero_ratio = zero_counts / total_values

    # Categorization based on avg_diff and proportions
    if avg_diff > threshold and positive_ratio > max(negative_ratio, zero_ratio):
        return 'Acceleration'
    elif avg_diff < -threshold and negative_ratio > max(positive_ratio, zero_ratio):
        return 'Deceleration'
    elif abs(avg_diff) <= threshold and zero_ratio >= 0.5:
        return 'Constant Speed'
    else:
        return 'Fluctuating'

# Step 3: Calculate conditional probabilities
def calculate_conditional_probabilities(segments, categories, segment_length):
    """
    Return, for every segment, a dict with the segment text, its category and
    the fraction of sample-to-sample steps that rise, fall or stay flat.

    `categories` is indexed in parallel with `segments`. `segment_length` is
    part of the signature but is not referenced in the body.
    """
    def summarize(position, window):
        # Look the label up first, then measure the step directions.
        label = categories[position]
        steps = np.diff(window)

        rising = np.sum(steps > 0)
        falling = np.sum(steps < 0)
        flat = np.sum(steps == 0)
        step_total = len(steps)

        share_rising = rising / step_total
        share_falling = falling / step_total
        share_flat = flat / step_total

        return {
            'Segment': ';'.join(str(sample) for sample in window),
            'Category': label,
            'Prob_Acceleration': share_rising,
            'Prob_Deceleration': share_falling,
            'Prob_Constant_Speed': share_flat
        }

    return [summarize(position, window) for position, window in enumerate(segments)]

# Segment data into overlapping 10-sample windows
segments = segment_by_time_window(speed_data, segment_length=10)

# Categorize each segment
categories = [categorize_segment(segment) for segment in segments]

# Calculate conditional probabilities
probabilities = calculate_conditional_probabilities(segments, categories, segment_length=10)

# Save to Excel. The path is built with os.path.join rather than '+' and a
# hard-coded '/', so the separator is chosen by the platform.
output_df = pd.DataFrame(probabilities)
output_file_path = os.path.join(base_dir, 'Segmented_Probabilities.xlsx')
output_df.to_excel(output_file_path, index=False)
print(f"Conditional probabilities saved to {output_file_path}")

Conditional probabilities saved to /Users/vivek/Velocity Prediction/Segmented_Probabilities.xlsx
