In [None]:
import os
# Expose only GPUs 5 and 6 to this process and ask TensorFlow to grow GPU
# memory on demand. Both variables are set before TensorFlow is imported.
os.environ.update(
    {
        "CUDA_VISIBLE_DEVICES": "5,6",
        "TF_FORCE_GPU_ALLOW_GROWTH": "true",
    }
)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error

In [None]:
# Only these four columns are requested from the CSV.
columns_to_use = ["global_x", "global_y", "v_vel", "v_acc"]

# Number of rows parsed per chunk while streaming the file.
chunk_size = 100

# Stream the remote CSV chunk by chunk and keep every chunk in a list.
data_chunks = list(
    pd.read_csv(
        "https://data.transportation.gov/resource/8ect-6jqj.csv",
        usecols=columns_to_use,
        chunksize=chunk_size,
    )
)

# Stitch the chunks back together under a fresh 0..n-1 index.
velocity_dataset = pd.concat(data_chunks, ignore_index=True)

In [None]:
# Column roles for the regression task: the target column first,
# then the predictor columns.
output_labels = ["v_vel"]
input_features = [
    "global_x",
    "global_y",
    "v_acc",
]

In [None]:
# Separate predictors from the target. The frame built by the loading cell is
# `velocity_dataset`; no `v_dataset` is ever defined in this notebook, so using
# that name raises NameError on a fresh kernel.
X = velocity_dataset[input_features]
Y = velocity_dataset[output_labels]

# Hold out 20% of the rows for testing; the fixed seed keeps the split
# reproducible across runs.
train_features, test_features, train_labels, test_labels = train_test_split(
    X, Y, test_size=0.2, random_state=42
)

In [None]:
# Normalization layer over the last (feature) axis; its statistics are learned
# from the training split only.
feature_normalizer = layers.Normalization(axis=-1)

# Hand Keras plain NumPy arrays instead of DataFrames: adapt() and layer calls
# are documented for arrays/tensors, and implicit DataFrame conversion is not
# something to rely on across Keras versions.
train_feature_array = train_features.to_numpy()
test_feature_array = test_features.to_numpy()
feature_normalizer.adapt(train_feature_array)

# Apply the statistics learned from the training split to both splits.
normalized_train_features = feature_normalizer(train_feature_array)
normalized_test_features = feature_normalizer(test_feature_array)

In [None]:
# Random forest with 100 trees and a fixed seed.
# The model is only constructed here; fitting happens in the following cell.
rf_model = RandomForestRegressor(
    n_estimators=100,
    random_state=42,
)

In [None]:
%%time
# Fit on the normalised training features; the single-column label frame is
# flattened to a 1-D array first.
train_targets = train_labels.to_numpy().ravel()
rf_model.fit(normalized_train_features, train_targets)

# Predict on the held-out split.
test_predictions = rf_model.predict(normalized_test_features)

# Report the mean absolute error between true and predicted values.
mae = mean_absolute_error(test_labels, test_predictions)
print(f"Mean Absolute Error: {mae}")

In [None]:
# Predicted vs. true values on a fixed, square [0, 50] window, with the
# identity line as the perfect-prediction reference.
lims = [0, 50]
fig, ax = plt.subplots(figsize=(10, 6))

# Predictions in blue; the true values plotted against themselves in red.
ax.scatter(test_labels, test_predictions, color='blue', label='Predicted Values', alpha=0.6)
ax.scatter(test_labels, test_labels, color='red', label='True Values', alpha=0.3)

ax.set_xlabel('True Values [vehicle_velocity]')
ax.set_ylabel('Predictions [vehicle_velocity]')
ax.grid(True)

# Fix the window before drawing the diagonal.
ax.set_xlim(lims)
ax.set_ylim(lims)
ax.plot(lims, lims, 'k--', label='Perfect Prediction')

# A single legend call after all artists exist covers every labelled artist.
ax.legend()
plt.show()