<a href="https://colab.research.google.com/github/Rambo088/FYP/blob/FYP-SourceCode-FinalVersion/TrafficPrediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install gplearn

Collecting gplearn
  Downloading gplearn-0.4.2-py3-none-any.whl (25 kB)
Installing collected packages: gplearn
Successfully installed gplearn-0.4.2


In [None]:
# NOTE(review): none of the visible cells import or use gradio — confirm this
# install is still required; if it is, pin a version for reproducibility.
!pip install gradio

In [None]:
import random

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.optim as optim
from gplearn.genetic import SymbolicRegressor
from imblearn.over_sampling import RandomOverSampler
from keras import callbacks
from keras.layers import GRU, Dropout, Dense, BatchNormalization
from keras.models import Sequential
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [None]:
# Read the raw CSV, parse the timestamp column and discard the row identifier
# (the ID column is not needed for the analysis).
data = (
    pd.read_csv("traffic.csv")
    .assign(DateTime=lambda frame: pd.to_datetime(frame["DateTime"]))
    .drop(columns=["ID"])
)

# Print dtypes and non-null counts of the cleaned frame
data.info()

In [None]:
# Exploratory columns are added to a copy so the modelling frame stays untouched
eda_data = data.copy()

# Palette passed to seaborn for the `Junction` hue; reused by later plotting cells
colors = ["#FF0000", "#0006FF", "#1DFF00", "#FAFF00"]

# Vehicle counts over time, one line per junction
plt.figure(figsize=(25, 8), facecolor="#99ccff")
time_series_plot = sns.lineplot(
    data=eda_data,
    x="DateTime",
    y="Vehicles",
    hue="Junction",
    palette=colors,
)
time_series_plot.set_title("Each Junction's Traffic Over The Years")
time_series_plot.set_xlabel("Date")
time_series_plot.set_ylabel("Number of Vehicles")

plt.show()

In [None]:
# Derive calendar features from the timestamp (Day is the weekday name, e.g. "Monday")
eda_data = eda_data.assign(
    Year=lambda frame: frame["DateTime"].dt.year,
    Month=lambda frame: frame["DateTime"].dt.month,
    Date_no=lambda frame: frame["DateTime"].dt.day,
    Day=lambda frame: frame["DateTime"].dt.strftime("%A"),
    Hour=lambda frame: frame["DateTime"].dt.hour,
)

eda_data.head()

In [None]:
# Daily mean of every numeric column, indexed by calendar day
traffic_data_resampled = data.set_index('DateTime').resample('D').mean()

# Explicit figure/axes interface instead of the pyplot state machine
fig, ax = plt.subplots(figsize=(14, 7))
sns.lineplot(
    data=traffic_data_resampled,
    x=traffic_data_resampled.index,
    y='Vehicles',
    ax=ax,
)
ax.set_title('Traffic Volume Over Time')
ax.set_xlabel('Date')
ax.set_ylabel('Number of Vehicles')
plt.show()

In [None]:
# Vehicle counts against each derived calendar feature, one line per junction
new_features = ["Year", "Month", "Day", "Hour"]

for feature_name in new_features:
    plt.figure(figsize=(12, 3), facecolor="#627D78")
    ax = sns.lineplot(
        data=eda_data,
        x=feature_name,
        y="Vehicles",
        hue="Junction",
        palette=colors,
    )
    # Park the legend outside the axes, anchored to the upper-left corner
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)

In [None]:
# Target is the vehicle count (as a column vector); features are every other
# column except the timestamp.
X = data.drop(columns=['DateTime', 'Vehicles']).to_numpy()
y = data['Vehicles'].to_numpy().reshape(-1, 1)

# Hold out 20% of the rows for testing (fixed seed for a repeatable split)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardize features: statistics are fitted on the training rows only and
# then re-used for the test rows
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# float32 tensors for the PyTorch model
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Define a simple neural network for regression
class SimpleRegressionModel(nn.Module):
    """Feed-forward regressor: Linear(input_size, 64) -> ReLU -> Linear(64, 1)."""

    def __init__(self, input_size):
        """Create the two linear layers and the ReLU between them.

        Args:
            input_size: number of input features per sample.
        """
        super().__init__()
        self.linear1 = nn.Linear(input_size, 64)
        self.relu = nn.ReLU()
        self.linear2 = nn.Linear(64, 1)

    def forward(self, x):
        """Return one prediction per row of ``x``."""
        hidden = self.relu(self.linear1(x))
        return self.linear2(hidden)

# Build the network for the number of feature columns in the training matrix
input_size = X_train.shape[1]
model = SimpleRegressionModel(input_size)

# Mean-squared-error objective, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Full-batch training: every epoch is one gradient step on the whole training set
num_epochs = 1000

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass
    outputs = model(X_train_tensor)
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# Predict on the held-out rows without tracking gradients
with torch.no_grad():
    predictions = model(X_test_tensor).numpy()

# Score = 100 - 100 * MSE / Var(y_test): the percentage of test-set variance
# that the predictions account for
relu_test_mse = mean_squared_error(y_test, predictions)
modelPerformanceReLU = 100 - relu_test_mse * 100 / np.var(y_test)

print(f'Model Performance on Test Set: {modelPerformanceReLU:.2f}%')


Model Performance on Test Set: 45.57%


In [None]:
# Features are every column except the timestamp AND the target. The target
# ('Vehicles') must not be left in X: if it is, the model is handed the answer
# as an input and the reported test score is meaningless (target leakage).
X = data.drop(['DateTime', 'Vehicles'], axis=1).values
y = data['Vehicles'].values.reshape(-1, 1)

# Split the data into training and testing sets (same seed/ratio as the ReLU cell)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

# Define a GRU model for symbolic regression
class SymbolicRegressionGRU(nn.Module):
    """Two-layer GRU (hidden size 50) followed by a linear read-out to one value.

    The GRU is built with ``batch_first=True``, so inputs are laid out as
    ``(batch, sequence, input_size)``; only the final time step feeds the
    output layer.
    """

    def __init__(self, input_size):
        """Create the recurrent stack and the output layer.

        Args:
            input_size: number of features per time step.
        """
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size=50, num_layers=2, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        """Return one prediction per sequence in the batch."""
        sequence_output, _hidden = self.gru(x)
        last_step = sequence_output[:, -1, :]
        return self.fc(last_step)

# Build the GRU regressor for the number of feature columns in the training matrix
input_size = X_train.shape[1]
model = SymbolicRegressionGRU(input_size)

# Mean-squared-error objective, optimised with Adam
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Full-batch training: every epoch is one gradient step on the whole training set
num_epochs = 1000

for epoch in range(num_epochs):
    # Clear gradients left over from the previous step
    optimizer.zero_grad()

    # Forward pass; unsqueeze(1) inserts a length-1 sequence axis for the GRU
    outputs = model(X_train_tensor.unsqueeze(1))
    loss = criterion(outputs, y_train_tensor)

    # Backward pass and parameter update
    loss.backward()
    optimizer.step()

# Predict on the held-out rows without tracking gradients
with torch.no_grad():
    predictions = model(X_test_tensor.unsqueeze(1)).numpy()

# Score = 100 - 100 * MSE / Var(y_test): the percentage of test-set variance
# that the predictions account for
gru_test_mse = mean_squared_error(y_test, predictions)
modelPerformanceGRU = 100 - gru_test_mse * 100 / np.var(y_test)
print(f'Model Performance on Test Set: {modelPerformanceGRU:.2f}%')


In [None]:
# One independent frame per junction (copies, so later edits cannot touch `data`)
junction_1_data = data.loc[data['Junction'] == 1].copy()
junction_2_data = data.loc[data['Junction'] == 2].copy()
junction_3_data = data.loc[data['Junction'] == 3].copy()
junction_4_data = data.loc[data['Junction'] == 4].copy()

# One colour per junction panel
colors = ["#FF0000", "#0006FF", "#1DFF00", "#FAFF00"]
plt.figure(figsize=(25, 8), facecolor="#99ccff")

# Four stacked panels: panel k shows junction k in colour k
junction_frames = (junction_1_data, junction_2_data, junction_3_data, junction_4_data)
for panel_no, junction_frame in enumerate(junction_frames, start=1):
    plt.subplot(4, 1, panel_no)
    sns.lineplot(
        x=junction_frame['DateTime'],
        y="Vehicles",
        data=junction_frame,
        color=colors[panel_no - 1],
    )
    plt.title(f"Junction {panel_no} Traffic Over The Years")
    plt.xlabel("Date")
    plt.ylabel("Number of Vehicles")

# Stop titles and tick labels of neighbouring panels from overlapping
plt.tight_layout()
plt.show()

In [None]:
# Recorded scores of the two models, one row per test run
df = pd.read_excel('ModelTests.xlsx')

# One marked line per model column, plotted against the test identifier
plt.figure(figsize=(10, 6))
for model_column in ('ReLU', 'GRU'):
    plt.plot(df['Test'], df[model_column], marker='o', label=model_column)

# Fix the y-axis window to 40-80
plt.ylim(40, 80)

plt.xlabel('Test')
plt.ylabel('Accuracy (%)')
plt.title('Comparison of ReLU and GRU Models')
plt.legend()

plt.show()

In [None]:
# Pairwise relationships between the EDA columns, coloured by junction
sns.pairplot(eda_data, hue="Junction", palette=colors)

In [None]:
# Wide layout: one row per timestamp, one (value, junction) column per junction
data_Junction = data.pivot(index="DateTime", columns="Junction")
data_Junction.describe()

# Single-column frame per junction
data_1 = data_Junction[[('Vehicles', 1)]]
data_2 = data_Junction[[('Vehicles', 2)]]
data_3 = data_Junction[[('Vehicles', 3)]]
# Junction 4 has limited data only for a few months, so drop its empty timestamps
data_4 = data_Junction[[('Vehicles', 4)]].dropna()

# The pivot leaves a two-level column index; discard the junction level so each
# frame is left with a plain "Vehicles" column
list_dfs = [data_1, data_2, data_3, data_4]
for junction_frame in list_dfs:
    junction_frame.columns = junction_frame.columns.droplevel(level=1)

def Sub_Plots4(data_1, data_2, data_3, data_4,title):
    """Draw four stacked line plots (one per junction) that share a y-axis.

    Args:
        data_1, data_2, data_3, data_4: data for junctions 1-4, passed straight
            to ``sns.lineplot(data=...)``.
        title: figure-level title.

    Line colours come from the module-level ``colors`` list (entries 0-3).
    """
    fig, axes = plt.subplots(4, 1, figsize=(15, 8), facecolor="#99ccff", sharey=True)
    fig.suptitle(title)

    junction_series = (data_1, data_2, data_3, data_4)
    for position, (axis, series) in enumerate(zip(axes, junction_series)):
        sns.lineplot(ax=axis, data=series, color=colors[position])
        axis.set(ylabel=f"Junction {position + 1}")


# Visual stationarity check on the raw per-junction series
Sub_Plots4(
    data_1["Vehicles"],
    data_2["Vehicles"],
    data_3["Vehicles"],
    data_4["Vehicles"],
    "Dataframes Before Transformation",
)

In [None]:
# Function to normalize data
def normalize_data(data, column):
    """Standardise one column to zero mean and unit (sample) standard deviation.

    Args:
        data: DataFrame holding the column.
        column: name of the column to standardise.

    Returns:
        (normalized, avg, std_dev): ``normalized`` is a one-column DataFrame;
        ``avg`` and ``std_dev`` are the statistics needed to undo the scaling.
    """
    series = data[column]
    avg = series.mean()
    std_dev = series.std()
    return ((series - avg) / std_dev).to_frame(), avg, std_dev

# Function to difference data
def difference_data(data, column, interval):
    """Return the lag-``interval`` differences of ``data[column]`` as a list.

    Element ``k`` of the result is ``value[k + interval] - value[k]``, where
    ``value`` is the column in row order.

    The subtraction is done on the underlying array, i.e. strictly by position.
    Looking values up with integer keys on the Series (``series[i]``) is
    label-based, and on a datetime-indexed Series it only works through a
    positional fallback that pandas has deprecated.

    Args:
        data: DataFrame holding the column.
        column: name of the column to difference.
        interval: lag, in rows, between the two values being subtracted.

    Returns:
        A list with ``max(len(data) - interval, 0)`` entries.
    """
    values = data[column].to_numpy()
    n_rows = len(values)
    # Not enough rows to form a single pair
    if interval >= n_rows:
        return []
    # `n_rows - interval` (rather than `-interval`) keeps interval == 0 well-defined
    return (values[interval:] - values[:n_rows - interval]).tolist()

# Normalizing and differencing to make the series stationary
def _stationarize(junction_frame, interval):
    """Normalise a junction's "Vehicles" column and difference it at `interval` rows.

    Returns (frame, avg, std_dev, differences): `frame` has the columns
    "Normalized" and "Difference" and starts `interval` rows in, because the
    first `interval` rows have no earlier value to subtract.
    """
    normalized, avg, std_dev = normalize_data(junction_frame, "Vehicles")
    differences = difference_data(normalized, column="Vehicles", interval=interval)
    # .copy() makes the trimmed frame independent, so adding a column below does
    # not raise pandas' SettingWithCopyWarning
    trimmed = normalized[interval:].copy()
    trimmed.columns = ["Normalized"]
    trimmed["Difference"] = differences
    return trimmed, avg, std_dev, differences


# Junction 1: weekly difference (24 * 7 rows)
df1_norm, avg_junction1, std_dev_junction1, diff_junction1 = _stationarize(data_1, 24 * 7)

# Junction 2: daily difference (24 rows)
df2_norm, avg_junction2, std_dev_junction2, diff_junction2 = _stationarize(data_2, 24)

# Junction 3: hourly difference (1 row)
df3_norm, avg_junction3, std_dev_junction3, diff_junction3 = _stationarize(data_3, 1)

# Junction 4: hourly difference (1 row)
df4_norm, avg_junction4, std_dev_junction4, diff_junction4 = _stationarize(data_4, 1)

Sub_Plots4(df1_norm.Difference, df2_norm.Difference,df3_norm.Difference,df4_norm.Difference,"Dataframes After Transformation")


In [None]:
# Modelling input per junction: the differenced series only, as a one-column
# frame with missing rows removed
data_J1 = df1_norm[["Difference"]].dropna()
data_J2 = df2_norm[["Difference"]].dropna()
data_J3 = df3_norm[["Difference"]].dropna()
data_J4 = df4_norm[["Difference"]].dropna()

# Function to split dataset
def split_data(df):
    """Split ``df`` by position: the first 90% of rows for training, the rest for testing.

    Order is preserved (no shuffling). Returns ``(train, test)``.
    """
    cut = int(len(df) * 0.90)
    return df[:cut], df[cut:]

# Chronological 90/10 train/test split for every junction
(
    (Junction1_train, Junction1_test),
    (Junction2_train, Junction2_test),
    (Junction3_train, Junction3_test),
    (Junction4_train, Junction4_test),
) = (split_data(junction_frame) for junction_frame in (data_J1, data_J2, data_J3, data_J4))

# Function to create features and targets
def create_features_targets(df, steps=32):
    """Turn a frame into sliding-window features and next-step targets.

    For every row ``i >= steps`` the feature is the block of the ``steps`` rows
    before it and the target is row ``i`` itself.

    Args:
        df: DataFrame whose rows are consecutive observations.
        steps: window length in rows. Defaults to 32, the value that used to be
            hard-coded, so existing callers are unaffected.

    Returns:
        (X, y): ``X`` has shape ``(n, steps, n_columns)`` and ``y`` has shape
        ``(n, n_columns)`` with ``n = max(len(df) - steps, 0)``. When ``df`` is
        too short for a single window the arrays are empty but keep that rank,
        so downstream reshaping does not fail on a missing axis.
    """
    values = df.to_numpy()
    n_columns = values.shape[1]
    n_samples = len(values) - steps

    if n_samples <= 0:
        return (
            np.empty((0, steps, n_columns), dtype=values.dtype),
            np.empty((0, n_columns), dtype=values.dtype),
        )

    X = np.stack([values[start:start + steps] for start in range(n_samples)])
    # Copy so the targets do not alias the frame's underlying buffer
    y = values[steps:].copy()
    return X, y

# Function to fix feature shape
def fix_feature_shape(train, test):
    """Reshape both feature arrays to ``(samples, steps, 1)``.

    The first two dimensions of each input are kept and a trailing axis of
    length one is enforced. Returns ``(train, test)``.
    """
    def _with_single_feature_axis(features):
        n_samples, n_steps = features.shape[0], features.shape[1]
        return np.reshape(features, (n_samples, n_steps, 1))

    return _with_single_feature_axis(train), _with_single_feature_axis(test)

# Windowed features/targets for each junction's train and test split, then a
# reshape of the feature arrays to (samples, steps, 1)
(X_trainJ1, y_trainJ1), (X_testJ1, y_testJ1) = map(create_features_targets, (Junction1_train, Junction1_test))
X_trainJ1, X_testJ1 = fix_feature_shape(X_trainJ1, X_testJ1)

(X_trainJ2, y_trainJ2), (X_testJ2, y_testJ2) = map(create_features_targets, (Junction2_train, Junction2_test))
X_trainJ2, X_testJ2 = fix_feature_shape(X_trainJ2, X_testJ2)

(X_trainJ3, y_trainJ3), (X_testJ3, y_testJ3) = map(create_features_targets, (Junction3_train, Junction3_test))
X_trainJ3, X_testJ3 = fix_feature_shape(X_trainJ3, X_testJ3)

(X_trainJ4, y_trainJ4), (X_testJ4, y_testJ4) = map(create_features_targets, (Junction4_train, Junction4_test))
X_trainJ4, X_testJ4 = fix_feature_shape(X_trainJ4, X_testJ4)


In [None]:
def GRU_model(X_Train, y_Train, X_Test, y_Test):
    """Train a small 3-layer GRU with batch normalisation and predict on X_Test.

    NOTE: a second function with the same name is defined further down in this
    notebook; once that cell runs, it replaces this definition.

    Args:
        X_Train: training windows; ``X_Train.shape[1]`` is used as the number of
            time steps and one feature per step is assumed.
        y_Train: training targets.
        X_Test, y_Test: passed to ``fit`` as validation data; ``X_Test`` is also
            what the returned predictions are computed on.

    Returns:
        The output of ``model.predict(X_Test)``.
    """
    #The GRU model
    model = Sequential()
    # Only the first layer needs the input shape; later layers infer theirs
    # from the layer before them
    model.add(GRU(units=64, return_sequences=True, input_shape=(X_Train.shape[1],1), activation='tanh'))
    model.add(BatchNormalization())
    model.add(GRU(units=32, return_sequences=True, activation='tanh'))
    model.add(BatchNormalization())
    # Last recurrent layer returns only its final state for the Dense read-out
    model.add(GRU(units=16, activation='tanh'))
    model.add(BatchNormalization())
    model.add(Dense(units=1))

    #Compiling the model
    model.compile(optimizer='adam', loss='mean_squared_error')
    model.fit(X_Train,y_Train, validation_data=(X_Test, y_Test), epochs=10, batch_size=120)
    pred_GRU= model.predict(X_Test)
    return pred_GRU

def RMSE_Value(test,predicted):
    """Print and return the root mean squared error between ``test`` and ``predicted``."""
    residual = test - predicted
    rmse = np.sqrt(np.mean(np.square(residual)))
    print(f"The root mean squared error is {rmse}.")
    return rmse

def PredictionsPlot(test,predicted,m):
    """Overlay true and predicted values on one figure.

    Args:
        test: true values.
        predicted: model predictions.
        m: index into the module-level ``colors`` list for the true-value line.
    """
    plt.figure(figsize=(12,5),facecolor="#99ccff")

    # (values, line keyword arguments) for each curve, drawn in this order
    curves = (
        (test, {"color": colors[m], "label": "True Value", "alpha": 0.5}),
        (predicted, {"color": "#627D78", "label": "Predicted Values"}),
    )
    for values, line_kwargs in curves:
        plt.plot(values, **line_kwargs)

    plt.title("Traffic Prediction Vs True values")
    plt.xlabel("DateTime")
    plt.ylabel("Prediction Error")
    plt.legend()
    plt.show()

In [None]:
# Learning-rate schedule: starts at 0.01 and decays by a factor of 0.9 per 10000 steps
lr_schedule = ExponentialDecay(initial_learning_rate=0.01, decay_steps=10000, decay_rate=0.9)

def GRU_model(X_Train, y_Train, X_Test, y_Test, epochs=50, batch_size=120):
    """Train a five-layer GRU regressor with dropout and predict on X_Test.

    Architecture: GRU(150) -> GRU(150) -> GRU(50) -> GRU(50) -> GRU(50) -> Dense(1),
    with Dropout(0.2) after every GRU layer. Optimised with SGD (momentum 0.9)
    using the module-level ``lr_schedule``, with early stopping that restores
    the best weights.

    Args:
        X_Train: training windows; ``X_Train.shape[1]`` is used as the number of
            time steps and one feature per step is assumed.
        y_Train: training targets.
        X_Test, y_Test: passed to ``fit`` as validation data; ``X_Test`` is also
            what the returned predictions are computed on.
        epochs: maximum number of training epochs (default 50, the previously
            hard-coded value).
        batch_size: mini-batch size (default 120, the previously hard-coded value).

    Returns:
        The output of ``model.predict(X_Test)``.

    NOTE(review): the test split doubles as the validation set that drives early
    stopping, so the reported test error is optimistically biased — consider a
    separate validation split.
    """
    early_stopping = callbacks.EarlyStopping(min_delta=0.001, patience=10, restore_best_weights=True)

    #The GRU model
    model = Sequential()
    # Only the first layer needs the input shape; later layers infer theirs
    # from the layer before them
    model.add(GRU(units=150, return_sequences=True, input_shape=(X_Train.shape[1], 1), activation='tanh'))
    model.add(Dropout(0.2))
    for hidden_units in (150, 50, 50):
        model.add(GRU(units=hidden_units, return_sequences=True, activation='tanh'))
        model.add(Dropout(0.2))
    # Last recurrent layer returns only its final state for the Dense read-out
    model.add(GRU(units=50, activation='tanh'))
    model.add(Dropout(0.2))
    model.add(Dense(units=1))

    #Compiling the model
    model.compile(optimizer=SGD(learning_rate=lr_schedule, momentum=0.9), loss='mean_squared_error')
    model.fit(
        X_Train,
        y_Train,
        validation_data=(X_Test, y_Test),
        epochs=epochs,
        batch_size=batch_size,
        callbacks=[early_stopping],
    )
    pred_GRU = model.predict(X_Test)
    return pred_GRU

In [None]:
# Train, score and plot one GRU per junction. The last argument of
# PredictionsPlot is the junction's own index into `colors`, so each junction is
# drawn in its own colour.

#Junction 1
print("\033[1;31;2m########------Junction 1------########\033[0m")
PredJ1_GRU = GRU_model(X_trainJ1,y_trainJ1,X_testJ1, y_testJ1)
print("-" * 70)

print("\033[1;31;2m########------Junction 1------########\033[0m")
RMSE_J1_GRU = RMSE_Value(y_testJ1, PredJ1_GRU)
PredictionsPlot(y_testJ1, PredJ1_GRU, 0)

#Junction 2
print("\033[1;31;2m########------Junction 2------########\033[0m")
PredJ2_GRU = GRU_model(X_trainJ2, y_trainJ2, X_testJ2, y_testJ2)
print("-" * 70)

print("\033[1;31;2m########------Junction 2------########\033[0m")
RMSE_J2_GRU = RMSE_Value(y_testJ2, PredJ2_GRU)
PredictionsPlot(y_testJ2, PredJ2_GRU, 1)

#Junction 3
print("\033[1;31;2m########------Junction 3------########\033[0m")
PredJ3_GRU = GRU_model(X_trainJ3, y_trainJ3, X_testJ3, y_testJ3)
print("-" * 70)

print("\033[1;31;2m########------Junction 3------########\033[0m")
RMSE_J3_GRU = RMSE_Value(y_testJ3, PredJ3_GRU)
PredictionsPlot(y_testJ3, PredJ3_GRU, 2)

#Junction 4
print("\033[1;31;2m########------Junction 4------########\033[0m")
PredJ4_GRU = GRU_model(X_trainJ4, y_trainJ4, X_testJ4, y_testJ4)
print("-" * 70)

print("\033[1;31;2m########------Junction 4------########\033[0m")
RMSE_J4_GRU = RMSE_Value(y_testJ4, PredJ4_GRU)
PredictionsPlot(y_testJ4, PredJ4_GRU, 3)


#--------------------
# RMSE summary for Junction 1
model_names = ["GRU"]
rmse_values = [RMSE_J1_GRU]

model_rmse = list(zip(model_names, rmse_values))
Results_df = pd.DataFrame(model_rmse, columns=["MODEL", "RMSE"])
styled_df = Results_df.style.background_gradient(cmap="cool")

# Deliberately NOT named `colors`: rebinding the junction palette to a
# one-element list would break every later `colors[m]` lookup
bar_colors = ['lightcoral']
alpha = 0.9
fig, ax = plt.subplots(figsize=(10, 4))  # Set the figure size (width, height)
ax.bar(model_names, rmse_values, color=bar_colors, alpha=alpha)
ax.set_xlabel('Model')
ax.set_ylabel('RMSE Value')
ax.set_title('RMSE Value for Different Models - J1')
plt.show()

display(styled_df)


In [None]:
# Functions to inverse transforms and Plot comparitive plots
# invert differenced forecast
def inverse_difference(last_ob, value):
    """Undo a differencing step: add the differenced ``value`` back onto ``last_ob``."""
    return value + last_ob
#Plotting the comparison
def Sub_Plots2(df_1, df_2,title,m):
    """Draw predictions and originals side by side on a shared y-axis.

    Args:
        df_1: predicted values, drawn on the left in ``colors[m]``.
        df_2: object indexable by ``"Vehicles"``; that column is drawn on the right.
        title: figure-level title.
        m: index into the module-level ``colors`` list.
    """
    fig, (prediction_axis, original_axis) = plt.subplots(
        1, 2, figsize=(18,4), sharey=True, facecolor="#99ccff"
    )
    fig.suptitle(title)

    # Left panel: model output
    sns.lineplot(ax=prediction_axis, data=df_1, color=colors[m])
    prediction_axis.set(ylabel ="Prediction")

    # Right panel: observed series
    sns.lineplot(ax=original_axis, data=df_2["Vehicles"], color="#627D78")
    original_axis.set(ylabel ="Orignal")

In [None]:
# Invert the differenced forecast for Junction 1.
# The window is sized from the predictions instead of a hard-coded row count, so
# the two columns always have matching lengths.
n_pred_J1 = len(PredJ1_GRU)
recover1 = df1_norm["Difference"][-(n_pred_J1 + 1):-1].to_frame()
# Flatten the (n, 1) prediction array into a plain 1-D column
recover1["Pred"] = np.asarray(PredJ1_GRU).ravel()
# NOTE(review): this adds each prediction to the previous *differenced* value.
# If the goal is to undo the lagged differencing, the base should probably be
# the lagged "Normalized" value instead — confirm.
transform_reversed_J1 = inverse_difference(recover1["Difference"], recover1["Pred"]).to_frame()
transform_reversed_J1.columns = ["Pred_Normed"]

# Invert the normalization for Junction 1
final_J1_pred = transform_reversed_J1["Pred_Normed"] * std_dev_junction1 + avg_junction1
transform_reversed_J1["Pred_Final"] = final_J1_pred

# Plot predictions next to the originals. Sub_Plots2 is the comparison helper
# this notebook defines; it reads the "Vehicles" column of its second argument
# and takes a colour index.
Sub_Plots2(transform_reversed_J1["Pred_Final"], junction_1_data[-(n_pred_J1 + 1):-1],
           "Predictions And Originals For Junction 1", 0)

NameError: name 'plot_comparative_plots' is not defined

In [None]:
# Invert the differenced forecast for Junction 1.
# The window is sized from the predictions instead of a hard-coded row count, so
# the two columns always have matching lengths.
n_pred_J1 = len(PredJ1_GRU)
recover1 = df1_norm["Difference"][-(n_pred_J1 + 1):-1].to_frame()
# Flatten the (n, 1) prediction array into a plain 1-D column
recover1["Pred"] = np.asarray(PredJ1_GRU).ravel()
# NOTE(review): this adds each prediction to the previous *differenced* value.
# If the goal is to undo the lagged differencing, the base should probably be
# the lagged "Normalized" value instead — confirm.
transform_reversed_J1 = inverse_difference(recover1["Difference"], recover1["Pred"]).to_frame()
transform_reversed_J1.columns = ["Pred_Normed"]

# Invert the normalization for Junction 1. De-normalise the recovered
# predictions, not the whole of df1_norm: that frame has a different number of
# rows, which caused the "Length of values does not match" ValueError.
Final_J1_Pred = transform_reversed_J1["Pred_Normed"] * std_dev_junction1 + avg_junction1
transform_reversed_J1["Pred_Final"] = Final_J1_Pred

# Plot predictions next to the originals. Sub_Plots2 reads a "Vehicles" column,
# which data_1 has and df1_norm does not.
Sub_Plots2(transform_reversed_J1["Pred_Final"], data_1[-(n_pred_J1 + 1):-1],
           "Pridictions And Orignals For Junction 1", 0)

ValueError: Length of values (14424) does not match length of index (1411)

In [None]:
# Invert the differenced forecast for Junction 2.
# The window is sized from the predictions instead of a hard-coded row count, so
# the two columns always have matching lengths.
n_pred_J2 = len(PredJ2_GRU)
recover2 = df2_norm["Difference"][-(n_pred_J2 + 1):-1].to_frame()
# Flatten the (n, 1) prediction array into a plain 1-D column
recover2["Pred"] = np.asarray(PredJ2_GRU).ravel()
# Use this junction's own frame (recover2); recover1 holds Junction 1's data.
# NOTE(review): this adds each prediction to the previous *differenced* value.
# If the goal is to undo the lagged differencing, the base should probably be
# the lagged "Normalized" value instead — confirm.
transform_reversed_J2 = inverse_difference(recover2["Difference"], recover2["Pred"]).to_frame()
transform_reversed_J2.columns = ["Pred_Normed"]

# Invert the normalization for Junction 2 with Junction 2's own mean and
# standard deviation
final_J2_pred = transform_reversed_J2["Pred_Normed"] * std_dev_junction2 + avg_junction2
transform_reversed_J2["Pred_Final"] = final_J2_pred

# Plot predictions next to the originals for Junction 2. Sub_Plots2 is the
# comparison helper this notebook defines; it reads the "Vehicles" column of its
# second argument and takes a colour index.
Sub_Plots2(transform_reversed_J2["Pred_Final"], junction_2_data[-(n_pred_J2 + 1):-1],
           "Predictions And Originals For Junction 2", 0)

NameError: name 'plot_comparative_plots' is not defined

In [None]:
# Invert the differenced forecast for Junction 3.
# The window is sized from the predictions instead of a hard-coded row count, so
# the two columns always have matching lengths.
n_pred_J3 = len(PredJ3_GRU)
recover3 = df3_norm["Difference"][-(n_pred_J3 + 1):-1].to_frame()
# Flatten the (n, 1) prediction array into a plain 1-D column
recover3["Pred"] = np.asarray(PredJ3_GRU).ravel()
# Use this junction's own frame (recover3); recover1 holds Junction 1's data.
# NOTE(review): this adds each prediction to the previous *differenced* value.
# If the goal is to undo the lagged differencing, the base should probably be
# the lagged "Normalized" value instead — confirm.
transform_reversed_J3 = inverse_difference(recover3["Difference"], recover3["Pred"]).to_frame()
transform_reversed_J3.columns = ["Pred_Normed"]

# Invert the normalization for Junction 3 with Junction 3's own mean and
# standard deviation
final_J3_pred = transform_reversed_J3["Pred_Normed"] * std_dev_junction3 + avg_junction3
transform_reversed_J3["Pred_Final"] = final_J3_pred

# Plot predictions next to the originals for Junction 3. Sub_Plots2 is the
# comparison helper this notebook defines; it reads the "Vehicles" column of its
# second argument and takes a colour index.
Sub_Plots2(transform_reversed_J3["Pred_Final"], junction_3_data[-(n_pred_J3 + 1):-1],
           "Predictions And Originals For Junction 3", 0)

NameError: name 'plot_comparative_plots' is not defined

In [None]:
# Invert the differenced forecast for Junction 4.
# The window is sized from the predictions instead of a hard-coded row count, so
# the two columns always have matching lengths.
n_pred_J4 = len(PredJ4_GRU)
recover4 = df4_norm["Difference"][-(n_pred_J4 + 1):-1].to_frame()
# Flatten the (n, 1) prediction array into a plain 1-D column
recover4["Pred"] = np.asarray(PredJ4_GRU).ravel()
# Use this junction's own frame (recover4); recover1 holds Junction 1's data.
# NOTE(review): this adds each prediction to the previous *differenced* value.
# If the goal is to undo the lagged differencing, the base should probably be
# the lagged "Normalized" value instead — confirm.
transform_reversed_J4 = inverse_difference(recover4["Difference"], recover4["Pred"]).to_frame()
transform_reversed_J4.columns = ["Pred_Normed"]

# Invert the normalization for Junction 4 with Junction 4's own mean and
# standard deviation
final_J4_pred = transform_reversed_J4["Pred_Normed"] * std_dev_junction4 + avg_junction4
transform_reversed_J4["Pred_Final"] = final_J4_pred

# Plot predictions next to the originals for Junction 4. Sub_Plots2 is the
# comparison helper this notebook defines; it reads the "Vehicles" column of its
# second argument and takes a colour index.
Sub_Plots2(transform_reversed_J4["Pred_Final"], junction_4_data[-(n_pred_J4 + 1):-1],
           "Predictions And Originals For Junction 4", 0)

NameError: name 'plot_comparative_plots' is not defined

In [None]:
def genetic_algorithm(population_size, num_generations):
    """Run the select / crossover / mutate / replace loop and return the best individual.

    Fitness is re-evaluated every time the population is replaced, so the
    fitness list handed to ``get_best_individual`` and ``select_parents`` always
    describes the current population. Evaluating once up front also keeps
    ``num_generations == 0`` well-defined: the best of the initial population is
    returned.

    Args:
        population_size: number of individuals to create initially.
        num_generations: number of generations to evolve.

    Returns:
        Whatever ``get_best_individual`` returns for the final population.

    NOTE(review): ``get_best_individual`` is not defined in the visible cells —
    it has to be provided before this function can run.
    """
    # Initialize and score the starting population
    population = initialize_population(population_size)
    fitness_values = evaluate_population(population)

    for generation in range(num_generations):
        # Select parents for reproduction
        selected_parents = select_parents(population, fitness_values)

        # Create offspring using crossover, pairing parents (0, 1), (2, 3), ...
        # zip() ignores a trailing unpaired parent instead of indexing past the end
        offspring = []
        for parent1, parent2 in zip(selected_parents[0::2], selected_parents[1::2]):
            child1, child2 = avoid_fit_segments_crossover(parent1, parent2)
            offspring.append(child1)
            offspring.append(child2)

        # Mutate offspring
        mutated_offspring = mutate(offspring)

        # Replace population with offspring, then re-score the NEW population
        population = replace_population(population, mutated_offspring)
        fitness_values = evaluate_population(population)

        # Track and print the best individual in each generation
        best_individual = get_best_individual(population, fitness_values)
        print(f"Generation {generation + 1}: Best Fitness = {best_individual.fitness}")

    # After all generations, return the best individual found
    return get_best_individual(population, fitness_values)

In [None]:
def initialize_population(population_size):
    """Return a list of ``population_size`` individuals built by ``create_individual()``.

    ``create_individual`` is problem-specific and must be defined elsewhere.
    """
    return [create_individual() for _ in range(population_size)]

In [None]:
def evaluate_population(population):
    """Return the fitness of every individual, in population order.

    ``evaluate_individual`` is problem-specific and must be defined elsewhere.
    """
    return [evaluate_individual(candidate) for candidate in population]

In [None]:
def select_parents(population, fitness_values):
    """Fitness-proportionate (roulette-wheel) selection with replacement.

    Draws ``2 * (len(population) // 2)`` parents, so the result always has an
    even length and can be consumed in pairs.

    ``random.choices`` accepts relative weights, so the fitness values are used
    directly; no normalisation is needed. When the total fitness is zero there
    is nothing to weight by (dividing by it would raise ZeroDivisionError), so
    selection falls back to uniform.

    Args:
        population: sequence of individuals.
        fitness_values: one fitness per individual, in the same order.
            Assumed non-negative — TODO confirm against ``evaluate_individual``.

    Returns:
        A list of selected parents (individuals may repeat).
    """
    n_parents = 2 * (len(population) // 2)
    if n_parents == 0:
        return []

    weights = list(fitness_values)
    if sum(weights) <= 0:
        # No usable fitness signal: every individual is equally likely
        weights = None
    return random.choices(population, weights=weights, k=n_parents)

In [None]:
def avoid_fit_segments_crossover(parent1, parent2):
    """Produce two children from two parents by delegating to ``crossover``.

    ``crossover`` is problem-specific and must be defined elsewhere; it has to
    yield exactly two children.
    """
    first_child, second_child = crossover(parent1, parent2)
    return first_child, second_child

In [None]:
def mutate(offspring):
    """Return a new list with ``apply_mutation`` applied to every offspring, in order.

    ``apply_mutation`` is problem-specific and must be defined elsewhere.
    """
    return [apply_mutation(child) for child in offspring]

In [None]:
def replace_population(population, offspring):
    """Build the next generation from the current population and its offspring.

    The replacement policy is delegated entirely to ``elite_selection``, which
    is problem-specific and must be defined elsewhere.
    """
    return elite_selection(population, offspring)

In [None]:
if __name__ == "__main__":
    # Genetic algorithm parameters
    population_size, num_generations = 100, 50

    # Evolve the population and report the winner
    best_individual = genetic_algorithm(population_size, num_generations)
    print("Best Individual:", best_individual)

NameError: name 'create_individual' is not defined