In [1]:
import pandas as pd
import os
import json

In [None]:
# Load combined_df.csv into a DataFrame; the path is relative to the notebook's working directory.
df = pd.read_csv("combined_df.csv")
# (rows, columns) of the loaded frame, shown as the cell output.
df.shape

In [None]:
# Number of rows recorded for each distinct value of the 'scenario' column.
df['scenario'].value_counts()

In [None]:
# Preview the first rows of the frame to check the column layout.
df.head()

1. Suppose each simulation run results in either an accident or no accident. Calculate the
probability of an accident (report the counts and the marginal probability). Hint: for each run, the
collision results are stored in ‘route_highway.txt’. You can check the accident
status by looking at the ‘status’ field under the ‘record’ section (‘Completed’ means
no accident; ‘Failed’ means an accident has occurred). (1 point)

In [None]:
# Count the distinct non-missing 'status' values within each scenario; a count
# of 1 means every such row of that scenario carries the same status.
scenario_status = df.groupby('scenario')['status'].nunique()
scenario_status

In [None]:
# Every distinct 'scenario' value is counted as one simulation run.
total_runs = len(df["scenario"].drop_duplicates())

# A scenario counts as an accident run when at least one of its rows has status 'Failed'.
failed_rows = df.loc[df["status"].eq("Failed")]
accident_runs = len(failed_rows["scenario"].drop_duplicates())

print(f"Total number of distinct simulation runs: {total_runs}")
print(f"Number of accident runs: {accident_runs}")

# Marginal probability of an accident = accident runs / all runs.
accident_probability = accident_runs / total_runs
print(f"Accident probability: {accident_probability:.4f}")

2. By looking at the completion records and the plots you generated in Task 1, answer the following:
- Under which weather condition(s) did the accident happen?
- Does that match your guess in Task 1?
- When did the accident happen during those simulation runs?
- Why do you think the accident happened at that instant? Discuss each accident case separately. (2 points)

In [7]:
# Lift the column display limit so DataFrame output shows every column.
pd.set_option('display.max_columns', None)

In [8]:
# Keep only the rows whose 'scenario' value is 'rain-noon'.
rain_noon_df = df[df['scenario']=='rain-noon']

In [None]:
# Display the rain-noon subset for inspection.
rain_noon_df

In [None]:
# Rows of the rain-noon subset whose 'brake' value is exactly 1.
brake_is_one = rain_noon_df['brake'].eq(1)
accident_brake_data = rain_noon_df.loc[brake_is_one]
accident_brake_data

In [None]:
# Inspect 16 consecutive rows by position (4224 through 4239).
# NOTE(review): the bounds are hard-coded positions in the full frame, so they
# only point at the intended rows while combined_df.csv keeps its current row order.
df.iloc[4224:4240]

In [12]:
import matplotlib.pyplot as plt
# Columns that the plotting cells draw against normalized time, one figure per entry.
features = ['throttle', 'steer', 'brake', 'cvip', 'x', 'y', 'v']

In [None]:
# Re-base each scenario's clock so that its earliest timestamp becomes 0.
df['normalized_ts'] = df['ts'] - df.groupby('scenario')['ts'].transform('min')

# One figure per feature; within each figure, one line per scenario.
for feature in features:
    fig, ax = plt.subplots(figsize=(10, 6))
    for scenario in df['scenario'].unique():
        scenario_data = df.loc[df['scenario'] == scenario]
        ax.plot(scenario_data['normalized_ts'], scenario_data[feature], label=scenario)
    ax.set_title(f'{feature} Over Normalized Time for Different Scenarios')
    ax.set_xlabel('Normalized Timestamp')
    ax.set_ylabel(feature)
    ax.legend(title='Scenario')
    ax.grid(True)
    plt.show()


In [None]:
# Plot 'cvip' per scenario, keeping only rows whose normalized time is strictly greater than 10.
feature = 'cvip'
after_start = df['normalized_ts'] > 10

fig, ax = plt.subplots(figsize=(10, 6))
for scenario in df['scenario'].unique():
    scenario_data = df.loc[(df['scenario'] == scenario) & after_start]
    ax.plot(scenario_data['normalized_ts'], scenario_data[feature], label=scenario)

ax.set_title(f'{feature} Over Normalized Time (After 10 seconds) for Different Scenarios')
ax.set_xlabel('Normalized Timestamp')
ax.set_ylabel(feature)
ax.legend(title='Scenario')
ax.grid(True)
plt.show()


In [None]:
# Zoom every feature plot in on the 360-450 window of normalized time.
# The bounds are named once so the row filter and the figure title cannot
# disagree (the title previously advertised 350-450 while the filter started at 360).
window_start, window_end = 360, 450
in_window = df['normalized_ts'].between(window_start, window_end)  # inclusive on both ends

for feature in features:
    plt.figure(figsize=(10, 6))
    # One line per scenario, restricted to the rows inside the window.
    for scenario in df['scenario'].unique():
        scenario_data = df[(df['scenario'] == scenario) & in_window]
        plt.plot(scenario_data['normalized_ts'], scenario_data[feature], label=scenario)
    plt.title(f'{feature} Over Normalized Time ({window_start}-{window_end} seconds) for Different Scenarios')
    plt.xlabel('Normalized Timestamp')
    plt.ylabel(feature)
    plt.legend(title='Scenario')
    plt.grid(True)
    plt.show()


In [None]:
# Same kind of zoomed view (normalized time 360 to 450, both ends included),
# drawn only for the three scenarios listed here.
selected_scenarios = ['clear-noon', 'clear-sunset', 'rain-noon']
in_window = df['normalized_ts'].between(360, 450)

for feature in features:
    fig, ax = plt.subplots(figsize=(10, 6))
    # One line per selected scenario, restricted to the rows inside the window.
    for scenario in selected_scenarios:
        scenario_data = df.loc[(df['scenario'] == scenario) & in_window]
        ax.plot(scenario_data['normalized_ts'], scenario_data[feature], label=scenario)

    ax.set_title(f'{feature} Over Normalized Time (360-450 seconds) for Selected Scenarios')
    ax.set_xlabel('Normalized Timestamp')
    ax.set_ylabel(feature)
    ax.legend(title='Scenario')
    ax.grid(True)
    plt.show()