In [1]:
## Combine Data and Load Libraries

In [13]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# Result files to combine (a mix of CSV and Excel exports)
file_paths = [
    'combined_results_random_approach.xlsx',
    'combined_results_scenario2_least.csv',
    'combined_results_scenario2_most.csv',
    'combined_results_sitcov_approach.xlsx',
]

df_list = []

# Load every result file, harmonise its columns, and collect it for concatenation
for path in file_paths:
    lowered = path.lower()

    # Pick the reader from the file extension
    if lowered.endswith(".csv"):
        frame = pd.read_csv(path)
    elif lowered.endswith(".xlsx"):
        frame = pd.read_excel(path, engine="openpyxl")
    elif lowered.endswith(".xls"):
        # openpyxl cannot read the legacy .xls format, so let pandas choose
        # the engine for it instead of forcing openpyxl.
        frame = pd.read_excel(path)
    else:
        raise ValueError(f"Unsupported file type: {path}")

    # Drop "Min_TTC" (not used); errors="ignore" makes this a no-op when absent
    frame = frame.drop(columns=["Min_TTC"], errors="ignore")

    # Unify the collision column name; rename is a no-op when the old name is absent
    frame = frame.rename(columns={"CollisionOccurred": "Collision_Occurred"})

    # Make sure column order is consistent across files
    frame = frame.reindex(sorted(frame.columns), axis=1)

    df_list.append(frame)

# Stack all files into one frame with a fresh 0..n-1 index
df = pd.concat(df_list, ignore_index=True)

# Remove the columns that are excluded from the analysis (missing ones are ignored)
columns_to_remove = ['Visibility', 'VehicleStability', 'RoadSurface', 'StartEgo', 'GoalEgo', 'StartOther', 'GoalOther']
df = df.drop(columns=columns_to_remove, errors='ignore')

# Display basic info
print(df.shape)
print(df.head())

(2592, 11)
   Cloudiness  Collision_Occurred  FogDensity  FogDistance PathInteraction  \
0          20               False          60           60              c4   
1          60               False           0          100              c4   
2          40               False          80            0              c1   
3          60               False          20          100              c1   
4         100               False          40          100              c2   

   Precipitation  PrecipitationDeposits  RoadFriction  TimeOfDay  Wetness  \
0             80                      0           0.6        -90       60   
1              0                     60           1.0         30       60   
2              0                      0           0.2         60       60   
3             60                     80           0.4        -30       20   
4            100                     40           0.6         60      100   

   WindIntensity  
0             40  
1             60  


In [14]:
## Data Preprocessing

In [15]:
# Report how many missing entries each column contains
missing_per_column = df.isnull().sum()
print(missing_per_column)

# Split the frame: the collision flag is the target, every other column is a feature
target_collision = df['Collision_Occurred']
features = df.drop(columns=['Collision_Occurred'])

Cloudiness               0
Collision_Occurred       0
FogDensity               0
FogDistance              0
PathInteraction          0
Precipitation            0
PrecipitationDeposits    0
RoadFriction             0
TimeOfDay                0
Wetness                  0
WindIntensity            0
dtype: int64


In [16]:
## Analyze Influence on Collision_Occurred

### Collision Rate per Factor Value
For each factor, calculate the collision rate (the mean of `Collision_Occurred`) for each value the factor takes. These per-value rates are the direct input for the importance indices computed below.

In [17]:
# Collision rate (mean of Collision_Occurred) per distinct value of every feature column
collision_rates = {
    factor: df.groupby(factor)['Collision_Occurred'].mean()
    for factor in features.columns
}

# Show the per-value rates, one factor at a time
for factor, rates in collision_rates.items():
    print(f"Collision rates for {factor}:")
    print(rates)
    print()

Collision rates for Cloudiness:
Cloudiness
0      0.058625
20     0.028846
40     0.039894
60     0.021635
80     0.037464
100    0.039370
Name: Collision_Occurred, dtype: float64

Collision rates for FogDensity:
FogDensity
0      0.010033
20     0.021798
40     0.044041
60     0.038860
80     0.024390
100    0.066897
Name: Collision_Occurred, dtype: float64

Collision rates for FogDistance:
FogDistance
0      0.054487
20     0.040936
40     0.051630
60     0.022346
80     0.024390
100    0.029900
Name: Collision_Occurred, dtype: float64

Collision rates for PathInteraction:
PathInteraction
c1    0.036817
c2    0.036131
c4    0.051570
Name: Collision_Occurred, dtype: float64

Collision rates for Precipitation:
Precipitation
0      0.062500
20     0.034562
40     0.032258
60     0.024024
80     0.024000
100    0.030702
Name: Collision_Occurred, dtype: float64

Collision rates for PrecipitationDeposits:
PrecipitationDeposits
0      0.054604
20     0.027569
40     0.032787
60     0.043103

In [18]:
## Transform Collision Rates to Importance Indices

In [19]:
def normalize_to_importance_indices(collision_rates):
    """
    Convert collision rates to importance indices where ALL values sum to 1 total.

    Every rate is divided by the grand total of all rates across all
    parameters, so the indices of all parameters together sum to 1
    (they do not sum to 1 per parameter).

    Parameters
    ----------
    collision_rates : dict
        Maps a parameter name to its per-value collision rates. Each entry
        must support ``.sum()`` and division by a scalar (e.g. a pandas
        Series indexed by parameter value).

    Returns
    -------
    dict
        Same keys in the same order; each entry is the corresponding input
        divided by the grand total. An empty input yields an empty dict.

    Raises
    ------
    ValueError
        If rates are supplied but their grand total is zero. Dividing would
        silently turn every index into NaN.
    """
    # Grand total across all parameters and all of their values
    total_sum = sum(rates.sum() for rates in collision_rates.values())

    # A zero total cannot be normalised; fail loudly rather than emit NaN
    if collision_rates and total_sum == 0:
        raise ValueError(
            "Cannot normalize collision rates: the total of all rates is zero"
        )

    # Normalize each value by the grand total
    return {param: rates / total_sum for param, rates in collision_rates.items()}

# Normalise the per-value collision rates into importance indices
final_importance_indices = normalize_to_importance_indices(collision_rates)

# Header describing the output format, followed by a blank separator line
print("# Importance indices based on collision rates:")
print("# Format: Parameter_Value,ImportanceIndex")
print()

# One output line per parameter: space-separated "Parameter_Value,Index" entries
for param, importance in final_importance_indices.items():
    entries = [f"{param}_{value},{idx:.6f}" for value, idx in importance.items()]
    print(*entries)


# Importance indices based on collision rates:
# Format: Parameter_Value,ImportanceIndex

Cloudiness_0,0.027703 Cloudiness_20,0.013631 Cloudiness_40,0.018852 Cloudiness_60,0.010224 Cloudiness_80,0.017704 Cloudiness_100,0.018605
FogDensity_0,0.004741 FogDensity_20,0.010301 FogDensity_40,0.020812 FogDensity_60,0.018364 FogDensity_80,0.011526 FogDensity_100,0.031613
FogDistance_0,0.025748 FogDistance_20,0.019344 FogDistance_40,0.024398 FogDistance_60,0.010560 FogDistance_80,0.011526 FogDistance_100,0.014130
PathInteraction_c1,0.017398 PathInteraction_c2,0.017074 PathInteraction_c4,0.024370
Precipitation_0,0.029535 Precipitation_20,0.016333 Precipitation_40,0.015244 Precipitation_60,0.011353 Precipitation_80,0.011341 Precipitation_100,0.014508
PrecipitationDeposits_0,0.025803 PrecipitationDeposits_20,0.013028 PrecipitationDeposits_40,0.015494 PrecipitationDeposits_60,0.020369 PrecipitationDeposits_80,0.023358 PrecipitationDeposits_100,0.008553
RoadFriction_0.1,0.015199 RoadFriction_0.2,0.0

In [20]:
## Saving the parameters to a txt file

In [21]:
# Write one line per parameter: space-separated "Parameter_Value,Index" entries
with open("parameters_scenario2.txt", "w") as out_file:
    for param, importance in final_importance_indices.items():
        entries = [f"{param}_{value},{idx:.6f}" for value, idx in importance.items()]
        print(" ".join(entries), file=out_file)