In [1]:
## Combine Data and Load Libraries

In [39]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor

# CSV result files to be combined into a single frame
file_paths = [
    'combined_results_scenario1_least.csv',
    'combined_results_scenario1_least_first.csv',
    'combined_results_scenario1_most.csv',
    'combined_results_scenario1_most_first.csv',
    'combined_results_scenario1_random.csv',
    'combined_results_scenario1_sitcov.csv',
]

# Read every CSV, then stack the frames under a fresh 0..n-1 index
df_list = list(map(pd.read_csv, file_paths))
df = pd.concat(df_list, ignore_index=True)

# Drop the listed columns; errors='ignore' skips any that are not present
columns_to_remove = [
    'Visibility',
    'VehicleStability',
    'RoadSurface',
    'StartEgo',
    'GoalEgo',
    'StartOther',
    'GoalOther',
]
df = df.drop(columns=columns_to_remove, errors='ignore')

# Sanity check: dimensions and the first few rows
print(df.shape)
print(df.head())

(3888, 11)
   PathInteraction  PrecipitationDeposits  RoadFriction  WindIntensity  \
0                1                     60           0.2             20   
1                2                     40           0.1             40   
2                4                     40           0.2             20   
3                1                     20           0.2             60   
4                2                     60           0.2             80   

   Wetness  Precipitation  FogDistance  FogDensity  Cloudiness   Min_TTC  \
0       20             40           60          40          40  2.155576   
1       80             60           60           0         100  2.946550   
2       40             40           40          60          60  5.111825   
3       40             40           40          60         100  2.279186   
4       40             40          100          20          40  2.954053   

   Collision_Occurred  
0               False  
1               False  
2              

In [40]:
## Data Preprocessing

In [41]:
# Report the number of missing entries in each column
print(df.isnull().sum())

# Split the frame: the two outcome columns become targets,
# every remaining column is treated as a feature
target_ttc = df['Min_TTC']
target_collision = df['Collision_Occurred']
features = df.drop(columns=['Min_TTC', 'Collision_Occurred'])

PathInteraction          0
PrecipitationDeposits    0
RoadFriction             0
WindIntensity            0
Wetness                  0
Precipitation            0
FogDistance              0
FogDensity               0
Cloudiness               0
Min_TTC                  0
Collision_Occurred       0
dtype: int64


In [42]:
## Analyze Influence on Collision_Occurred

### Collision Rate per Factor Value
For each factor, compute the collision rate (the mean of `Collision_Occurred`) at each of its values. These per-value rates are the direct input to the importance indices computed later in this notebook.

In [43]:
# Mean of Collision_Occurred at each distinct value of every feature column,
# keyed by the feature's column name
collision_rates = {
    column: df.groupby(column)['Collision_Occurred'].mean()
    for column in features.columns
}

# Print one labelled table per feature, separated by blank lines
for column, grouped in collision_rates.items():
    print(f"Collision rates for {column}:")
    print(grouped)
    print()

Collision rates for PathInteraction:
PathInteraction
1    0.030280
2    0.055118
4    0.160150
Name: Collision_Occurred, dtype: float64

Collision rates for PrecipitationDeposits:
PrecipitationDeposits
0      0.093726
20     0.066860
40     0.095166
60     0.077670
80     0.074935
100    0.063898
Name: Collision_Occurred, dtype: float64

Collision rates for RoadFriction:
RoadFriction
0.1    0.074141
0.2    0.093301
0.4    0.063191
0.6    0.063080
0.8    0.094714
1.0    0.096824
Name: Collision_Occurred, dtype: float64

Collision rates for WindIntensity:
WindIntensity
0      0.076155
20     0.092715
40     0.082043
60     0.081522
80     0.081905
100    0.082102
Name: Collision_Occurred, dtype: float64

Collision rates for Wetness:
Wetness
0      0.092264
20     0.069767
40     0.069281
60     0.093199
80     0.074074
100    0.105691
Name: Collision_Occurred, dtype: float64

Collision rates for Precipitation:
Precipitation
0      0.086861
20     0.081818
40     0.076479
60     0.081319


In [44]:
## Transform Collision Rates to Importance Indices

In [46]:
def normalize_to_importance_indices(collision_rates):
    """
    Convert per-value collision rates into importance indices that sum to 1.

    Every rate is divided by the grand total of all rates across all
    parameters, so the indices of ALL parameters and values together sum
    to 1 (not each parameter on its own).

    Parameters
    ----------
    collision_rates : dict
        Maps a parameter name to a pandas Series of collision rates, indexed
        by that parameter's values.

    Returns
    -------
    dict
        Same keys, in the same order; each Series is the input Series divided
        by the grand total. An empty input yields an empty dict.

    Raises
    ------
    ValueError
        If the grand total is zero, negative, or NaN. Dividing by such a
        total would silently produce NaN/inf indices.
    """
    # Nothing to normalize: keep the original behaviour of returning {}
    if not collision_rates:
        return {}

    # Grand total across every parameter and every value
    total_sum = sum(rates.sum() for rates in collision_rates.values())

    # `not total_sum > 0` is also True for NaN, which `total_sum <= 0` would miss
    if not total_sum > 0:
        raise ValueError(
            "Cannot normalize collision rates: the sum of all rates is "
            f"{total_sum!r}; it must be positive."
        )

    # Normalize each value by the grand total
    return {param: rates / total_sum for param, rates in collision_rates.items()}

# Normalize the per-value collision rates into importance indices
final_importance_indices = normalize_to_importance_indices(collision_rates)

# Two header lines and a blank line, then one space-separated line per parameter
print("# Importance indices based on collision rates:")
print("# Format: Parameter_Value,ImportanceIndex")
print()

for param, importance in final_importance_indices.items():
    entries = [f"{param}_{value},{idx:.6f}" for value, idx in importance.items()]
    print(" ".join(entries))


# Importance indices based on collision rates:
# Format: Parameter_Value,ImportanceIndex

PathInteraction_1,0.007192 PathInteraction_2,0.013092 PathInteraction_4,0.038041
PrecipitationDeposits_0,0.022263 PrecipitationDeposits_20,0.015881 PrecipitationDeposits_40,0.022605 PrecipitationDeposits_60,0.018449 PrecipitationDeposits_80,0.017800 PrecipitationDeposits_100,0.015178
RoadFriction_0.1,0.017611 RoadFriction_0.2,0.022162 RoadFriction_0.4,0.015010 RoadFriction_0.6,0.014983 RoadFriction_0.8,0.022497 RoadFriction_1.0,0.022999
WindIntensity_0,0.018089 WindIntensity_20,0.022023 WindIntensity_40,0.019488 WindIntensity_60,0.019364 WindIntensity_80,0.019455 WindIntensity_100,0.019502
Wetness_0,0.021916 Wetness_20,0.016572 Wetness_40,0.016456 Wetness_60,0.022138 Wetness_80,0.017595 Wetness_100,0.025105
Precipitation_0,0.020632 Precipitation_20,0.019434 Precipitation_40,0.018166 Precipitation_60,0.019316 Precipitation_80,0.017338 Precipitation_100,0.023038
FogDistance_0,0.019794 FogDistance_20

In [47]:
## Saving the parameters to a text file

In [48]:
# Write the indices to disk: one line per parameter, entries separated by spaces
with open("parameters_scenario1.txt", "w") as f:
    for param, importance in final_importance_indices.items():
        entries = (f"{param}_{value},{idx:.6f}" for value, idx in importance.items())
        # print appends the trailing newline for each parameter line
        print(" ".join(entries), file=f)