In [17]:
import pandas as pd
import matplotlib.pyplot as plt

def analyze_road_segments(year):
    # Construct the file path based on the input year
    file_path = f'datasets_spitsuren_wegvakken/INWEVA_{year}_spits.xlsx'
    
    # Load the Excel file
    data = pd.read_excel(file_path)
    
    # Select the relevant columns
    selected_columns = data[['Wegnr_van','Wegnr_naar','Hm_van','Hm_naar','Traject_van', 'Traject_naar', 
                             'etmaal_AL', 'OS_AL', 'AS_AL', 'Ri_naar', 'Baantype_naar', 'Baantype_van']]
    
    results = []

    for direction in ['R', 'L']:
        for rush_hour, time_period in [('AS_AL', 'Morning'), ('OS_AL', 'Afternoon')]:
        # Filter for road number 4 and other conditions
            df_road_4 = selected_columns[
                (selected_columns['Wegnr_van'] == 4) & 
                (selected_columns['Wegnr_naar'] == 4) & 
                (selected_columns['Baantype_naar'] == 'HR') & 
                (selected_columns['Baantype_van'] == 'HR') & 
                (selected_columns['Ri_naar'] == direction)
            ]
    
            # Sort the filtered DataFrame by 'Hm_van'
            df_road_4_sorted = df_road_4.sort_values(by='Hm_van', ascending=True)
    
            mean_traffic = df_road_4_sorted['AS_AL'].mean()
            std_traffic = df_road_4_sorted['AS_AL'].std()
            distribution = df_road_4_sorted['AS_AL'].describe()

            results.append ({
            'year': year,
            'direction': direction,
            'time_period': time_period,
            'count': distribution['count'],
            'mean_traffic': mean_traffic,
                #'std_traffic': std_traffic,
            'min_traffic': distribution['min'],
                # '25_percentile': distribution['25%'],
                # '50_percentile': distribution['50%'],
                # '75_percentile': distribution['75%'],
            'max_traffic': distribution['max'] 
            })
    return results

years = [2012, 2017]
road_stats = []

for year in years:
    road_stats.extend(analyze_road_segments(year))

# Create a DataFrame from the collected statistics
stats_df = pd.DataFrame(road_stats)

# Display the DataFrame with statistics for each year, direction, and time period
stats_df

Unnamed: 0,year,direction,time_period,count,mean_traffic,min_traffic,max_traffic
0,2012,R,Morning,84.0,5871.964286,197.0,16572.0
1,2012,R,Afternoon,84.0,5871.964286,197.0,16572.0
2,2012,L,Morning,79.0,4830.392405,768.0,12560.0
3,2012,L,Afternoon,79.0,4830.392405,768.0,12560.0
4,2017,R,Morning,75.0,7484.066667,1531.0,16456.0
5,2017,R,Afternoon,75.0,7484.066667,1531.0,16456.0
6,2017,L,Morning,70.0,7513.114286,2415.0,14269.0
7,2017,L,Afternoon,70.0,7513.114286,2415.0,14269.0
