In [1]:
import pandas as pd

def calculate_relative_position_ratio(url_index, chain_length, total_length):
    """
    Calculates the relative position ratio of a URL within a URL redirection chain.

    Parameters:
    url_index (int): The position of the URL within the group (starting from 1).
    chain_length (int): The total number of URLs in the group.
    total_length (int): The total number of URLs in the entire dataset.

    Returns:
    float: The relative position ratio of the URL within the dataset, expressed as a fraction between 0 and 1.
    """
    if url_index < 1 or url_index > chain_length:
        raise ValueError("Invalid URL index")
    
    return float(url_index + total_length - chain_length) / float(total_length)

# Read the CSV file into a Pandas DataFrame
df = pd.read_csv('expanded_url.csv')

# Create a new column for the relative position ratio of each URL
df['position_ratio'] = df.groupby('expanded_url')['expanded_url'].apply(lambda x: calculate_relative_position_ratio(x.index[0]+1, len(x), len(df)))

# Write the updated DataFrame to a new CSV file
df.to_csv('output.csv', index=False)


ValueError: Invalid URL index