In [72]:
import csv

In [73]:

def compare_traffic_rates(file1_path, file2_path):
    """Compare the rate column of two CSV files row by row.

    One header row is skipped in each file, then the files are read in
    lockstep. For every pair of rows the third column (index 2) is parsed
    as a float, and the pair counts as a match when both values are exactly
    equal. Reading stops at the end of the shorter file, so surplus rows in
    the longer file are ignored.

    Args:
        file1_path: Path to the first CSV file.
        file2_path: Path to the second CSV file.

    Returns:
        A ``(similarity_score, total_comparisons)`` tuple: the number of
        row pairs whose rates are equal, and the number of row pairs that
        were compared. ``(0, 0)`` is returned when either file is empty or
        contains only a header row.

        ``None`` is returned, after printing an error message, when a file
        does not exist, when a row has fewer than three columns, or when a
        rate cannot be converted to a float.
    """

    try:
        # newline='' lets the csv module do its own line-ending handling,
        # as its documentation recommends for files passed to csv.reader.
        with open(file1_path, 'r', newline='') as file1, \
                open(file2_path, 'r', newline='') as file2:
            reader1 = csv.reader(file1)
            reader2 = csv.reader(file2)

            # Skip the header rows. The default argument keeps a zero-byte
            # file from raising StopIteration; such a file simply yields no
            # rows and the caller sees total_comparisons == 0.
            next(reader1, None)
            next(reader2, None)

            similarity_score = 0
            total_comparisons = 0

            for row1, row2 in zip(reader1, reader2):
                # The rate lives in column index 2, so both rows need at
                # least three columns (blank lines parse as empty rows).
                if len(row1) < 3 or len(row2) < 3:
                    print("Error: one of the rows does not have enough columns")
                    return None
                try:
                    rate1 = float(row1[2])
                    rate2 = float(row2[2])
                except ValueError:
                    print("Error: Could not convert rate to a number")
                    return None

                total_comparisons += 1
                if rate1 == rate2:
                    similarity_score += 1

            return similarity_score, total_comparisons

    except FileNotFoundError:
        print("Error: One or both files not found.")
        return None

# Example usage:
# file1 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Technology and Science/Malaysia_with_relative_traffic_rates.csv'
# file1 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Technology and Science/Philippines_with_relative_traffic_rates.csv'
# file2 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Technology and Science/Singapore_with_relative_traffic_rates.csv'
# Mean similarity for these pairs is roughly above 62 (acceptable)

# file1 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Life_style/Malaysia_with_relative_traffic_rates.csv'
file1 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Life_style/Philippines_with_relative_traffic_rates.csv'
file2 = './output/regions/west_asia/genral_labeled_data_with_relative_traffic_rates/Life_style/Singapore_with_relative_traffic_rates.csv'
# Author's verdict for the Life_style pair above: bad

# Alternative European files for file2 (uncomment one to compare against it):
# file2 = './output/regions/europe/genral_labeled_data_with_relative_traffic_rates/Technology and Science/Denmark_with_relative_traffic_rates.csv'
# file2 = './output/regions/europe/genral_labeled_data_with_relative_traffic_rates/Technology and Science/Finland_with_relative_traffic_rates.csv'
# file2 = './output/regions/europe/genral_labeled_data_with_relative_traffic_rates/Technology and Science/UK_with_relative_traffic_rates.csv'

# Run the comparison on the two selected files.
comparison_result = compare_traffic_rates(file1, file2)

# A None result means compare_traffic_rates has already printed an error,
# so there is nothing further to report in that case.
if comparison_result:
    similarity_score, total_comparisons = comparison_result
    if total_comparisons != 0:
        # At least one row pair was compared: report counts and percentage.
        print(f"Similarity Score: {similarity_score}")
        print(f"Total Comparisons: {total_comparisons}")
        print(f"Similarity Percentage: {(similarity_score / total_comparisons) * 100:.2f}%")
    else:
        # No data rows were paired up, so a percentage would divide by zero.
        print("Error: one or both files are empty or have only the header")

Similarity Score: 70
Total Comparisons: 158
Similarity Percentage: 44.30%
