In [None]:
import pandas as pd
import numpy as np
import os
import re
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from collections import defaultdict
from IPython.display import display
from fuzzywuzzy import fuzz, process
from ITUtils import country_conflicts_finder
from pandarallel import pandarallel

# show all rows when displaying DataFrames (this sets the row limit, not the column limit)
pd.set_option('display.max_rows', None)

In [None]:
# Choose one administration (country code) and load its condensed conflicts summary.
# Expected layout on disk: ./adm_conflicts/<adm>/<file>
adm, file = 'G', 'conflicts_summary_condensed.csv'
filepath = os.path.join('.', 'adm_conflicts', adm, file)
df = pd.read_csv(filepath, low_memory=False)

# Render the loaded table, then list its column labels.
display(df)
print(df.columns)

In [None]:

# Logic: when both entries of a pair are present and otherwise identical, and the overlap is 100%, keep only the second entry of the pair.

In [None]:
# Steps:
# 1. Build the uhf100 subset of the dataframe: the rows where the column 'TPA-1 Beam' contains the string 'UHF' and the column 'Overlap (worst case)' contains the string '100.0%'. Use this subset for the following steps.
# 2. Split the subset into sub-dataframes in which the columns Network, Beam, and Overlap (worst case) are identical.
# 3. For every pair in pairs, check in every sub-dataframe whether both strings are present; if so, add the 9.5 kHz one to a discarded dataframe.
# 4. Merge the dataframe back together and return the simplified and the discarded dataframes.

In [None]:
def simplify_uhf_conflicts(df):
    """Remove 9.5 kHz UHF conflict rows that are duplicated by a 19.8 kHz row.

    Only rows whose 'TPA-1 Beam' contains 'UHF' and whose
    'Overlap (worst case)' contains the literal text '100.0%' are considered.
    Those rows are grouped by identical (Network, Beam, Overlap (worst case)).
    Within a group, when both members of one of the hard-coded carrier pairs
    are present in 'TPA-1 Beam', the 9.5 kHz row(s) are moved to the discarded
    frame and the 19.8 kHz row is kept.

    Parameters
    ----------
    df : pandas.DataFrame
        Conflicts table. For non-empty input it must contain the columns
        'TPA-1 Beam', 'Overlap (worst case)', 'Network' and 'Beam'.

    Returns
    -------
    (df_cleaned, discard_df) : tuple of pandas.DataFrame
        `df_cleaned` is `df` without the discarded rows; `discard_df` holds the
        discarded rows. Both have exactly the columns of `df` and a fresh
        0..n-1 index.

    Raises
    ------
    KeyError
        If `df` is non-empty and one of the required columns is missing.
    """
    # Keep the input schema for empty input so that CSVs written from the
    # result still carry a header row.
    if df.empty:
        return df.reset_index(drop=True), df.iloc[0:0].reset_index(drop=True)

    # (9.5 kHz entry to discard, 19.8 kHz entry to keep)
    pairs = [
        ['UHFUP fc=401.96MHz BW=9.5kHz', 'UHFUP fc=401.96MHz BW=19.8kHz'],
        ['UHFDN fc=401.96MHz BW=9.5kHz', 'UHFDN fc=401.96MHz BW=19.8kHz'],
        ['UHFUP fc=401.90MHz BW=9.5kHz', 'UHFUP fc=401.90MHz BW=19.8kHz'],
        ['UHFDN fc=401.90MHz BW=9.5kHz', 'UHFDN fc=401.90MHz BW=19.8kHz'],
    ]

    # Step 1: restrict to UHF rows with 100.0% overlap. regex=False makes the
    # match literal, so the '.' in '100.0%' is not treated as a wildcard.
    is_uhf = df['TPA-1 Beam'].astype(str).str.contains('UHF', regex=False, na=False)
    is_full_overlap = (
        df['Overlap (worst case)'].astype(str).str.contains('100.0%', regex=False, na=False)
    )
    uhf100 = df[is_uhf & is_full_overlap]

    # Step 2: group on the three stringified columns directly. No helper column
    # is added to the frame, so nothing extra can leak into the returned rows,
    # and distinct value combinations cannot collapse into the same joined key.
    group_keys = [
        uhf100[column].astype(str)
        for column in ('Network', 'Beam', 'Overlap (worst case)')
    ]

    # Step 3: within each group, collect the index labels of the 9.5 kHz rows
    # whose 19.8 kHz counterpart is also present.
    discard_labels = []
    for _, subdf in uhf100.groupby(group_keys):
        beams = subdf['TPA-1 Beam']
        present = set(beams)
        for low, high in pairs:
            if low in present and high in present:
                discard_labels.extend(beams[beams == low].index.tolist())

    # Step 4: take the discarded rows from the original frame (same columns as
    # the input) and drop them from the cleaned result.
    if discard_labels:
        discard_df = df.loc[discard_labels]
    else:
        discard_df = df.iloc[0:0]

    df_cleaned = df.drop(index=discard_labels).reset_index(drop=True)
    discard_df = discard_df.reset_index(drop=True)

    return df_cleaned, discard_df

In [None]:
# Usage: split the loaded conflicts table into the rows to keep (`cleaned`)
# and the rows removed as redundant (`discarded`).
cleaned, discarded = simplify_uhf_conflicts(df)


In [None]:
# Show the first frame returned by simplify_uhf_conflicts (the cleaned table).
display(cleaned)

In [None]:
# Show the second frame returned by simplify_uhf_conflicts (the discarded rows).
display(discarded)

In [None]:
import os

# === CONFIG ===
# NOTE: tpafile and tablesfolder are not referenced anywhere in this cell.
tpafile = './databases/TPAtable.csv'
tablesfolder = 'countriestables'
outfolder = 'adm_conflicts'
countrieslistfile = 'countrieslist.csv'

# Load country codes from a comma-separated list. Each code is stripped and
# empty entries are dropped, so "A, B", "A,B" and a trailing newline all parse
# the same way, and an empty file yields no countries instead of [''].
with open(countrieslistfile, 'r') as f:
    countries = [code.strip() for code in f.read().split(',') if code.strip()]
# # Uncomment to restrict the run to a single country while debugging:
# countries = ['ARG']

# === PROCESS EACH COUNTRY ===
for ccode in countries:
    print(f"\n=== Processing {ccode} ===")

    # Per-country output folder (must already exist; the inputs are read from
    # ./adm_conflicts/<ccode>/ as well).
    country_outfolder = os.path.join(outfolder, ccode)

    # --- Main condensed summary: conflicts_summary_condensed.csv ---
    adm = ccode
    file = 'conflicts_summary_condensed.csv'
    filepath = os.path.join('.', 'adm_conflicts', adm, file)
    df = pd.read_csv(filepath, low_memory=False)

    # Split into the cleaned table and the redundant 9.5 kHz rows
    summary_pivot, discarded = simplify_uhf_conflicts(df)

    # Save both parts next to the input
    outpath = os.path.join(country_outfolder, 'conflicts_summary_condensed_clean.csv')
    summary_pivot.to_csv(outpath, index=False)
    print('Summary condensed saved to ', outpath)
    outpath = os.path.join(country_outfolder, 'conflicts_summary_condensed_redundant.csv')
    discarded.to_csv(outpath, index=False)
    print('Redundant rows saved to ', outpath)

    # --- Other satellite names: conflicts_summary_condensed_othersatnames.csv ---
    file = 'conflicts_summary_condensed_othersatnames.csv'
    filepath = os.path.join('.', 'adm_conflicts', adm, file)
    df = pd.read_csv(filepath, low_memory=False)

    # Same split for the other-satellite-names summary
    summary_pivot_othersatnames, discarded_other = simplify_uhf_conflicts(df)
    outpath = os.path.join(country_outfolder, 'conflicts_summary_condensed_clean_othersatnames.csv')
    summary_pivot_othersatnames.to_csv(outpath, index=False)
    print('Summary condensed for other satellite names saved to ', outpath)
    outpath = os.path.join(country_outfolder, 'conflicts_summary_condensed_othersatnames_redundant.csv')
    discarded_other.to_csv(outpath, index=False)
    print('Redundant rows for other satellite names saved to ', outpath)

