In [None]:
import json
import re
import polars as pl
from dateutil.parser import parse
from google.colab import files
import ipywidgets as widgets
from IPython.display import display, clear_output

# Step 1: Initialize an empty list to hold the processed rows (one dict per JSON entry).
data = []

# Step 2: Upload JSON files regarding each state from system 6
print("Upload JSON files one by one:")
uploaded = files.upload()

# Step 3: Process each uploaded JSON file
for file_name in uploaded:
    # Derive the state name from the file name: drop the extension and the
    # '_releases' suffix, split camelCase into words, title-case the result,
    # and strip a ' (N)' counter (presumably added to duplicate uploads).
    state = file_name.split('.')[0].replace('_releases', '')
    state = re.sub(r'([a-z])([A-Z])', r'\1 \2', state).title()
    state = re.sub(r'\s\(\d+\)', '', state)

    # Read the file and close it right away; rows are built after the handle
    # has been released.
    try:
        with open(file_name, 'r', encoding='utf-8') as file:
            json_data = json.load(file)
    except json.JSONDecodeError:
        print(f"Error decoding JSON from file: {file_name}")
        continue

    # A file may hold a single entry (object) or a list of entries.
    if isinstance(json_data, dict):
        json_data = [json_data]

    for entry in json_data:
        # Extracted fields:
        # - Id of the contract
        # - Description of the tender
        # - Supplier
        # - Value of the tender
        # - Value of the contract
        # - Period of the tender
        #
        # `x.get(key) or {}` / `or []` is used instead of `x.get(key, {})`
        # because a key that is present with an explicit JSON null would
        # otherwise yield None and crash the chained lookups below.
        raw_id = entry.get("_id")
        contract_id = raw_id.get("$oid") if isinstance(raw_id, dict) else raw_id

        tender = entry.get("tender") or {}
        tender_description = tender.get("description", "") or "No hay información"
        tender_value = (tender.get("value") or {}).get("amount", 0)

        contracts = entry.get("contracts") or []
        awards = entry.get("awards") or []
        awards_supplier = ", ".join(
            supplier.get("name") or ""
            for award in awards
            for supplier in (award.get("suppliers") or [])
        ) or "No hay información"

        # An entry can carry several contracts, so their amounts are added up.
        # A missing or null amount counts as 0 instead of breaking the sum.
        total_contract_value = sum(
            (contract.get("value") or {}).get("amount") or 0
            for contract in contracts
        )

        # Duration of the tender period in days. Fewer than 15 days is a red flag.
        period = tender.get("tenderPeriod") or {}
        start_date = period.get("startDate")
        end_date = period.get("endDate")

        try:
            if start_date and end_date:
                start_date_parsed = parse(start_date)
                end_date_parsed = parse(end_date)
                days_duration = (end_date_parsed - start_date_parsed).days
                temporal_flag = '🔴' if days_duration < 15 else '🟢'
            else:
                days_duration = None
                temporal_flag = None
        except (ValueError, TypeError):
            print(f"Error parsing dates in file {file_name}")
            days_duration = None
            temporal_flag = None

        # Percentage by which the contract total differs from the tender
        # budget. A difference of 30% or more is a red flag. When the tender
        # amount is missing or zero the difference is taken as 0 (green).
        bid_budget_percent_diff = (
            ((total_contract_value - tender_value) / tender_value) * 100 if tender_value else 0
        )
        red_flag_bid_tender = '🔴' if bid_budget_percent_diff >= 30 else '🟢'

        # Number of red flags on this row. Named differently from the
        # `red_flag_count` label widget defined later in the file.
        row_red_flag_count = sum(
            flag == '🔴' for flag in (temporal_flag, red_flag_bid_tender)
        )

        # NOTE(review): "Periodo de Tiempo (días)" holds either an int or the
        # placeholder string, so the column has mixed types — confirm that
        # pl.DataFrame accepts this on the installed Polars version.
        data.append({
            "Id": contract_id,
            "Estado": state,
            "Valor tender": tender_value,
            "Cantidad de contrato": total_contract_value,
            "Flag_tender": red_flag_bid_tender,
            "Descripción de contrato": tender_description,
            "Proveedores": awards_supplier,
            "Periodo de Tiempo (días)": days_duration if days_duration is not None else "No hay información",
            "Flag_temporal": temporal_flag if temporal_flag is not None else "No hay información",
            "Sum": row_red_flag_count
        })

# Step 4: Create a Polars DataFrame
df = pl.DataFrame(data)

# Helper function for filtering (flags, state, id and suppliers).
def filter_data(flag_type, state_filter, id_filter, supplier_filter):
    """Return the rows of the module-level ``df`` that match every active filter.

    Args:
        flag_type: 'Red Flags' keeps rows with at least one red flag in
            'Flag_tender' or 'Flag_temporal'; 'Green Flags' keeps rows whose
            tender flag is green and whose temporal flag is not red; any
            other value applies no flag filter.
        state_filter: Text searched for in 'Estado'. Empty means no filter.
        id_filter: Text searched for in 'Id'. Empty means no filter.
        supplier_filter: Text searched for in 'Proveedores'. Empty means no
            filter.

    Returns:
        The filtered Polars DataFrame. Text filters are case-insensitive
        substring matches.
    """
    red, green = '🔴', '🟢'
    filtered_df = df

    if flag_type == 'Red Flags':
        # A row is red when either flag is red, so that 'Red Flags' and
        # 'Green Flags' together cover every row.
        filtered_df = filtered_df.filter(
            (pl.col('Flag_tender') == red) | (pl.col('Flag_temporal') == red)
        )
    elif flag_type == 'Green Flags':
        filtered_df = filtered_df.filter(
            (pl.col('Flag_tender') == green) & (pl.col('Flag_temporal') != red)
        )

    text_filters = (
        ('Estado', state_filter),
        ('Id', id_filter),
        ('Proveedores', supplier_filter),
    )
    for column, needle in text_filters:
        if needle:
            # literal=True: the user's text is matched as plain text, not as
            # a regular expression, so input such as "(" or "S.A." cannot
            # raise a pattern error or match unintended characters.
            filtered_df = filtered_df.filter(
                pl.col(column).str.to_lowercase().str.contains(needle.lower(), literal=True)
            )
    return filtered_df

# Interactive controls: a dropdown for the flag category and three text search boxes.
flag_dropdown = widgets.Dropdown(
    description='Bandera:',
    options=['All Flags', 'Red Flags', 'Green Flags'],
    value='All Flags',
    layout=widgets.Layout(width='200px'),
)


def _search_box(label, hint):
    """Build an empty 300px-wide text input with the given description and placeholder."""
    return widgets.Text(
        value='',
        placeholder=hint,
        description=label,
        layout=widgets.Layout(width='300px'),
    )


state_text = _search_box('Estado:', 'Estado')
id_text = _search_box('ID:', 'ID del contrato')
supplier_text = _search_box('Proveedor:', 'Proveedor')

# Counter labels for the red and green flags; both start at zero.
red_flag_count = widgets.Label(value="🔴 Red Flags: 0")
green_flag_count = widgets.Label(value="🟢 Green Flags: 0")

# Update flag counts
def update_flag_counts(filtered_df):
    """Refresh the red/green counter labels from ``filtered_df``.

    Flags are counted across both flag columns ('Flag_tender' and
    'Flag_temporal'), which is how update_table() fills the same labels.
    Counting only 'Flag_temporal' would make the two disagree.

    Args:
        filtered_df: Polars DataFrame containing the 'Flag_tender' and
            'Flag_temporal' columns.
    """
    flag_columns = ('Flag_tender', 'Flag_temporal')
    red_count = sum(
        filtered_df.filter(pl.col(column) == '🔴').height for column in flag_columns
    )
    green_count = sum(
        filtered_df.filter(pl.col(column) == '🟢').height for column in flag_columns
    )
    red_flag_count.value = f"🔴 Red Flags: {red_count}"
    green_flag_count.value = f"🟢 Green Flags: {green_count}"

# Output area for the results table; update_table() clears it and renders into it.
output = widgets.Output()

# Refresh the table and the counters from the current filter values.
def update_table(change=None):
    """Re-render the filtered table and update the flag counter labels.

    Args:
        change: Change notification passed by a widget observer. It is not
            used, and defaults to None so the function can be called directly.
    """
    with output:
        clear_output(wait=True)
        visible_rows = filter_data(
            flag_dropdown.value,
            state_text.value,
            id_text.value,
            supplier_text.value
        )

        # Show the rows as a left-aligned pandas-styled table.
        styled_table = visible_rows.to_pandas().style.set_properties(**{'text-align': 'left'})
        display(styled_table)

        # Each counter adds up the matching flags from both flag columns.
        tender_flags = visible_rows['Flag_tender']
        temporal_flags = visible_rows['Flag_temporal']
        totals = {
            mark: (tender_flags == mark).sum() + (temporal_flags == mark).sum()
            for mark in ('🔴', '🟢')
        }
        red_flag_count.value = f"🔴 Red Flags: {totals['🔴']}"
        green_flag_count.value = f"🟢 Green Flags: {totals['🟢']}"

# Re-run the table refresh whenever the value of any filter control changes.
for control in (flag_dropdown, state_text, id_text, supplier_text):
    control.observe(update_table, names='value')

# Lay out the UI: counters on top, filter controls below them, then the table.
counters_row = widgets.HBox([red_flag_count, green_flag_count])
filters_row = widgets.HBox([flag_dropdown, state_text, id_text, supplier_text])
display(widgets.VBox([counters_row, filters_row, output]))

# Render the table once with the initial filter values.
update_table()