In [31]:
import json
import re
from decimal import Decimal, InvalidOperation

def convert_abbreviation(value):
    """Convert an abbreviated number string such as '2.05K' into a plain number.

    The leading run of digits/dots is parsed and scaled by the optional
    suffix that immediately follows it: K (thousand), M (million) or
    B (billion). Anything after that prefix is ignored.

    Args:
        value: Text expected to start with a number, optionally followed
            by K, M or B.

    Returns:
        An int when the scaled result is a whole number, otherwise a float.
        ``value`` itself is returned unchanged when it does not start with a
        digit or dot, or when the numeric part is malformed (e.g. '1.2.3').
    """
    match = re.match(r'([\d.]+)([KMB]?)', value)
    if not match:
        return value
    digits, suffix = match.groups()

    # Decimal keeps the scaling exact; with binary floats '1.005K' becomes
    # 1004.9999999999999 instead of 1005.
    try:
        number = Decimal(digits)
    except InvalidOperation:
        # The character class also admits strings such as '1.2.3' or '.';
        # treat them like any other unparseable input.
        return value

    multiplier = {"K": 1_000, "M": 1_000_000, "B": 1_000_000_000}.get(suffix)
    if multiplier is not None:
        number *= multiplier

    if number == number.to_integral_value():
        return int(number)
    return float(number)


def contains_numbered_symbol(text):
    """Report whether the numbering marker occurs anywhere in ``text``.

    NOTE(review): the marker literal looks like a mis-encoded numero sign;
    confirm the file encoding before relying on it.
    """
    marker = "‚Ññ"
    found = marker in text
    return found


def remove_price_ranges(data_list):
    """Return the entries of ``data_list`` that are not 'number-number' ranges.

    An entry such as '0.0733-0.0735' or '1-3' is dropped; everything else is
    kept in its original order.
    """
    is_range = re.compile(r'^\d+(\.\d+)?-\d+(\.\d+)?$').match
    kept = []
    for entry in data_list:
        if is_range(entry) is None:
            kept.append(entry)
    return kept


def extract_futures_exchanges(text):
    """Collect the bracketed names listed on the futures line of ``text``.

    The first line containing the futures label followed by ': ' is located,
    and every ``[name]`` that appears after the label on that line is
    returned in order. An empty list is returned when no such line exists.
    """
    futures_line = re.search(r'‚úÖ–§—å—é—á–µ—Ä—Å—ã: (.+)', text)
    if not futures_line:
        return []
    remainder = futures_line.group(1)
    return re.findall(r'\[([^\]]+)\]', remainder)


def clean_text(text):
    """Parse a signal message into a dict of fields and validate its exchanges.

    The message is stripped down to whitespace-separated tokens, the fields
    are read by position, and the result is checked against ``settings.json``
    in the current working directory. Diagnostics are printed along the way.

    Args:
        text: The raw signal message.

    Returns:
        ``(True, d)`` when the source exchange is listed under
        ``exchanges_from`` in ``settings.json`` and one of the extracted
        futures exchanges matches the destination exchange; ``d`` then has
        ``exchange_to`` replaced by that match.
        ``(False, None)`` when the message yields too few tokens, the source
        exchange is not listed, or no futures exchange matches.
    """
    is_contain = contains_numbered_symbol(text)
    print("Text contains '‚Ññ' symbol:", is_contain)

    futures_exchanges = extract_futures_exchanges(text)
    print("Futures Exchanges:", futures_exchanges)

    # Keep only word characters, whitespace and the punctuation that the
    # later substitutions rely on.
    text = re.sub(r'[^\w\s.,:|$()/\[\]-]', '', text, flags=re.UNICODE)

    # Drop the first and last lines when the message has more than two.
    lines = text.strip().split("\n")
    if len(lines) > 2:
        text = "\n".join(lines[1:-1])

    text = re.sub(r'https?://\S+', '', text)
    # NOTE(review): the first substitution above already strips the leading
    # marker character, so this pattern appears unable to match - confirm.
    text = re.sub(r'‚Ññ\d+', '', text)
    text = re.sub(r'\[([^\]]+)\]', r'\1', text)  # unwrap [name] -> name
    # Presumably removes Cyrillic words - confirm against the file encoding.
    text = re.sub(r'\b[–ê-–Ø–∞-—è–Å—ë]+\b', '', text)
    text = re.sub(r'[|,|/]', '', text)
    text = re.sub(r'[()]', '', text)
    text = re.sub(r'\s+', ' ', text).strip()
    text = re.sub(r'[:$]', '', text)

    l_data = remove_price_ranges(text.split())

    # With the marker present every field sits one position later than
    # without it, so a single offset covers both layouts. The no-marker
    # layout previously read orders_count_to from index 11 instead of 10.
    offset = 1 if is_contain else 0
    if len(l_data) < 11 + offset:
        # Too few tokens to address every field: report failure instead of
        # raising IndexError.
        print("Error: signal has too few fields to parse")
        return False, None

    quantity_from = convert_abbreviation(l_data[4 + offset])
    quantity_to = convert_abbreviation(l_data[9 + offset])
    d = {
        "exchange_from": l_data[1 + offset],
        "price_from": l_data[2 + offset],
        "quantity_from": quantity_from,
        'orders_count_from': l_data[5 + offset],
        "exchange_to": l_data[6 + offset],
        "price_to": l_data[7 + offset],
        "quantity_to": quantity_to,
        "orders_count_to": l_data[10 + offset],
        "token": l_data[0].replace('USDT', ''),
        "futures_exchanges": futures_exchanges
    }
    print(d)

    with open("settings.json", "r") as f:
        data = json.load(f)
    if d['exchange_from'] not in data['exchanges_from']:
        print("‚ùå Error: Exchange mismatch (Expected MEXC -> BingX)")
        return False, None
    # NOTE(review): exchange_to is a single string, so `in` is a substring
    # test here, not list membership - confirm this is the intended check.
    first_match = next((exchange for exchange in d['futures_exchanges'] if exchange in d['exchange_to']), None)
    if first_match is None:
        return False, None
    d['exchange_to'] = first_match
    return True, d

In [32]:
# Load the sample signal. The message contains non-ASCII text, so decode it
# explicitly as UTF-8 instead of relying on the platform default encoding.
with open("signal_example.txt", "r", encoding="utf-8") as file:
    content = file.read()
print(content)  # Prints the entire content of the file

‚úÖBURGER: BingX‚ÜíKuCoin 3411.7 +66.1$ (1.93%)

`BURGER`/USDT: ‚Ññ[1431](https://coinmarketcap.com/currencies/burger-cities/)

üìó| [BingX](https://bingx.com/ru-ru/spot/BURGERUSDT/) | [–≤—ã–≤–æ–¥](https://bingx.com/ru-ru/assets/withdraw/) |
–¶–µ–Ω–∞: `0.0734786` [0.0733-`0.0735`]
–û–±—ä–µ–º: **3411.7 $**, 46.43K, 3 –æ—Ä–¥–µ—Ä–∞

üìï| [KuCoin](https://www.kucoin.com/ru/trade/BURGER-USDT) | [–≤–≤–æ–¥](https://www.kucoin.com/ru/assets/coin/BURGER) |
–¶–µ–Ω–∞: `0.0750892` [0.0784-`0.0737`]
–û–±—ä–µ–º: **3486.4 $**, 46.43K, 46 –æ—Ä–¥–µ—Ä–æ–≤

–ö–æ–º–∏—Å—Å–∏—è: —Å–ø–æ—Ç **8.6$** / –ø–µ—Ä–µ–≤–æ–¥ **0.24$** (3.2 BURGER)
–°–µ—Ç—å: BEP20
üü¢ 1-3 –º–∏–Ω—É—Ç—ã (15 –ø–æ–¥—Ç. ~ 1 –º–∏–Ω)
üïë –í—Ä–µ–º—è –∂–∏–∑–Ω–∏: 07:31
üí∞ –ß–∏—Å—Ç—ã–π —Å–ø—Ä–µ–¥: **66.1$** (**1.93%**)
‚úÖ–ú–∞—Ä–∂–∏–Ω–∞–ª—å–Ω—ã–π –∑–∞—ë–º
üëç[–ö–æ–Ω—Ç—Ä–∞–∫—Ç—ã](https://bscscan.com/token/0xae9269f27437f0fcbc232d39ec814844a51d6b8f) —Å–æ–≤–ø–∞–¥–∞—é—Ç

‚úÖ–§—å—é—á–µ—Ä—Å—ã: [MEXC](https://futures.mexc.com/exchange/BURGER_USDT) 

In [33]:
# Parse the sample message; the raw (is_valid, data) pair is also kept
# under the name cleaned_text.
cleaned_text = clean_text(content)
is_valid, data = cleaned_text
print(is_valid, data)

Text contains '‚Ññ' symbol: True
Futures Exchanges: ['MEXC', 'Bitget', 'XT']
{'exchange_from': 'BingX', 'price_from': '0.0734786', 'quantity_from': 46430, 'orders_count_from': '3', 'exchange_to': 'KuCoin', 'price_to': '0.0750892', 'quantity_to': 46430, 'orders_count_to': '46', 'token': 'BURGER', 'futures_exchanges': ['MEXC', 'Bitget', 'XT']}
False None


In [20]:
# Candidate exchange names to look for.
exchanges_to_check = [
    'MEXC',
    'Bitget',
    'XT',
]

In [21]:
# Exchange names to match the candidates against.
current_exchanges = [
    "BingX",
    "Binance",
    "KuCoin",
    "ByBit",
    "BitMart",
]

In [24]:
# Find the first candidate exchange that also appears in the current list.
exchanges_to_check = [
    'MEXC',
    'Bitget',
    'XT',
    'KuCoin',
]
current_exchanges = [
    "BingX",
    "Binance",
    "KuCoin",
    "ByBit",
    "BitMart",
]

# Candidates are scanned in order; None means nothing matched.
first_match = next(
    filter(lambda name: name in current_exchanges, exchanges_to_check),
    None,
)

if not first_match:
    print("No matching exchanges found.")
else:
    print("First matching exchange:", first_match)

First matching exchange: KuCoin
