In [4]:
import re

def correct_misidentifications(text):
    """Reduce an OCR-garbled numeric string to digits and decimal points.

    Upper-case letters that are commonly confused with digits are swapped for
    the digit they resemble; every remaining character that is neither a
    digit nor a ``.`` is then dropped.

    Args:
        text: Raw string to clean.

    Returns:
        The cleaned string. It may be empty, and it is not guaranteed to be a
        valid number (several dots, for instance, are left as they are).
    """
    # Look-alike letter -> digit. No digit is itself a key, so one translate
    # pass gives the same result as replacing the letters one after another.
    lookalike_table = str.maketrans({
        'O': '0',
        'G': '6',
        'B': '8',
        'I': '1',
        'S': '5',
        'Z': '2',
        'Q': '0',
        'T': '7',
        'L': '1',
        'A': '4',
    })
    digits_swapped = text.translate(lookalike_table)

    # Drop every run of characters that are not digits or decimal points.
    return re.sub(r'[^\d.]+', '', digits_swapped)

# Demo: a sample in which the letters O, G, B, A and L stand in for digits.
string = "O.GB123AL"
print(correct_misidentifications(string))  # prints 0.6812341

0.6812341


In [8]:
def process_y_axis(y_axis):
    """Parse a comma-separated string of OCR'd axis labels into floats.

    Each label is repaired in two steps before parsing: a single space sitting
    between two non-space characters is treated as a lost decimal point, and a
    label that mixes digits with letters is passed through
    ``correct_misidentifications``. Labels that still do not parse as a finite
    number are dropped.

    Args:
        y_axis: Comma-separated labels, e.g. ``'0.36,0 19,O.100,Cap'``.

    Returns:
        A ``(corrected_values, removed_indices)`` tuple: the parsed floats in
        their original order, and the positions (within the comma-split input)
        of the labels that were dropped.
    """
    import math  # local import: only needed for the finiteness check below

    corrected_values = []
    removed_indices = []

    for index, value in enumerate(y_axis.split(',')):
        # A space strictly inside the label is read as a missing decimal point.
        value = re.sub(r'(?<=\S) (?=\S)', '.', value)

        # Only labels that mix digits and letters are treated as misread
        # numbers; pure text is left alone so that it fails to parse below.
        if re.search(r'\d', value) and re.search(r'[a-zA-Z]', value):
            corrected_value = correct_misidentifications(value)
        else:
            corrected_value = value

        try:
            parsed = float(corrected_value)
        except ValueError:
            removed_indices.append(index)  # not a number at all
            continue

        # float() also accepts words such as 'nan', 'inf' and 'Infinity';
        # those are text, not axis values, so they are dropped as well.
        if math.isfinite(parsed):
            corrected_values.append(parsed)
        else:
            removed_indices.append(index)

    return corrected_values, removed_indices

# Sample OCR'd tick labels: '0 19' has a space where the decimal point should
# be, 'O.100' starts with the letter O instead of a zero, and 'Cap' is not a
# number at all.
y_axis = '0.36,0.34,0.32,0.30,0.28,0.26,0.24,0.22,0.20,0 19,0.18,0.16,0.14,0.12,O.100,0.080,0.060,0.040,Cap'
corrected_values, removed_indices = process_y_axis(y_axis)
print(corrected_values)
print(removed_indices)  # Indices of elements that were removed (here [18], the position of 'Cap')

[0.36, 0.34, 0.32, 0.3, 0.28, 0.26, 0.24, 0.22, 0.2, 0.19, 0.18, 0.16, 0.14, 0.12, 0.1, 0.08, 0.06, 0.04]
[18]


In [9]:
import re

def extract_currency(string):
    """Return the lower-cased currency code of the first currency marker found.

    Recognised markers are the symbols ``$ € £ ¥ A$ C$``, the ISO codes
    ``USD EUR GBP JPY AUD CAD``, and the stand-alone capital letters
    ``S E L Y``, which are treated as OCR misreadings of ``$ € £ ¥``.

    A misread letter only counts when it is neither preceded by a letter nor
    followed by a word character, so ``'100S'`` and ``'100 L'`` match but the
    final letter of an ordinary word (``'TOTAL'``, ``'PRICES'``) does not.

    Args:
        string: Text to scan.

    Returns:
        The currency code in lower case (e.g. ``'usd'``), or ``None`` when no
        marker is present.
    """
    # Dictionary to map symbols and misidentified symbols to currency names
    currency_map = {
        '$': 'USD',
        '€': 'EUR',
        '£': 'GBP',
        '¥': 'JPY',
        'A$': 'AUD',
        'C$': 'CAD',
        'S': 'USD',   # Misidentification for $
        'E': 'EUR',   # Misidentification for €
        'L': 'GBP',   # Misidentification for £
        'Y': 'JPY',   # Potential misidentification for ¥
    }

    pattern = re.compile(
        # Real symbols and ISO codes.
        r'\$|€|£|¥|A\$|C\$|USD|EUR|GBP|JPY|AUD|CAD'
        # Misread letters: (?<![A-Za-z]) rejects the last letter of a word,
        # (?!\w) rejects the first or an inner letter of a word.
        r'|(?<![A-Za-z])[SELY](?!\w)'
    )
    match = pattern.search(string)
    if match is None:
        return None

    symbol = match.group()
    # ISO codes are not keys of the map and fall through unchanged.
    return currency_map.get(symbol, symbol).lower()

# Test the function: each real symbol next to the letter it is commonly misread as.
print(extract_currency("Price is 100S"))      # usd
print(extract_currency("Price is 100$"))      # usd
print(extract_currency("Price is 100Euros"))  # None: 'E' is followed by a word character
print(extract_currency("Price is 100€"))      # eur
print(extract_currency("Price is 100L"))      # gbp
print(extract_currency("Price is 100£"))      # gbp
print(extract_currency("Price is 100Yen"))    # None: 'Y' is followed by a word character
print(extract_currency("Price is 100¥"))      # jpy

usd
usd
None
eur
gbp
gbp
None
jpy


In [21]:
import re

def extract_percentage(string):
    """Return the first number that is directly followed by a ``%`` sign.

    The number may carry a sign, may have a fractional part, and may start
    with the decimal point (``'.5%'`` is read as ``0.5``). Whitespace between
    the number and ``%`` is allowed. A comma is not treated as part of the
    number, so ``'78.47,293%'`` yields ``293.0``.

    Args:
        string: Text to scan.

    Returns:
        The percentage as a float, or ``None`` when no ``<number>%`` is found.
    """
    # \u2212 is the typographic minus sign; without it in the sign class the
    # sign would be skipped and a negative value returned as positive.
    # The second alternative, \.\d+, covers numbers written without a leading
    # zero, which would otherwise lose their decimal point.
    match = re.search(r'([-+\u2212]?(?:\d+(?:\.\d+)?|\.\d+))\s*%', string)
    if match is None:
        return None

    # float() does not understand the typographic minus, so normalise it.
    return float(match.group(1).replace('\u2212', '-'))

# Samples where the percentage is mixed with other numbers.
string1 = "S78.47,293%"  # the comma is not part of a number, so only 293 precedes '%'
string2 = "S0.04254 USD -43.45%,(6M)"

print(extract_percentage(string1)) # Output: 293.0
print(extract_percentage(string2)) # Output: -43.45

293.0
-43.45


In [25]:
def add_dot_if_needed(value):
    """Insert a decimal point after a leading ``0`` that lacks one.

    The value is converted with ``str()``. If the text starts with ``0`` and
    the next character is not ``.`` (or there is no next character), ``0.`` is
    put in front of the remaining characters: ``'05'`` becomes ``'0.5'`` and
    ``'0'`` becomes ``'0.'``. Anything else, including an empty string, is
    left unchanged.

    Args:
        value: A string, int or float.

    Returns:
        A float when ``value`` is an int or float, otherwise the (possibly
        modified) string.
    """
    value_str = str(value)

    # startswith() instead of indexing so that an empty string is simply
    # left alone rather than raising IndexError.
    if value_str.startswith('0') and not value_str.startswith('0.'):
        value_str = '0.' + value_str[1:]

    # If the original value was a number, return as a float; otherwise, return as a string
    if isinstance(value, (int, float)):
        return float(value_str)
    return value_str

# One string with a leading zero and one plain number.
string_value = "05"
number_value = 5

result_string = add_dot_if_needed(string_value)
result_number = add_dot_if_needed(number_value)

print("Result from string:", result_string)  # Output: 0.5
print("Result from number:", result_number)  # Output: 5.0 (str(5) has no leading zero, so nothing is inserted)


Result from string: 0.5
Result from number: 5.0
