In [62]:
import numpy as np
import pandas as pd
import re



- data.txt: McConnell & Ma 2013
- apj522042t1_ascii.txt: Shankar 2016, Table 1

# McConnell & Ma 2013

In [63]:
# Load the MM13 table: whitespace-delimited text from which the first 22 lines
# are skipped and six columns are selected by position.
mm13_fn = 'data.txt'
mm13_columns = ['Galaxy','Distance','Mbh','Mbh lower','Mbh upper','Morphology',]
mm13_col_index = [0,1,2,3,4,15]

# Read the file, selecting only specific columns.
# `sep=r'\s+'` is the documented replacement for `delim_whitespace=True`,
# which is deprecated since pandas 2.2.
mm13_df = pd.read_csv(mm13_fn,
                      skiprows=22,
                      sep=r'\s+',
                      names=mm13_columns,
                      usecols=mm13_col_index,)

# Rescale the central mass and both of its bounds by 1e8 in one step.
# NOTE(review): presumably this converts solar masses to units of 1e8 solar
# masses so the values are comparable with the S16 table - confirm.
mm13_mass_columns = ['Mbh', 'Mbh lower', 'Mbh upper']
mm13_df[mm13_mass_columns] = mm13_df[mm13_mass_columns] / 1e8

# Tag every row with the catalogue it came from
mm13_df['Source'] = 'MM13'


# Shankar 2016

In [64]:
def parse_mbh(s, float_precision=3):
    """Parse a LaTeX-style value with asymmetric errors into value and bounds.

    The expected form is ``${value}_{low}^{high}$``, for example
    ``${1.4}_{-0.3}^{+0.9}$``. Leading whitespace is ignored, and any text
    after the closing ``$`` is ignored as well.

    Parameters
    ----------
    s : str
        The string to parse.
    float_precision : int, optional
        Number of decimals every returned number is rounded to.

    Returns
    -------
    tuple of float
        ``(value, value + low, value + high)``. The subscript and superscript
        are added to the central value exactly as written, so the lower error
        has to carry its own minus sign to yield a lower bound.

    Raises
    ------
    ValueError
        If ``s`` does not start with the expected pattern.
    """
    # Regular expression pattern to capture numbers (integers or floats)
    pattern = r'\$\{([+-]?\d*\.?\d+)\}_\{([+-]?\d*\.?\d+)\}\^\{([+-]?\d*\.?\d+)\}\$'
    match = re.match(r'\s*' + pattern, s)

    if match is None:
        raise ValueError(f"Input string is not in the expected format: {s!r}")

    mbh, err_low, err_high = (
        round(float(group), float_precision) for group in match.groups()
    )
    # Round again after the addition: summing two rounded floats can leave
    # binary noise behind (1.4 + (-0.3) evaluates to 1.0999999999999999), which
    # would otherwise defeat the requested precision for the two bounds.
    return (
        mbh,
        round(mbh + err_low, float_precision),
        round(mbh + err_high, float_precision),
    )



# Parse the S16 table: a tab-separated text file, handled one line at a time
filename = 'apj522042t1_ascii.txt'
rows = []

with open(filename, 'r') as file:
    for i, line in enumerate(file):
        # The first 7 lines (indices 0-6) are skipped unconditionally
        if i >= 7:
            elements = line.split('\t')
            # Lines with fewer than 5 tab-separated fields are ignored
            if len(elements) >= 5:
                # Fields 0, 1 and 3 are read as name, morphology and distance
                galaxy, morphology = elements[0], elements[1]
                distance = float(elements[3])
                # Field 4 is handed to parse_mbh, which returns
                # (value, value + lower error, value + upper error)
                mbh, mbh_lower, mbh_upper = parse_mbh(elements[4], float_precision=3)

                # Collect the record; key order fixes the column order
                rows.append(dict([
                    ('Galaxy', galaxy),
                    ('Distance', distance),
                    ('Mbh', mbh),
                    ('Mbh lower', mbh_lower),
                    ('Mbh upper', mbh_upper),
                    ('Morphology', morphology),
                ]))

# Build the dataframe from the collected records and tag each row with its source
s16_df = pd.DataFrame(rows).assign(Source='S16')

# Show the result
print(s16_df)

       Galaxy  Distance     Mbh  Mbh lower  Mbh upper Morphology Source
0     IC 1459      28.4  24.000     14.000     34.000          E    S16
1     IC 2560      40.7   0.044      0.022      0.088   Sp (bar)    S16
2     IC 4296      40.7  11.000      9.000     13.000          E    S16
3         M31       0.7   1.400      1.100      2.300   Sp (bar)    S16
4         M49      17.1  25.000     24.000     28.000          E    S16
..        ...       ...     ...        ...        ...        ...    ...
61   NGC 6251     104.6   5.000      3.000      7.000          E    S16
62   NGC 7052      66.4   3.700      2.200      6.300          E    S16
63   NGC 7619      51.5  25.000     22.000     33.000          E    S16
64   NGC 7768     112.8  13.000      9.000     18.000          E    S16
65  UGC 03789      48.4   0.108      0.103      0.113   Sp (bar)    S16

[66 rows x 7 columns]


In [71]:
# Spot-check a single parsed record (the row at position 3)
s16_df.iloc[3, :]

Galaxy             M31
Distance           0.7
Mbh                1.4
Mbh lower          1.1
Mbh upper          2.3
Morphology    Sp (bar)
Source             S16
Name: 3, dtype: object

# Combining All Dataframes and Writing

In [89]:
def save_dataframe_to_txt(df, filename='fulldatabase.txt', precision=6):
    """Write a DataFrame to a tab-separated text file.

    The first line holds the column labels and every following line holds
    one row; the index is not written. Floats are written in fixed-point
    notation with at most ``precision`` decimals, with trailing zeros after
    the decimal point (and a dangling decimal point) removed. All other
    values are written with ``str()``.

    Parameters
    ----------
    df : pandas.DataFrame
        The table to write.
    filename : str, optional
        Destination path; an existing file is overwritten.
    precision : int, optional
        Maximum number of decimals kept for floating-point values.

    Returns
    -------
    None
        A confirmation line is printed once the file has been written.
    """
    def _format_value(val):
        # Render one cell; only floats get the fixed-point treatment.
        if not isinstance(val, float):
            return str(val)
        text = f"{val:.{precision}f}"
        # Strip zeros only when they follow a decimal point. With precision=0
        # (and for 'nan'/'inf') there is none, and stripping blindly would
        # turn "100" into "1" and "0" into an empty field.
        if '.' in text:
            text = text.rstrip('0').rstrip('.')
        # A tiny negative value rounds to "-0"; write a plain zero instead.
        return '0' if text == '-0' else text

    with open(filename, 'w') as f:
        # Column labels are not necessarily strings (e.g. default integer labels)
        f.write("\t".join(str(label) for label in df.columns) + "\n")

        # itertuples keeps each column's own dtype, whereas iterrows coerces
        # every row to a single common dtype.
        for row in df.itertuples(index=False, name=None):
            f.write("\t".join(_format_value(val) for val in row) + "\n")

    print(f"Dataframe saved to {filename}")


In [91]:
# Stack the MM13 and S16 tables row-wise and renumber the index from 0.
# NOTE(review): a galaxy listed in both tables stays as two separate rows - confirm this is intended.
merged_df = pd.concat(
    objs=[mm13_df, s16_df],
    axis=0,
    ignore_index=True,
)

In [92]:
# Write the combined table to disk as tab-separated text
save_dataframe_to_txt(df=merged_df, filename='fulldatabase.txt')

Dataframe saved to fulldatabase.txt
