In [1]:
from mendeleev import element
import numpy as np
import re
import pandas as pd
import ast
from pymatgen.symmetry.groups import SpaceGroup

# Define the function to calculate descriptors
def calculate_descriptors(formula):
    # Parse the formula and count the elements and their stoichiometric ratios
    pattern = re.compile(r"([A-Z][a-z]*)(\d*)")
    elements = pattern.findall(formula)

    # Convert elements list into a dictionary with counts as integers
    element_counts = {}
    for symbol, count in elements:
        count = int(count) if count else 1
        element_counts[symbol] = count

    # Retrieve each element's atomic properties
    atomic_numbers = []
    electronegativities = []
    atomic_radii = []
    atomic_masses = []
    valence_electrons = []

    for symbol, count in element_counts.items():
        elem = element(symbol)
        atomic_numbers.extend([elem.atomic_number] * count)
        electronegativities.extend([elem.en_pauling] * count)
        atomic_radii.extend([elem.atomic_radius] * count)
        atomic_masses.extend([elem.atomic_weight] * count)
        valence_electrons.extend([elem.nvalence()] * count)

    # Calculate descriptors

    # Elemental Properties
    mean_electronegativity = np.mean(electronegativities)
    atomic_number_diff = max(atomic_numbers) - min(atomic_numbers)
    avg_atomic_radius = np.mean(atomic_radii)

    # Compositional Features
    avg_atomic_mass = np.mean(atomic_masses)

    # Statistical Measures of Elemental Properties
    std_electronegativity = np.std(electronegativities)
    min_atomic_radius = np.min(atomic_radii)
    max_atomic_radius = np.max(atomic_radii)

    # Heuristic Quantities
    electronegativity_diff = max(electronegativities) - min(electronegativities)
    mean_valence_electron_count = np.mean(valence_electrons)

    # Organize descriptors into a vector
    feature_vector = [
        mean_electronegativity,        # 1. Mean Electronegativity
        atomic_number_diff,            # 2. Atomic Number Difference
        avg_atomic_radius,             # 3. Average Atomic Radius
        avg_atomic_mass,               # 4. Average Atomic Mass
        std_electronegativity,         # 5. Standard Deviation of Electronegativity
        min_atomic_radius,             # 6. Minimum Atomic Radius
        max_atomic_radius,             # 7. Maximum Atomic Radius
        electronegativity_diff,        # 8. Electronegativity Difference
        mean_valence_electron_count    # 9. Mean Valence Electron Count
    ]

    return feature_vector



def get_properties(row):

    e_f = ast.literal_eval(row['formation_energy'])['value']
    
    
    return  e_f

In [2]:
df1 = pd.read_csv('./Data/1_MatDX/MatDX_nomad_EF_Binary.csv')
df2 = pd.read_csv('./Data/1_MatDX/MatDX_nomad_EF_Ternary.csv')

# Concatenate the DataFrames
df = pd.concat([df1, df2], ignore_index=True)

In [3]:
import json
from tqdm import tqdm

target = {}
descriptors = {}

# Open the JSON file in append/update mode
output_file = 'descriptors.json'
try:
    # Load existing data if the file already exists
    with open(output_file, 'r') as file:
        data = json.load(file)
except (FileNotFoundError, json.JSONDecodeError):
    # If the file doesn't exist or is empty, initialize an empty dictionary
    data = {}

# Loop through the DataFrame and process each row
for index, row in tqdm(df.iterrows(), total=len(df), desc="Processing Rows"):
    try:
        formula = row['formula']
        e_f = get_properties(row)
        target[formula] = e_f
        descriptor_values = calculate_descriptors(formula)
        descriptors[formula] = descriptor_values

        # Store the descriptor and e_f in the desired format
        data[formula] = [descriptor_values, e_f]
        
        # Save to JSON file after each entry (or periodically for large datasets)
        with open(output_file, 'w') as file:
            json.dump(data, file, indent=4)
            
    except Exception as e:
        print(f"Error processing row {index}: {e}")
        pass

# Optionally, print the final number of descriptors processed
print("Total descriptors processed:", len(descriptors.keys()))


Processing Rows:   0%|          | 50/20000 [00:50<5:24:12,  1.03it/s]

Error processing row 49: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   1%|          | 127/20000 [01:49<3:57:26,  1.39it/s]

Error processing row 126: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:   1%|          | 182/20000 [02:26<3:40:34,  1.50it/s]

Error processing row 181: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   2%|▏         | 438/20000 [05:21<3:44:50,  1.45it/s]

Error processing row 437: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   2%|▏         | 466/20000 [05:42<4:02:57,  1.34it/s]

Error processing row 465: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   3%|▎         | 657/20000 [08:00<4:03:59,  1.32it/s]

Error processing row 656: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   4%|▍         | 768/20000 [09:23<4:06:02,  1.30it/s]

Error processing row 767: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   4%|▍         | 788/20000 [09:39<4:42:53,  1.13it/s]

Error processing row 787: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   4%|▍         | 789/20000 [09:40<4:57:32,  1.08it/s]

Error processing row 788: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:   5%|▍         | 976/20000 [12:15<3:58:51,  1.33it/s]

Error processing row 975: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   6%|▌         | 1145/20000 [14:20<3:53:02,  1.35it/s]

Error processing row 1144: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   6%|▋         | 1254/20000 [15:47<4:01:50,  1.29it/s]

Error processing row 1253: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:   6%|▋         | 1265/20000 [15:57<4:53:18,  1.06it/s]

Error processing row 1264: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:   7%|▋         | 1318/20000 [16:47<4:36:00,  1.13it/s]

Error processing row 1317: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   7%|▋         | 1352/20000 [17:20<5:15:15,  1.01s/it]

Error processing row 1351: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   7%|▋         | 1384/20000 [17:50<4:29:47,  1.15it/s]

Error processing row 1383: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:   7%|▋         | 1469/20000 [19:06<4:53:17,  1.05it/s]

Error processing row 1468: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   8%|▊         | 1545/20000 [20:13<4:43:39,  1.08it/s]

Error processing row 1544: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   8%|▊         | 1700/20000 [22:38<4:27:02,  1.14it/s]

Error processing row 1699: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   9%|▉         | 1775/20000 [23:44<4:28:32,  1.13it/s]

Error processing row 1774: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:   9%|▉         | 1842/20000 [24:45<4:39:11,  1.08it/s]

Error processing row 1841: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  10%|█         | 2041/20000 [28:02<5:02:58,  1.01s/it]

Error processing row 2040: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  10%|█         | 2076/20000 [28:34<4:38:46,  1.07it/s]

Error processing row 2075: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  12%|█▏        | 2428/20000 [33:56<4:37:18,  1.06it/s]

Error processing row 2427: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  12%|█▏        | 2492/20000 [34:56<4:31:48,  1.07it/s]

Error processing row 2491: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  12%|█▏        | 2495/20000 [34:59<4:56:57,  1.02s/it]

Error processing row 2494: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  13%|█▎        | 2505/20000 [35:09<4:54:31,  1.01s/it]

Error processing row 2504: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  14%|█▍        | 2790/20000 [39:31<4:29:43,  1.06it/s]

Error processing row 2789: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  14%|█▍        | 2823/20000 [40:01<4:20:52,  1.10it/s]

Error processing row 2822: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  14%|█▍        | 2861/20000 [40:37<4:28:18,  1.06it/s]

Error processing row 2860: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  15%|█▌        | 3044/20000 [43:30<4:51:03,  1.03s/it]

Error processing row 3043: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  16%|█▌        | 3120/20000 [44:43<5:05:26,  1.09s/it]

Error processing row 3119: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  16%|█▌        | 3130/20000 [44:53<4:24:30,  1.06it/s]

Error processing row 3129: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  16%|█▌        | 3212/20000 [46:09<4:26:27,  1.05it/s]

Error processing row 3211: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  16%|█▌        | 3243/20000 [46:38<4:09:50,  1.12it/s]

Error processing row 3242: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  16%|█▋        | 3285/20000 [47:17<4:28:06,  1.04it/s]

Error processing row 3284: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  16%|█▋        | 3295/20000 [47:27<4:25:59,  1.05it/s]

Error processing row 3294: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  17%|█▋        | 3320/20000 [47:52<4:47:16,  1.03s/it]

Error processing row 3319: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  17%|█▋        | 3340/20000 [48:11<4:34:59,  1.01it/s]

Error processing row 3339: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  17%|█▋        | 3362/20000 [48:33<4:15:31,  1.09it/s]

Error processing row 3361: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  17%|█▋        | 3429/20000 [49:39<5:39:56,  1.23s/it]

Error processing row 3428: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  17%|█▋        | 3476/20000 [50:29<3:58:25,  1.16it/s]

Error processing row 3475: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  17%|█▋        | 3489/20000 [50:41<4:17:40,  1.07it/s]

Error processing row 3488: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  18%|█▊        | 3535/20000 [51:24<4:13:18,  1.08it/s]

Error processing row 3534: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  18%|█▊        | 3576/20000 [52:03<3:59:00,  1.15it/s]

Error processing row 3575: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  18%|█▊        | 3618/20000 [52:43<4:13:46,  1.08it/s]

Error processing row 3617: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  18%|█▊        | 3653/20000 [53:17<4:45:28,  1.05s/it]

Error processing row 3652: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  19%|█▊        | 3706/20000 [54:03<3:47:28,  1.19it/s]

Error processing row 3705: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  19%|█▉        | 3757/20000 [54:47<3:52:08,  1.17it/s]

Error processing row 3756: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  19%|█▉        | 3776/20000 [55:04<3:42:03,  1.22it/s]

Error processing row 3775: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  19%|█▉        | 3867/20000 [56:28<4:14:53,  1.05it/s]

Error processing row 3866: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  19%|█▉        | 3899/20000 [56:58<4:09:47,  1.07it/s]

Error processing row 3898: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  20%|██        | 4032/20000 [59:04<3:45:22,  1.18it/s]

Error processing row 4031: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  21%|██        | 4135/20000 [1:00:41<4:08:21,  1.06it/s]

Error processing row 4134: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  21%|██        | 4191/20000 [1:01:35<3:58:47,  1.10it/s]

Error processing row 4190: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  21%|██        | 4214/20000 [1:01:59<4:14:15,  1.03it/s]

Error processing row 4213: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  21%|██▏       | 4287/20000 [1:03:14<4:15:14,  1.03it/s]

Error processing row 4286: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  22%|██▏       | 4360/20000 [1:04:25<4:29:39,  1.03s/it]

Error processing row 4359: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  22%|██▏       | 4391/20000 [1:04:55<4:02:54,  1.07it/s]

Error processing row 4390: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  22%|██▏       | 4396/20000 [1:05:00<4:17:31,  1.01it/s]

Error processing row 4395: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  22%|██▏       | 4429/20000 [1:05:32<4:16:00,  1.01it/s]

Error processing row 4428: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  22%|██▏       | 4463/20000 [1:06:06<4:24:22,  1.02s/it]

Error processing row 4462: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  22%|██▏       | 4488/20000 [1:06:30<3:58:03,  1.09it/s]

Error processing row 4487: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  23%|██▎       | 4565/20000 [1:07:40<3:34:59,  1.20it/s]

Error processing row 4564: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  24%|██▍       | 4770/20000 [1:10:33<3:41:27,  1.15it/s]

Error processing row 4769: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  24%|██▍       | 4791/20000 [1:10:51<3:23:59,  1.24it/s]

Error processing row 4790: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  24%|██▍       | 4793/20000 [1:10:53<3:33:03,  1.19it/s]

Error processing row 4792: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  24%|██▍       | 4877/20000 [1:12:01<3:40:57,  1.14it/s]

Error processing row 4876: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  25%|██▍       | 4918/20000 [1:12:38<3:17:40,  1.27it/s]

Error processing row 4917: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  25%|██▍       | 4929/20000 [1:12:48<3:51:25,  1.09it/s]

Error processing row 4928: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▍       | 4940/20000 [1:12:57<3:24:57,  1.22it/s]

Error processing row 4939: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  25%|██▍       | 4966/20000 [1:13:18<3:34:56,  1.17it/s]

Error processing row 4965: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▍       | 4973/20000 [1:13:25<3:35:07,  1.16it/s]

Error processing row 4972: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▍       | 4987/20000 [1:13:36<3:17:53,  1.26it/s]

Error processing row 4986: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▌       | 5035/20000 [1:14:17<3:16:39,  1.27it/s]

Error processing row 5034: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  25%|██▌       | 5040/20000 [1:14:21<3:19:07,  1.25it/s]

Error processing row 5039: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▌       | 5041/20000 [1:14:21<3:14:45,  1.28it/s]

Error processing row 5040: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  25%|██▌       | 5051/20000 [1:14:30<3:20:24,  1.24it/s]

Error processing row 5050: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  26%|██▌       | 5115/20000 [1:15:25<3:33:51,  1.16it/s]

Error processing row 5114: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  26%|██▌       | 5127/20000 [1:15:35<3:27:40,  1.19it/s]

Error processing row 5126: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  28%|██▊       | 5535/20000 [1:21:12<3:06:22,  1.29it/s]

Error processing row 5534: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  28%|██▊       | 5554/20000 [1:21:28<2:56:40,  1.36it/s]

Error processing row 5553: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  28%|██▊       | 5565/20000 [1:21:37<3:21:00,  1.20it/s]

Error processing row 5564: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  28%|██▊       | 5613/20000 [1:22:16<2:56:05,  1.36it/s]

Error processing row 5612: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  29%|██▊       | 5713/20000 [1:23:38<3:12:17,  1.24it/s]

Error processing row 5712: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  29%|██▉       | 5812/20000 [1:24:58<3:03:03,  1.29it/s]

Error processing row 5811: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  29%|██▉       | 5830/20000 [1:25:12<2:50:04,  1.39it/s]

Error processing row 5829: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  29%|██▉       | 5873/20000 [1:25:46<3:01:00,  1.30it/s]

Error processing row 5872: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  30%|██▉       | 5942/20000 [1:26:43<3:03:21,  1.28it/s]

Error processing row 5941: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  30%|██▉       | 5956/20000 [1:26:55<3:15:27,  1.20it/s]

Error processing row 5955: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  30%|███       | 6096/20000 [1:28:51<3:18:41,  1.17it/s]

Error processing row 6095: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  31%|███       | 6104/20000 [1:28:58<3:10:37,  1.21it/s]

Error processing row 6103: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  31%|███       | 6178/20000 [1:30:00<2:56:02,  1.31it/s]

Error processing row 6177: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  31%|███       | 6220/20000 [1:30:34<2:55:34,  1.31it/s]

Error processing row 6219: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  31%|███       | 6249/20000 [1:30:57<2:55:40,  1.30it/s]

Error processing row 6248: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  31%|███▏      | 6250/20000 [1:30:58<2:57:29,  1.29it/s]

Error processing row 6249: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  32%|███▏      | 6468/20000 [1:33:56<3:08:30,  1.20it/s]

Error processing row 6467: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  32%|███▏      | 6490/20000 [1:34:15<3:03:17,  1.23it/s]

Error processing row 6489: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  33%|███▎      | 6542/20000 [1:34:58<3:05:52,  1.21it/s]

Error processing row 6541: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  33%|███▎      | 6571/20000 [1:35:22<3:07:01,  1.20it/s]

Error processing row 6570: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  33%|███▎      | 6616/20000 [1:36:00<3:29:20,  1.07it/s]

Error processing row 6615: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  33%|███▎      | 6658/20000 [1:36:35<3:01:39,  1.22it/s]

Error processing row 6657: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  34%|███▎      | 6734/20000 [1:37:39<3:14:43,  1.14it/s]

Error processing row 6733: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  34%|███▍      | 6763/20000 [1:38:03<2:54:23,  1.27it/s]

Error processing row 6762: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  35%|███▌      | 7032/20000 [1:41:49<3:08:27,  1.15it/s]

Error processing row 7031: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  35%|███▌      | 7086/20000 [1:42:35<2:50:27,  1.26it/s]

Error processing row 7085: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  36%|███▌      | 7116/20000 [1:43:00<2:55:46,  1.22it/s]

Error processing row 7115: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  36%|███▋      | 7268/20000 [1:45:07<3:08:37,  1.13it/s]

Error processing row 7267: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  36%|███▋      | 7283/20000 [1:45:19<2:46:42,  1.27it/s]

Error processing row 7282: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  37%|███▋      | 7413/20000 [1:47:10<2:47:40,  1.25it/s]

Error processing row 7412: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  38%|███▊      | 7534/20000 [1:48:51<2:54:35,  1.19it/s]

Error processing row 7533: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  38%|███▊      | 7543/20000 [1:48:58<2:47:54,  1.24it/s]

Error processing row 7542: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  38%|███▊      | 7565/20000 [1:49:17<2:46:55,  1.24it/s]

Error processing row 7564: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  38%|███▊      | 7572/20000 [1:49:22<2:31:34,  1.37it/s]

Error processing row 7571: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  38%|███▊      | 7634/20000 [1:50:13<2:44:26,  1.25it/s]

Error processing row 7633: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  38%|███▊      | 7658/20000 [1:50:34<2:41:21,  1.27it/s]

Error processing row 7657: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  39%|███▊      | 7715/20000 [1:51:19<2:41:01,  1.27it/s]

Error processing row 7714: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  39%|███▉      | 7763/20000 [1:51:59<2:50:28,  1.20it/s]

Error processing row 7762: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  39%|███▉      | 7767/20000 [1:52:02<2:49:10,  1.21it/s]

Error processing row 7766: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  39%|███▉      | 7776/20000 [1:52:09<2:40:45,  1.27it/s]

Error processing row 7775: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  39%|███▉      | 7870/20000 [1:53:28<2:30:47,  1.34it/s]

Error processing row 7869: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  40%|███▉      | 7941/20000 [1:54:28<2:51:58,  1.17it/s]

Error processing row 7940: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  42%|████▏     | 8432/20000 [2:01:24<2:36:04,  1.24it/s]

Error processing row 8431: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  42%|████▏     | 8482/20000 [2:02:07<2:28:21,  1.29it/s]

Error processing row 8481: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  42%|████▏     | 8491/20000 [2:02:14<2:38:24,  1.21it/s]

Error processing row 8490: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  43%|████▎     | 8504/20000 [2:02:25<2:35:24,  1.23it/s]

Error processing row 8503: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  43%|████▎     | 8558/20000 [2:03:10<2:31:29,  1.26it/s]

Error processing row 8557: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  43%|████▎     | 8587/20000 [2:03:34<2:42:30,  1.17it/s]

Error processing row 8586: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  43%|████▎     | 8590/20000 [2:03:37<2:40:09,  1.19it/s]

Error processing row 8589: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  43%|████▎     | 8611/20000 [2:03:54<2:34:14,  1.23it/s]

Error processing row 8610: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  44%|████▍     | 8788/20000 [2:06:26<2:36:52,  1.19it/s]

Error processing row 8787: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  44%|████▍     | 8855/20000 [2:07:24<2:38:10,  1.17it/s]

Error processing row 8854: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  45%|████▍     | 8902/20000 [2:08:04<2:30:44,  1.23it/s]

Error processing row 8901: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  45%|████▍     | 8954/20000 [2:08:47<2:26:54,  1.25it/s]

Error processing row 8953: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▍     | 8975/20000 [2:09:05<2:46:08,  1.11it/s]

Error processing row 8974: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▍     | 8979/20000 [2:09:08<2:28:01,  1.24it/s]

Error processing row 8978: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  45%|████▍     | 8983/20000 [2:09:12<2:30:26,  1.22it/s]

Error processing row 8982: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▍     | 8990/20000 [2:09:17<2:37:30,  1.16it/s]

Error processing row 8989: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▌     | 9015/20000 [2:09:38<2:24:22,  1.27it/s]

Error processing row 9014: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▌     | 9027/20000 [2:09:48<2:25:15,  1.26it/s]

Error processing row 9026: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  45%|████▌     | 9083/20000 [2:10:34<2:22:37,  1.28it/s]

Error processing row 9082: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  45%|████▌     | 9090/20000 [2:10:40<2:26:52,  1.24it/s]

Error processing row 9089: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  46%|████▌     | 9141/20000 [2:11:24<2:20:17,  1.29it/s]

Error processing row 9140: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  46%|████▌     | 9157/20000 [2:11:37<2:23:16,  1.26it/s]

Error processing row 9156: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  46%|████▌     | 9175/20000 [2:11:53<2:24:14,  1.25it/s]

Error processing row 9174: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  46%|████▌     | 9226/20000 [2:12:36<2:22:50,  1.26it/s]

Error processing row 9225: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  46%|████▌     | 9241/20000 [2:12:49<2:28:51,  1.20it/s]

Error processing row 9240: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  46%|████▌     | 9249/20000 [2:12:56<2:31:05,  1.19it/s]

Error processing row 9248: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  46%|████▋     | 9267/20000 [2:13:11<2:23:24,  1.25it/s]

Error processing row 9266: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  47%|████▋     | 9336/20000 [2:14:09<2:32:49,  1.16it/s]

Error processing row 9335: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  47%|████▋     | 9340/20000 [2:14:13<2:27:46,  1.20it/s]

Error processing row 9339: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  47%|████▋     | 9351/20000 [2:14:22<2:21:27,  1.25it/s]

Error processing row 9350: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  47%|████▋     | 9453/20000 [2:15:48<2:18:50,  1.27it/s]

Error processing row 9452: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  48%|████▊     | 9541/20000 [2:17:02<2:18:42,  1.26it/s]

Error processing row 9540: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  48%|████▊     | 9565/20000 [2:17:22<2:23:17,  1.21it/s]

Error processing row 9564: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  48%|████▊     | 9615/20000 [2:18:06<2:17:50,  1.26it/s]

Error processing row 9614: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  48%|████▊     | 9625/20000 [2:18:14<2:20:53,  1.23it/s]

Error processing row 9624: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  48%|████▊     | 9631/20000 [2:18:19<2:15:34,  1.27it/s]

Error processing row 9630: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  48%|████▊     | 9660/20000 [2:18:43<2:07:02,  1.36it/s]

Error processing row 9659: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  49%|████▊     | 9737/20000 [2:19:49<2:24:53,  1.18it/s]

Error processing row 9736: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  49%|████▉     | 9820/20000 [2:20:59<2:07:18,  1.33it/s]

Error processing row 9819: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  49%|████▉     | 9852/20000 [2:21:26<2:19:41,  1.21it/s]

Error processing row 9851: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  49%|████▉     | 9895/20000 [2:22:04<2:19:11,  1.21it/s]

Error processing row 9894: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  50%|████▉     | 9970/20000 [2:23:08<2:27:18,  1.13it/s]

Error processing row 9969: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  50%|████▉     | 9982/20000 [2:23:19<2:28:06,  1.13it/s]

Error processing row 9981: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  50%|████▉     | 9999/20000 [2:23:33<2:18:50,  1.20it/s]

Error processing row 9998: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  52%|█████▏    | 10317/20000 [2:30:13<3:17:05,  1.22s/it]

Error processing row 10316: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  54%|█████▍    | 10773/20000 [2:39:48<3:04:34,  1.20s/it]

Error processing row 10772: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  54%|█████▍    | 10774/20000 [2:39:49<2:55:04,  1.14s/it]

Error processing row 10773: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  56%|█████▌    | 11103/20000 [2:46:41<3:08:49,  1.27s/it]

Error processing row 11102: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  56%|█████▌    | 11121/20000 [2:47:04<3:11:23,  1.29s/it]

Error processing row 11120: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  56%|█████▌    | 11211/20000 [2:48:56<3:01:08,  1.24s/it]

Error processing row 11210: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  57%|█████▋    | 11339/20000 [2:51:37<2:56:45,  1.22s/it]

Error processing row 11338: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  58%|█████▊    | 11517/20000 [2:55:21<2:39:19,  1.13s/it]

Error processing row 11516: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  58%|█████▊    | 11652/20000 [2:58:12<2:53:38,  1.25s/it]

Error processing row 11651: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  59%|█████▉    | 11778/20000 [3:00:54<2:46:20,  1.21s/it]

Error processing row 11777: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  59%|█████▉    | 11814/20000 [3:01:39<2:53:12,  1.27s/it]

Error processing row 11813: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  59%|█████▉    | 11846/20000 [3:02:19<2:48:25,  1.24s/it]

Error processing row 11845: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  61%|██████    | 12165/20000 [3:09:04<2:36:18,  1.20s/it]

Error processing row 12164: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  61%|██████    | 12177/20000 [3:09:19<2:37:42,  1.21s/it]

Error processing row 12176: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  61%|██████▏   | 12295/20000 [3:11:49<2:37:06,  1.22s/it]

Error processing row 12294: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  62%|██████▏   | 12412/20000 [3:14:19<2:36:17,  1.24s/it]

Error processing row 12411: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  63%|██████▎   | 12595/20000 [3:18:14<2:36:33,  1.27s/it]

Error processing row 12594: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  63%|██████▎   | 12675/20000 [3:19:55<2:31:24,  1.24s/it]

Error processing row 12674: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  65%|██████▌   | 13058/20000 [3:28:09<2:33:33,  1.33s/it]

Error processing row 13057: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  66%|██████▌   | 13118/20000 [3:29:27<2:20:15,  1.22s/it]

Error processing row 13117: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  67%|██████▋   | 13376/20000 [3:34:59<2:23:14,  1.30s/it]

Error processing row 13375: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  67%|██████▋   | 13456/20000 [3:36:44<2:21:11,  1.29s/it]

Error processing row 13455: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  70%|███████   | 14021/20000 [3:48:57<2:05:13,  1.26s/it]

Error processing row 14020: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  70%|███████   | 14046/20000 [3:49:29<2:02:57,  1.24s/it]

Error processing row 14045: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  70%|███████   | 14062/20000 [3:49:50<2:00:12,  1.21s/it]

Error processing row 14061: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  70%|███████   | 14098/20000 [3:50:36<2:03:15,  1.25s/it]

Error processing row 14097: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  71%|███████▏  | 14251/20000 [3:53:56<2:10:34,  1.36s/it]

Error processing row 14250: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  72%|███████▏  | 14413/20000 [3:57:27<1:57:43,  1.26s/it]

Error processing row 14412: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  72%|███████▏  | 14457/20000 [3:58:25<2:03:04,  1.33s/it]

Error processing row 14456: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  73%|███████▎  | 14556/20000 [4:00:36<2:00:12,  1.32s/it]

Error processing row 14555: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  73%|███████▎  | 14583/20000 [4:01:12<1:54:14,  1.27s/it]

Error processing row 14582: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  73%|███████▎  | 14697/20000 [4:03:42<1:53:03,  1.28s/it]

Error processing row 14696: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  74%|███████▎  | 14707/20000 [4:03:55<1:48:12,  1.23s/it]

Error processing row 14706: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  74%|███████▍  | 14858/20000 [4:07:10<1:46:16,  1.24s/it]

Error processing row 14857: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  74%|███████▍  | 14868/20000 [4:07:23<1:48:47,  1.27s/it]

Error processing row 14867: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  74%|███████▍  | 14894/20000 [4:07:58<1:49:36,  1.29s/it]

Error processing row 14893: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  75%|███████▍  | 14921/20000 [4:08:34<1:54:05,  1.35s/it]

Error processing row 14920: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  75%|███████▍  | 14973/20000 [4:09:49<1:41:34,  1.21s/it]

Error processing row 14972: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  76%|███████▌  | 15189/20000 [4:14:37<1:47:54,  1.35s/it]

Error processing row 15188: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  77%|███████▋  | 15330/20000 [4:17:47<1:44:52,  1.35s/it]

Error processing row 15329: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  77%|███████▋  | 15446/20000 [4:20:24<1:36:04,  1.27s/it]

Error processing row 15445: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  77%|███████▋  | 15453/20000 [4:20:32<1:34:26,  1.25s/it]

Error processing row 15452: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  77%|███████▋  | 15489/20000 [4:21:22<1:31:45,  1.22s/it]

Error processing row 15488: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  78%|███████▊  | 15500/20000 [4:21:36<1:32:58,  1.24s/it]

Error processing row 15499: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  78%|███████▊  | 15501/20000 [4:21:37<1:30:53,  1.21s/it]

Error processing row 15500: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  78%|███████▊  | 15507/20000 [4:21:45<1:29:15,  1.19s/it]

Error processing row 15506: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  78%|███████▊  | 15597/20000 [4:23:44<1:38:05,  1.34s/it]

Error processing row 15596: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  79%|███████▉  | 15806/20000 [4:28:23<1:26:08,  1.23s/it]

Error processing row 15805: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  79%|███████▉  | 15824/20000 [4:28:46<1:28:56,  1.28s/it]

Error processing row 15823: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  80%|███████▉  | 15926/20000 [4:30:59<1:27:30,  1.29s/it]

Error processing row 15925: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  80%|████████  | 16026/20000 [4:33:11<1:23:43,  1.26s/it]

Error processing row 16025: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  81%|████████▏ | 16295/20000 [4:39:06<1:20:55,  1.31s/it]

Error processing row 16294: unsupported operand type(s) for +: 'NoneType' and 'float'


Processing Rows:  82%|████████▏ | 16385/20000 [4:41:05<1:10:49,  1.18s/it]

Error processing row 16384: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  83%|████████▎ | 16538/20000 [4:44:27<1:15:32,  1.31s/it]

Error processing row 16537: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  85%|████████▌ | 17094/20000 [4:56:57<1:01:09,  1.26s/it]

Error processing row 17093: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  86%|████████▌ | 17211/20000 [4:59:35<57:15,  1.23s/it]  

Error processing row 17210: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  86%|████████▋ | 17283/20000 [5:01:12<57:11,  1.26s/it]  

Error processing row 17282: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  88%|████████▊ | 17626/20000 [5:08:59<50:43,  1.28s/it]  

Error processing row 17625: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  89%|████████▉ | 17760/20000 [5:12:02<47:59,  1.29s/it]

Error processing row 17759: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  89%|████████▉ | 17761/20000 [5:12:03<46:07,  1.24s/it]

Error processing row 17760: unsupported operand type(s) for +: 'NoneType' and 'NoneType'


Processing Rows:  92%|█████████▏| 18367/20000 [5:25:54<37:37,  1.38s/it]

Error processing row 18366: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  94%|█████████▍| 18835/20000 [5:36:34<25:29,  1.31s/it]

Error processing row 18834: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  94%|█████████▍| 18889/20000 [5:37:46<22:57,  1.24s/it]

Error processing row 18888: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  95%|█████████▌| 19042/20000 [5:41:17<22:17,  1.40s/it]

Error processing row 19041: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  97%|█████████▋| 19348/20000 [5:48:17<13:20,  1.23s/it]

Error processing row 19347: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  98%|█████████▊| 19691/20000 [5:56:08<06:43,  1.31s/it]

Error processing row 19690: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  99%|█████████▉| 19753/20000 [5:57:33<05:25,  1.32s/it]

Error processing row 19752: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows:  99%|█████████▉| 19869/20000 [6:00:13<02:54,  1.33s/it]

Error processing row 19868: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows: 100%|█████████▉| 19941/20000 [6:01:52<01:15,  1.27s/it]

Error processing row 19940: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows: 100%|█████████▉| 19951/20000 [6:02:05<01:02,  1.28s/it]

Error processing row 19950: unsupported operand type(s) for +: 'float' and 'NoneType'


Processing Rows: 100%|██████████| 20000/20000 [6:03:12<00:00,  1.09s/it]

Total descriptors processed: 11550





In [None]:
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.tree import DecisionTreeRegressor
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn.neighbors import KNeighborsRegressor

# Assuming target_property and descriptors are defined
df_target = pd.Series(target_property, name='target')
df_descriptors = pd.DataFrame(descriptors).T

df = df_descriptors.join(df_target, how='inner')

# Split data into features (X) and target (y)
X = df.drop(columns='target')
y = df['target']

# Split the data into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# List of models to evaluate
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(),
    "Lasso Regression": Lasso(),
    "Decision Tree": DecisionTreeRegressor(max_depth=10),
    "Random Forest": RandomForestRegressor(n_estimators=50, max_depth=10, random_state=42),
    "Gradient Boosting": GradientBoostingRegressor(n_estimators=50, max_depth=3, random_state=42),
    "Support Vector Regressor": SVR(kernel="linear"),
    "k-Nearest Neighbors": KNeighborsRegressor(n_neighbors=5)
}

# Store results
results = {}

# Iterate over models
for model_name, model in models.items():
    # Optional: Use PCA for dimensionality reduction if needed
    pipeline = Pipeline([
        ('pca', PCA(n_components=5)),  # Adjust n_components based on explained variance
        ('regressor', model)
    ])
    
    # Train the model
    pipeline.fit(X_train, y_train)
    
    # Predict on the test set
    y_pred = pipeline.predict(X_test)
    
    # Calculate the R-squared score
    r2_test = r2_score(y_test, y_pred)
    r2_train = r2_score(y_train, pipeline.predict(X_train))
    
    # Store the results
    results[model_name] = r2
    print(f"{model_name} R-squared: {r2_test:.2f} (Test), {r2_train:.2f} (Train)")   

In [11]:
from mendeleev import element
import numpy as np
import re

# Function to parse compound formula (e.g., AxByCz)
def parse_formula(formula):
    pattern = re.compile(r"([A-Z][a-z]*)(\d*)")
    elements = pattern.findall(formula)
    return {el: int(num) if num else 1 for el, num in elements}

# Function to retrieve basic properties of each element
def get_element_properties(element_symbol):
    elem = element(element_symbol)
    return {
        'atomic_number': elem.atomic_number,
        'electronegativity': elem.en_pauling or 0.0,
        'atomic_radius': elem.atomic_radius or 0.0,
        'valence_electrons': elem.nvalence if elem.nvalence else 0
    }

# Descriptor function
def generate_descriptor(formula):
    composition = parse_formula(formula)
    elements = list(composition.keys())
    
    # Initialize dictionaries to store properties
    atomic_numbers = []
    electronegativities = []
    atomic_radii = []
    valence_electrons = []
    atomic_masses = []
    
    # Gather elemental properties
    for element_symbol, count in composition.items():
        props = get_element_properties(element_symbol)
        atomic_numbers.extend([props['atomic_number']] * count)
        electronegativities.extend([props['electronegativity']] * count)
        atomic_radii.extend([props['atomic_radius']] * count)
        valence_electrons.extend([props['valence_electrons']] * count)
        atomic_masses.extend([element(element_symbol).atomic_weight] * count)
    
    # Compositional features
    total_atoms = sum(composition.values())
    stoichiometric_ratios = [composition[el] / total_atoms for el in elements]
    average_atomic_mass = np.mean(atomic_masses)
    
    # Statistical measures for elemental properties
    stats = {
        'mean_atomic_number': np.mean(atomic_numbers),
        'std_atomic_number': np.std(atomic_numbers),
        'min_atomic_number': np.min(atomic_numbers),
        'max_atomic_number': np.max(atomic_numbers),
        'mean_electronegativity': np.mean(electronegativities),
        'std_electronegativity': np.std(electronegativities),
        'min_electronegativity': np.min(electronegativities),
        'max_electronegativity': np.max(electronegativities),
        'mean_atomic_radius': np.mean(atomic_radii),
        'std_atomic_radius': np.std(atomic_radii),
        'min_atomic_radius': np.min(atomic_radii),
        'max_atomic_radius': np.max(atomic_radii),
        'mean_valence_electrons': np.mean(valence_electrons),
        'std_valence_electrons': np.std(valence_electrons),
        'min_valence_electrons': np.min(valence_electrons),
        'max_valence_electrons': np.max(valence_electrons),
    }
    
    # Heuristic quantities
    # Differences in electronegativity between neighboring atoms
    electronegativity_diffs = []
    for i, el1 in enumerate(elements):
        for el2 in elements[i + 1:]:
            diff = abs(get_element_properties(el1)['electronegativity'] - get_element_properties(el2)['electronegativity'])
            electronegativity_diffs.append(diff)
    mean_electronegativity_diff = np.mean(electronegativity_diffs) if electronegativity_diffs else 0.0

    # Atomic packing fraction placeholder (assuming crystal structure details are unknown)
    # For a rough estimate, we'll use the approximate atomic volume based on atomic radii
    if atomic_radii:
        atomic_volumes = [(4 / 3) * np.pi * (r ** 3) for r in atomic_radii]
        packing_fraction = np.sum(atomic_volumes) / total_atoms  # A rough average
    else:
        packing_fraction = 0.0

    # Descriptor vector
    descriptor_vector = [
        *stoichiometric_ratios,               # Compositional stoichiometric ratios
        average_atomic_mass,                  # Average atomic mass
        mean_electronegativity_diff,          # Mean electronegativity difference
        packing_fraction,                     # Estimated atomic packing fraction
        *stats.values()                       # Statistical measures
    ]
    
    return descriptor_vector

# Example Usage
compound_formula = 'H2O3Ge2'
descriptor_vector = generate_descriptor(compound_formula)
print(f"Descriptor vector for {compound_formula}:")
print(descriptor_vector)


TypeError: unsupported operand type(s) for +: 'method' and 'method'