In [2]:
import torch
from torch_geometric.data import Batch
from torch_geometric.loader import DataLoader
import gpytorch

import xgboost as xgb
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

ModuleNotFoundError: No module named 'torch'

In [12]:
import pandas as pd
import numpy as np
import json
from utils.classes import *
from utils.exclude import *
import re
from collections import defaultdict

filename = r'data/solid-state_dataset_20200713.json'
filedata = open(filename, mode='r').read()
jsonParse = json.loads(filedata)

reactions = [from_dict(reaction, ReactionEntry) for reaction in jsonParse['reactions']]

In [13]:
periodic_table = {
    'H': 1, 'He': 2, 'Li': 3, 'Be': 4, 'B': 5, 'C': 6, 'N': 7, 'O': 8, 'F': 9, 'Ne': 10,
    'Na': 11, 'Mg': 12, 'Al': 13, 'Si': 14, 'P': 15, 'S': 16, 'Cl': 17, 'Ar': 18, 'K': 19, 'Ca': 20,
    'Sc': 21, 'Ti': 22, 'V': 23, 'Cr': 24, 'Mn': 25, 'Fe': 26, 'Co': 27, 'Ni': 28, 'Cu': 29, 'Zn': 30,
    'Ga': 31, 'Ge': 32, 'As': 33, 'Se': 34, 'Br': 35, 'Kr': 36, 'Rb': 37, 'Sr': 38, 'Y': 39, 'Zr': 40,
    'Nb': 41, 'Mo': 42, 'Tc': 43, 'Ru': 44, 'Rh': 45, 'Pd': 46, 'Ag': 47, 'Cd': 48, 'In': 49, 'Sn': 50,
    'Sb': 51, 'Te': 52, 'I': 53, 'Xe': 54, 'Cs': 55, 'Ba': 56, 'La': 57, 'Ce': 58, 'Pr': 59, 'Nd': 60,
    'Pm': 61, 'Sm': 62, 'Eu': 63, 'Gd': 64, 'Tb': 65, 'Dy': 66, 'Ho': 67, 'Er': 68, 'Tm': 69, 'Yb': 70,
    'Lu': 71, 'Hf': 72, 'Ta': 73, 'W': 74, 'Re': 75, 'Os': 76, 'Ir': 77, 'Pt': 78, 'Au': 79, 'Hg': 80,
    'Tl': 81, 'Pb': 82, 'Bi': 83, 'Po': 84, 'At': 85, 'Rn': 86, 'Fr': 87, 'Ra': 88, 'Ac': 89, 'Th': 90,
    'Pa': 91, 'U': 92, 'Np': 93, 'Pu': 94, 'Am': 95, 'Cm': 96, 'Bk': 97, 'Cf': 98, 'Es': 99, 'Fm': 100,
    'Md': 101, 'No': 102, 'Lr': 103, 'Rf': 104, 'Db': 105, 'Sg': 106, 'Bh': 107, 'Hs': 108, 'Mt': 109,
    'Ds': 110, 'Rg': 111, 'Cn': 112, 'Nh': 113, 'Fl': 114, 'Mc': 115, 'Lv': 116, 'Ts': 117, 'Og': 118,
    'dummy':119
}

In [14]:
def RemoveBadEntries(reactions: list,
                        min_precursors = 2,
                        remove_bad_doi = True,
                        remove_bad_target = True,
                        remove_bad_precursor = True, 
                        remove_duplicates_via_doi = True,
                        remove_invalid_coefficients_multiplicities = True,
                        use_bad_list = True,
                        remove_negative_coefficients = True, 
                        verbose_output = True) -> List[ReactionEntry]:
    
    """
    Filters out bad reaction entries from a given list based on various criteria.
    
    Parameters:
    reactions (list): List of ReactionEntry objects.
    min_precursors (int): Minimum number of precursors required. Default is 2.
    remove_bad_doi (bool): Flag to remove entries with bad DOIs. Default is True.
    remove_bad_target (bool): Flag to remove entries with bad targets. Default is True.
    remove_bad_precursor (bool): Flag to remove entries with bad precursors. Default is True.
    remove_duplicates_via_doi (bool): Flag to remove duplicate entries via DOI. Default is True.
    remove_invalid_coefficients_multiplicities (bool): Flag to remove entries with invalid coefficients or multiplicities. Default is True.
    use_bad_list (bool): Flag to use a predefined list of bad entries. Default is True.
    remove_negative_coefficients (bool): Flag to remove entries with negative coefficients. Default is True.
    verbose_output (bool): Flag to enable verbose output. Default is True.
    
    Returns:
    list: A list of filtered ReactionEntry objects.
    """
        
    filtered_reactions = []
    bad_list = ['*', '-', 'x', '+', '/', 'ac', '(2N)', '(3N)', '(4N)', '(5N)', '(6N)', '7LiOH', '2Ni(OH)2']
    isDigitRegex = re.compile(r'^-?\d+(\.\d+)?$')
    isNegativeRegex = re.compile(r'^(0*[1-9]\d*|0*\d*\.\d*[1-9])$')
    RegexSelected = isNegativeRegex
    if not remove_negative_coefficients:
        RegexSelected = isDigitRegex
    for reaction in reactions:
        rxn: ReactionEntry = reaction
        if (verbose_output): print(rxn.reaction_string, end='')
        if remove_bad_doi and rxn.doi in BAD_DOI: 
            if (verbose_output): print(": REJECTED DUE TO BAD DOI")
            continue
        if len(rxn.precursors) < min_precursors: 
            if (verbose_output): print(": REJECTED DUE TO LOW PRECURSOR COUNT")
            continue
        if remove_bad_target and any(target in BAD_TARGETS for target in (rxn.targets_string)):  
            if (verbose_output): print(": REJECTED DUE TO BAD TARGET")
            continue
        if remove_bad_precursor and  any(precursor.material_formula in BAD_PRECURSORS for precursor in rxn.precursors):  
            if (verbose_output): print(": REJECTED DUE TO BAD PRECURSOR")
            continue
        # if any([not bool(isDigitRegex.match(s.amount)) for s in rxn.reaction.left_side]):
        #     if (verbose_output): print(": REJECTED DUE TO UNKNOWN COEFFICIENT IN LHS")
        #     continue
        # if any([not bool(isDigitRegex.match(s.amount)) for s in rxn.reaction.right_side]):
        #     if (verbose_output): print(": REJECTED DUE TO UNKNOWN COEFFICIENT IN RHS")
        #     continue 
        if remove_invalid_coefficients_multiplicities and any([not bool(RegexSelected.match(s.amount)) for s in rxn.reaction.left_side]):
            if (verbose_output): print(": REJECTED DUE TO INVALID COEFFICIENT IN LHS")
            continue
        if remove_invalid_coefficients_multiplicities and any([not bool(RegexSelected.match(s.amount)) for s in rxn.reaction.right_side]):
            if (verbose_output): print(": REJECTED DUE TO INVALID COEFFICIENT IN RHS")
            continue

        found_bad = False
        if use_bad_list: 
            for bad in bad_list:
                if(any(bad in target_string for target_string in rxn.targets_string)) \
                or any(bad in precursor.material_formula for precursor in rxn.precursors):
                    found_bad = True
        
        if found_bad:  
            if (verbose_output): print(": REJECTED CHARACTER FROM BAD LIST")
            continue
        else:
            if (verbose_output): print(": SELECTED") 
            filtered_reactions.append(rxn)
    print("Filtered", len(filtered_reactions), "reactions out of total", len(reactions))
    return filtered_reactions

def NormalizePrecursors(reactions: list) -> List[ReactionEntry]:
    
    """
    Normalizes precursor materials in the given list of reactions based on predefined replacements.
    
    Parameters:
    reactions (list): List of ReactionEntry objects.
    
    Returns:
    list: A list of ReactionEntry objects with normalized precursors.
    """

    PrecursorMaterialReplacements = {}
    for key, value in PREC_REPLACEMENTS.items():
        PrecursorKey    = [ material for reaction in reactions for material in reaction.precursors if material.material_formula == key]
        PrecursorValue  = [ material for reaction in reactions for material in reaction.precursors if material.material_formula == value]
        filtered_reactions = [reaction for reaction in reactions for material in reaction.precursors if key in material.material_formula]
        number_replacements = len(filtered_reactions)
        if(len(PrecursorKey) > 0):
            for rxn in filtered_reactions:
                for prec in rxn.precursors:
                    if prec.material_formula == PrecursorKey[0].material_formula: 
                        #print("replace here")
                        prec = PrecursorValue[0]
                    if PrecursorKey[0].material_formula in rxn.reaction_string:
                        rxn.reaction_string.replace(PrecursorKey[0].material_formula, PrecursorValue[0].material_formula)
                    # TODO: You still have to replace Formula parts in rxn.reaction. Find a proposal that works.
            PrecursorMaterialReplacements[PrecursorKey[0]] = PrecursorValue[0]
            print("Processed:", key, '=', value, ": replaced", number_replacements, " places")
        else: print("Skipped:", key)
    return reactions

def RemoveDuplicates(reactions: list) -> List[ReactionEntry]:
    
    """
    Placeholder for function to remove duplicates from the list of reactions.
    TODO: Integrate all other "Remove*Duplicates" Functions

    Parameters:
    reactions (list): List of ReactionEntry objects.
    
    Returns:
    list: A list of ReactionEntry objects without duplicates.
    """

    return reactions

def RemoveDOIDuplicates(reactions: list) -> List[ReactionEntry]:
    
    """
    Removes duplicate reactions based on DOI and reaction string.
    
    Parameters:
    reactions (list): List of ReactionEntry objects.
    
    Returns:
    list: A list of ReactionEntry objects without DOI duplicates.
    """

    # Assuming 'reactions' is your list of ReactionEntry objects

    # Create a defaultdict to store entries grouped by (doi, reaction_string)
    entry_dict = defaultdict(list)
    for entry in reactions:
        entry_dict[(entry.doi, entry.reaction_string)].append(entry)

    # Filter out entries where there are duplicates (keep only the first occurrence)
    filtered_reactions_doi = []
    seen_keys = set()
    for entry in reactions:
        key = (entry.doi, entry.reaction_string)
        if key not in seen_keys:
            seen_keys.add(key)
            filtered_reactions_doi.append(entry)
    print("Filtered", len(filtered_reactions_doi), "reactions out of total", len(reactions))
    return filtered_reactions_doi

def RemoveNodeMatchDuplicates(reactions: list) -> List[ReactionEntry]:
    
    """
    Removes duplicates based on node matches in the reaction entries.
    
    Parameters:
    reactions (list): List of ReactionEntry objects.
    
    Returns:
    list: A list of ReactionEntry objects without node match duplicates.
    """
    # Dictionary to store entries grouped by (right_side_tuple, amount_tuple)
    duplicate_entries = defaultdict(list)
    filtered_reactions_dupForm = []
    for entry in reactions:
        # Create a tuple representation of right_side
        right_side_tuple = tuple((part.amount, part.material) for part in entry.reaction.right_side)

        # Create sets of (amount, material) tuples for target and precursors
        target_materials = {(comp.amount, comp.formula) for comp in entry.target.composition}
        precursor_materials =  {(mat.amount, mat.formula) for composition in (material.composition for material in reactions[0].precursors) for mat in composition}

        # Create a tuple for (target materials, precursor materials)
        materials_tuple = (frozenset(target_materials), frozenset(precursor_materials))

        key = (right_side_tuple, materials_tuple)
        duplicate_entries[key].append(entry)

    # Now, find and print duplicates
    seen_keys = set()
    for key, entries in duplicate_entries.items():
        if len(entries) > 1:
            if key not in seen_keys:
                seen_keys.add(key)
                filtered_reactions_dupForm.append(entry)
            print(len(entries), "\tDuplicates for: ", end='')
            print(f"Right Side: {key[0]}", end='')
            print("Target materials:", end='')
            print(key[1][0], end='')
            print("Precursor materials:", end='')
            print(key[1][1], end='')
            print()
            for i in range(len(entries)):
                entry = entries[i]
                print("Entry #{}: ".format(i), end='')
                calc_operations = [op for op in entry.operations if "calc" in op.token]
                print("No. Calcination Operations: {}, ".format(len(calc_operations)))
                for op in calc_operations:
                    print(op)
    print("Filtered", len(filtered_reactions_dupForm), "reactions out of total", len(reactions))
    return filtered_reactions_dupForm


In [15]:
badEntriesList = RemoveBadEntries(reactions, verbose_output = False)
doiEntriesList = RemoveDOIDuplicates(badEntriesList)

Filtered 20093 reactions out of total 31782
Filtered 19318 reactions out of total 20093


In [16]:
def split_reaction(reaction_string):
    
    """
    Splits a chemical reaction string into reactants and products.

    Parameters:
    reaction_string (str): The reaction string to be split, with reactants and products separated by '=='.

    Returns:
    Tuple[str, str]: A tuple containing the reactants and products as separate strings.
    """
        
    reactants, products = reaction_string.split('==')
    reactants = reactants.strip()
    products = products.strip()
    return reactants, products

def parse_chemical_formula(formula):

    """
    Parses a chemical formula into its constituent elements and their multiplicities.

    Parameters:
    formula (str): The chemical formula to be parsed.

    Returns:
    List[Tuple[str, str, str]]: A list of tuples, each containing an element, its multiplicity, and its atomic number.
    """

    pattern = r'([A-Z][a-z]*)(\d*\.?\d*)'
    matches = re.findall(pattern, formula)
    element_details = []
    for match in matches:
        element, multiplicity = match
        multiplicity = multiplicity if multiplicity else '1'
        atomic_number = re.findall(r'\d+', multiplicity)
        atomic_number = atomic_number[0] if atomic_number else '1'
        element_details.append((element, multiplicity, atomic_number))
    return element_details

def extract_element_details(reaction):

    """
    Extracts element details from a chemical reaction string.

    Parameters:
    reaction (str): The reaction string containing elements separated by '+'.

    Returns:
    List[Tuple[str, str, str]]: A list of tuples containing element details.
    """

    parts = reaction.split('+')
    element_details = []
    for part in parts:
        part = part.strip()
        element_details.extend(parse_chemical_formula(part))
    return element_details

def expand_element_details(element_details, prefix):

    """
    Expands element details into a dictionary with a given prefix.

    Parameters:
    element_details (List[Tuple[str, str, str]]): A list of tuples containing element details.
    prefix (str): A prefix for the dictionary keys.

    Returns:
    Dict[str, str]: A dictionary with element details expanded into key-value pairs.
    """

    data = {}
    for i, detail in enumerate(element_details):
        element, multiplicity, atomic_number = detail
        data[f'{prefix}_element_{i+1}'] = element
        data[f'{prefix}_multiplicity_{i+1}'] = multiplicity
        data[f'{prefix}_atomic_number_{i+1}'] = atomic_number
    return data

def extract_temperatures(operations):

    """
    Extracts sintering and calcination temperatures from a list of operations.

    Parameters:
    operations (List[Operation]): A list of operations, where each operation has a type and a token.

    Returns:
    Tuple[float, float]: A tuple containing sintering and calcination temperatures.
    """

    sintering_temp = None
    calcination_temp = None
    for operation in operations:
        if operation.type == 'HeatingOperation':
            if operation.token == 'sintered':
                sintering_temp = extract_temp(operation.conditions)
            elif operation.token == 'calcined':
                calcination_temp = extract_temp(operation.conditions)
    return sintering_temp, calcination_temp

def extract_temp(conditions):

    """
    Extracts the temperature from a set of conditions.

    Parameters:
    conditions (Conditions): Conditions containing heating temperature data.

    Returns:
    float: The first heating temperature value found, or None if not found.
    """
    
    if conditions.heating_temperature:
        for temp in conditions.heating_temperature:
            if temp.values:
                return temp.values[0]
    return None

In [17]:
# Convert to DataFrame
df = pd.DataFrame([{
    'doi': entry.doi,
    'paragraph_string': entry.paragraph_string,
    'synthesis_type': entry.synthesis_type,
    'reaction_string': entry.reaction_string,
    'targets_string': entry.targets_string,
    'sintering_temp': extract_temperatures(entry.operations)[0],
    'calcination_temp': extract_temperatures(entry.operations)[1]
} for entry in doiEntriesList])

# Apply the function to create new columns
df[['input_reaction', 'output_reaction']] = df['reaction_string'].apply(lambda x: pd.Series(split_reaction(x)))
df['input_elements'] = df['input_reaction'].apply(extract_element_details)
df['output_elements'] = df['output_reaction'].apply(extract_element_details)
input_expanded = df['input_elements'].apply(lambda x: pd.Series(expand_element_details(x, 'input')))
output_expanded = df['output_elements'].apply(lambda x: pd.Series(expand_element_details(x, 'output')))

# Concatenate the expanded details with the original DataFrame
df = pd.concat([df, input_expanded, output_expanded], axis=1)

# Drop temporary columns
df.drop(columns=['input_elements', 'output_elements'], inplace=True)

print(np.array(df).shape)

(19318, 336)


In [11]:
excel_path = './reaction_entries.xlsx'
df.to_excel(excel_path, index=False)

ModuleNotFoundError: No module named 'openpyxl'

In [33]:
input_count = 0
output_count = 0
columns = df.columns
for column in columns:
    # print(column)
    print(column.split("_"))
    if column.split("_")[0] == "input": 
        input_count +=1
    if column.split("_")[0] == "output": 
        output_count +=1

total_input_list =[]
for i, temp in enumerate(df["sintering_temp"]):
    
    if not np.isnan(i):
        input_list = []
        for j in range(len(input_count)):
            
            if df[f"input_multiplicity_{i}"] !=0:
                in_atom_num = df[f"input_atomic_number_{i}"] 
                in_el = df[f"input_element_{i}"] 
                in_mul = df[f"input_multiplicity_{i}"] 
                input_list.append([in_el, in_mul])
        
        for j in range(len(output_count)):
            if  df[f"input_multiplicity_{i}"] !=0:
                out_atom_num = df[f"output_atomic_number_{i}"] 
                out_el = df[f"output_element_{i}"] 
                out_mul = df[f"output_multiplicity_{i}"] 
                input_list.append([out_el, out_mul])


['doi']
['paragraph', 'string']
['synthesis', 'type']
['reaction', 'string']
['targets', 'string']
['sintering', 'temp']
['calcination', 'temp']
['input', 'reaction']
['output', 'reaction']
['input', 'element', '1']
['input', 'multiplicity', '1']
['input', 'atomic', 'number', '1']
['input', 'element', '2']
['input', 'multiplicity', '2']
['input', 'atomic', 'number', '2']
['input', 'element', '3']
['input', 'multiplicity', '3']
['input', 'atomic', 'number', '3']
['input', 'element', '4']
['input', 'multiplicity', '4']
['input', 'atomic', 'number', '4']
['input', 'element', '5']
['input', 'multiplicity', '5']
['input', 'atomic', 'number', '5']
['input', 'element', '6']
['input', 'multiplicity', '6']
['input', 'atomic', 'number', '6']
['input', 'element', '7']
['input', 'multiplicity', '7']
['input', 'atomic', 'number', '7']
['input', 'element', '8']
['input', 'multiplicity', '8']
['input', 'atomic', 'number', '8']
['input', 'element', '9']
['input', 'multiplicity', '9']
['input', 'atomic

TypeError: object of type 'int' has no len()

In [None]:
for element, count in composition.items():
    index = periodic_table[element]
    vector[index - 1] = count / total_atoms  
embedding = np.sum


In [None]:
class NN(nn.Module):
    def __init__(self):
        super(NN, self).__init__()
        self.fc1 = nn.Linear(119, 360)
        self.fc2 = nn.Linear(360, 180)
        self.fc3 = nn.Linear(180, 100)
        self.fc4 = nn.Linear(100, 64)
        self.fc5 = nn.Linear(64, 32)
        self.fc6 = nn.Linear(32, 1)

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = torch.relu(self.fc4(x))
        x = torch.relu(self.fc5(x))
        x = self.fc6(x)
        return x
model = NN()

In [None]:
def train(train_loader, device, model, optimizer, criterion):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    average_loss = running_loss / len(train_loader)
    return average_loss

def validate(validation_loader, device, model, criterion):
    model.eval()
    running_loss = 0.0
    validation_outputs = []
    validation_truth = []

    with torch.no_grad():
        for inputs, labels in validation_loader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            running_loss += loss.item()
            validation_outputs.append(outputs.detach().numpy())
            validation_truth.append(labels.detach().numpy())

    average_loss = running_loss / len(validation_loader)
    validation_outputs = np.concatenate(validation_outputs)
    validation_truth = np.concatenate(validation_truth)

    return average_loss, validation_outputs, validation_truth


def test(test_loader, device, model, criterion):
    model.eval()
    running_loss = 0.0
    test_outputs = []
    test_truth = []

    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, targets)

            running_loss += loss.item()
            test_outputs.append(outputs.detach().numpy())
            test_truth.append(targets.detach().numpy())

    average_loss = running_loss / len(test_loader)
    test_outputs = np.concatenate(test_outputs)
    test_truth = np.concatenate(test_truth)

    return average_loss, test_outputs, test_truth

In [None]:
sampling_size = 6

best_vals =[]
best_models = []
best_tests = []

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

train_test_split_index = int(0.9 * len(data_list))

train_val_data = data_list[:train_test_split_index]

test_data = data_list[train_test_split_index:]
test_loader = Dataloader(test_data, batch_size=16, shuffle=False)

data_len = len(train_val_data)
num_batches = data_len // sampling_size
indices = np.arange(data_len)
for i in range(num_batches):
    best_val_loss = np.inf

    start_index = i * sampling_size

    val_indices = indices[start_index:start_index + sampling_size]

    train_indices = np.setdiff1d(indices, val_test_indices)

    train_data = [train_val_data[j] for j in train_indices]
    val_data = [train_val_data[j] for j in val_indices]

    train_loader = Dataloader(train_data, batch_size=16, shuffle=True)
    validation_loader = Dataloader(val_data, batch_size=16, shuffle=False)

    model.to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min',
                                                        factor=0.8, patience=5,
                                                        min_lr=0.0000001)
    criterion = nn.L1Loss()
    best_validation_loss = float('inf')

    for epoch in range(1, EPOCHS+1):

        model.train()
        loss = train(train_loader, device, model, optimizer, criterion)
        scheduler.step(loss)

        validation_loss, validation_output, validation_truth_temp = validate(validation_loader, device, model, criterion)
        print(f"Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Validation Loss: {validation_loss:.4f}")
        if validation_loss < best_validation_loss:
            best_validation_loss = validation_loss
            best_model_state = copy.deepcopy(model.state_dict())
            best_model = model
            best_val_loss = validation_loss
            print(f"Best Validation Loss: {best_val_loss:.4f}")
model.load_state_dict(best_model_state)
test_loss, test_outputs, test_truth = test(test_loader, device, model, criterion)
print(f"Test Loss: {test_loss:.4f}")


In [None]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)

dtrain = xgb.DMatrix(X_train, label=y_train)
dval = xgb.DMatrix(X_val, label=y_val)
dtest = xgb.DMatrix(X_test, label=y_test)

params = {
    'objective': 'reg:squarederror',
    'eval_metric': 'rmse',
    'max_depth': 6,
    'eta': 0.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8
}

evals = [(dtrain, 'train'), (dval, 'eval')]

num_boost_round = 100
early_stopping_rounds = 10
model = xgb.train(params, dtrain, num_boost_round, evals, early_stopping_rounds=early_stopping_rounds, verbose_eval=True)

y_pred = model.predict(dtest)
test_rmse = mean_squared_error(y_test, y_pred, squared=False)
print(f'Test RMSE: {test_rmse:.4f}')