In [None]:
import torch
from ase.io import read
from mattersim.forcefield import MatterSimCalculator
from sklearn.metrics import mean_absolute_error
import matplotlib.pyplot as plt
import numpy as np


# Load the test structures; index=":" asks ASE for every frame in the file.
atoms_list = read("/kaggle/input/energy-molecules/test.xyz", index=":")

# Checkpoint evaluated first (reported below as the "trained" model).
# Inference runs on the GPU when torch can see one, otherwise on the CPU.
best_model = "/kaggle/input/pre-trained-ms/best_model.pth"
device = "cuda" if torch.cuda.is_available() else "cpu"
calc = MatterSimCalculator(load_path=best_model, device=device)

# Predicted potential energy of each structure, divided by its atom count.
energies = []
for structure in atoms_list:
    structure.calc = calc
    per_atom_energy = structure.get_potential_energy() / len(structure)
    energies.append(per_atom_energy)
# Reference energies, one per frame, parsed from the test file by hand.
# Each record is: an atom-count line, a "key=value" comment line, then one
# line per atom.
# NOTE(review): `energies` holds per-atom predictions (divided by len(atoms)
# above), so the value stored in the comment line must also be per-atom for
# the MAE below to be meaningful — confirm against the dataset.
y_true = []
with open("/kaggle/input/energy-molecules/test.xyz") as f:
    while True:
        line = f.readline()
        # Stop at EOF *or* at a blank line: a trailing newline at the end of
        # the file would otherwise reach int() below and raise ValueError.
        if not line.strip():
            break
        n_atoms = int(line)
        comment = f.readline().strip()
        # Take the text after the first "=", then keep only its first
        # whitespace-separated token, so extra "key=value" pairs following
        # the energy on the comment line do not break float().
        y_true.append(float(comment.split("=")[1].split()[0]))
        # The atom rows are not needed here; just advance past them.
        for _ in range(n_atoms):
            f.readline()

# Mean absolute error between the reference values and the predictions.
mae_best = mean_absolute_error(y_true, energies)
print(f'MAE train model:{mae_best}')


# Overall value range across reference and predicted energies; it defines the
# end points of the y = x guide line.
lowest_true, lowest_pred = min(y_true), min(energies)
highest_true, highest_pred = max(y_true), max(energies)
min_val = min(lowest_true, lowest_pred)
max_val = max(highest_true, highest_pred)
diagonal = [min_val, max_val]

# Parity plot: reference energy on x, predicted energy on y.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(y_true, energies, s=10, alpha=0.6, color='blue', label='Молекулы')
ax.plot(diagonal, diagonal, 'r--', linewidth=1.5, label='y = x')
ax.set_xlabel('True values (eV/atom)')
ax.set_ylabel('Predicted values (eV/atom)')
ax.set_title('Trained model')
ax.legend(loc='upper left')
ax.grid(True, linestyle=':')
# Equal scaling on both axes so the y = x line sits at 45 degrees.
ax.axis('equal')
fig.tight_layout()

# Write the figure to disk before displaying it.
fig.savefig('/kaggle/working/parity_plot_trained.png', dpi=300)
plt.show()


# Second checkpoint (reported below as the "pre-trained" model); it replaces
# the previous calculator and reuses the `device` chosen earlier.
base_model = '/kaggle/input/pre-trained-ms/mattersim-v1.0.0-5M.pth'
calc = MatterSimCalculator(load_path=base_model, device=device)

# Re-evaluate the same structures with the new calculator, again dividing the
# potential energy by the number of atoms.
energies = []
for structure in atoms_list:
    structure.calc = calc
    per_atom_energy = structure.get_potential_energy() / len(structure)
    energies.append(per_atom_energy)
# Reference energies, one per frame, parsed from the test file by hand.
# Each record is: an atom-count line, a "key=value" comment line, then one
# line per atom.
# NOTE(review): this re-reads the same file that was already parsed for the
# first model, so the resulting list is expected to be identical.
# NOTE(review): `energies` holds per-atom predictions, so the value stored in
# the comment line must also be per-atom — confirm against the dataset.
y_true = []
with open("/kaggle/input/energy-molecules/test.xyz") as f:
    while True:
        line = f.readline()
        # Stop at EOF *or* at a blank line: a trailing newline at the end of
        # the file would otherwise reach int() below and raise ValueError.
        if not line.strip():
            break
        n_atoms = int(line)
        comment = f.readline().strip()
        # Take the text after the first "=", then keep only its first
        # whitespace-separated token, so extra "key=value" pairs following
        # the energy on the comment line do not break float().
        y_true.append(float(comment.split("=")[1].split()[0]))
        # The atom rows are not needed here; just advance past them.
        for _ in range(n_atoms):
            f.readline()

# Mean absolute error between the reference values and the predictions.
mae_base = mean_absolute_error(y_true, energies)
print(f'MAE pre-train model:{mae_base}')

# Overall value range across reference and predicted energies; it defines the
# end points of the y = x guide line.
lowest_true, lowest_pred = min(y_true), min(energies)
highest_true, highest_pred = max(y_true), max(energies)
min_val = min(lowest_true, lowest_pred)
max_val = max(highest_true, highest_pred)
diagonal = [min_val, max_val]

# Parity plot: reference energy on x, predicted energy on y.
fig, ax = plt.subplots(figsize=(6, 6))
ax.scatter(y_true, energies, s=10, alpha=0.6, color='blue', label='Молекулы')
ax.plot(diagonal, diagonal, 'r--', linewidth=1.5, label='y = x')
ax.set_xlabel('True values (eV/atom)')
ax.set_ylabel('Predicted values (eV/atom)')
ax.set_title('Pre-trained model')
ax.legend(loc='upper left')
ax.grid(True, linestyle=':')
# Equal scaling on both axes so the y = x line sits at 45 degrees.
ax.axis('equal')
fig.tight_layout()

# Write the figure to disk before displaying it.
fig.savefig('/kaggle/working/parity_plot_pretrained.png', dpi=300)
plt.show()