In [1]:
# Set the CUDA_VISIBLE_DEVICES environment variable to an empty value for this kernel.
# NOTE(review): presumably meant to hide all GPUs so that inference runs on CPU only — confirm.
%env CUDA_VISIBLE_DEVICES=

env: CUDA_VISIBLE_DEVICES=


In [2]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from megnet.models import MEGNetModel
from megnet_graphs_train import get_8x8_experiments

In [3]:
# Experiments to evaluate, as returned by megnet_graphs_train.get_8x8_experiments();
# each item is later handed to process_experiment().
experiments = get_8x8_experiments()

In [4]:
# Output locations: the per-experiment prediction tables and the summary are written
# to save_path, the parity plots to its "plots" subdirectory.
save_path = os.path.join("datasets", "predicted_dichalcogenides_innopolis_202105_v5")
plots_path = os.path.join(save_path, "plots")
# Create the directories up front (makedirs also creates save_path as the parent) so
# that the savefig/to_csv calls do not fail with FileNotFoundError on a fresh checkout.
os.makedirs(plots_path, exist_ok=True)

In [5]:
def get_prediction_column(target):
  """Return the name of the column that stores model predictions for `target`.

  The name is the target name prefixed with ``predicted_``.
  """
  column_template = "predicted_{}"
  return column_template.format(target)

In [6]:
def process_experiment(experiment):
  """Evaluate one trained MEGNet model on its train and test splits.

  Loads the pickled train/test tables and the saved model referenced by
  `experiment`, predicts `experiment.target` for every row and writes:

  * a DFT-vs-predicted scatter plot to ``plots_path/{target}_{name}.pdf``;
  * one ``save_path/{name}_{target}_{train|test}.csv.gz`` file per split with
    the ``_id``, target and prediction columns.

  Args:
    experiment: object exposing at least the ``train_path``, ``test_path``,
      ``model_path``, ``target`` and ``name`` attributes. The pickled tables
      must provide the ``defect_representation``, ``_id`` and target columns.

  Returns:
    dict holding every instance attribute of `experiment` plus
    ``{split}_size``, ``{split}_mae`` and ``{split}_mae_std`` for both splits,
    where ``*_mae_std`` is the standard deviation of the absolute errors.
  """
  # Start from the experiment's own attributes so the summary row is self-describing.
  results = dict(vars(experiment))

  # NOTE: unpickling can execute arbitrary code; only load trusted dataset files.
  data = {
      "train": pd.read_pickle(experiment.train_path),
      "test": pd.read_pickle(experiment.test_path),
  }
  for split_name, split_frame in data.items():
    results[f"{split_name}_size"] = len(split_frame)

  prediction_column = get_prediction_column(experiment.target)
  model = MEGNetModel.from_file(os.path.join(experiment.model_path))
  for split_frame in data.values():
    split_frame.loc[:, prediction_column] = model.predict_structures(split_frame.defect_representation)

  fig, ax = plt.subplots()
  try:
    for split_name, split_frame in data.items():
      true_values = split_frame.loc[:, experiment.target]
      predicted_values = split_frame.loc[:, prediction_column]
      errors = np.abs(true_values - predicted_values)
      mae = np.mean(errors)
      mae_std = np.std(errors)
      results[f"{split_name}_mae"] = mae
      results[f"{split_name}_mae_std"] = mae_std
      # Raw f-string: "\pm" is mathtext markup, not a Python escape sequence.
      ax.scatter(true_values, predicted_values,
                 label=rf"{split_name}, $MAE={mae:.4f} \pm {mae_std:.4f}$",
                 alpha=0.5)
    ax.set_xlabel(f"DFT {experiment.target}, eV")
    ax.set_ylabel(f"Predicted {experiment.target}, eV")
    ax.legend()

    # Use one common range for both axes so the y = x reference line is the diagonal.
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
      ]

    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax.set_aspect('equal')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_title(f"{experiment.name}, train size={len(data['train'])}")
    fig.savefig(os.path.join(plots_path, f"{experiment.target}_{experiment.name}.pdf"),
                bbox_inches="tight",
                metadata={
                  "Author": "Nikita Kazeev",
                  "Title": f"MEGNet on defect-only representation, {experiment.target}, {experiment.name}",
                  "Keywords": "2D materials, machine learning, graph neural network, MEGNet"}
               )
  finally:
    # pyplot keeps every figure it created alive until it is closed; release it even
    # when plotting or saving fails, so figures do not pile up across experiments.
    plt.close(fig)

  columns_to_save = ["_id", experiment.target, prediction_column]
  for split_name, split_frame in data.items():
    split_frame.to_csv(os.path.join(save_path, f"{experiment.name}_{experiment.target}_{split_name}.csv.gz"),
                       columns=columns_to_save)
  return results

In [7]:
from multiprocessing import Pool

# Evaluate the experiments in parallel on a pool of 20 worker processes;
# map() yields one result dict per experiment, in the order of `experiments`.
with Pool(processes=20) as p:
  results = p.map(func=process_experiment, iterable=experiments)

In [9]:
# One row per experiment: its settings plus the train/test sizes and error statistics.
results_pd = pd.DataFrame(results)
results_pd.to_csv(
  os.path.join(save_path, "summary.csv.gz"),
  index=False,
)
results_pd

Unnamed: 0,data_name,train_path,test_path,name,target,epochs,atom_features,add_bond_z_coord,model_path,learning_rate,vacancy_only,train_size,test_size,train_mae,train_mae_std,test_mae,test_mae_std
0,vac_only_8x8_split,datasets/train_defects_vac_only_8x8_split.pick...,datasets/test_defects_vac_only_8x8_split.pickl...,vac_only_8x8_split_bond_z_Z_1000,homo,1000,Z,True,models/MEGNet-defect-only/homo/vac_only_8x8_sp...,0.0002,True,3256,57,0.013167,0.014055,0.023155,0.019361
1,vac_only_no_8x8_in_train,datasets/train_defects_vac_only_no_8x8_in_trai...,datasets/test_defects_vac_only_no_8x8_in_train...,vac_only_no_8x8_in_train_bond_z_Z_400,homo,400,Z,True,models/MEGNet-defect-only/homo/vac_only_no_8x8...,0.0002,True,3200,113,0.02776,0.025382,0.055132,0.033525
2,vac_only_no_8x8_in_train,datasets/train_defects_vac_only_no_8x8_in_trai...,datasets/test_defects_vac_only_no_8x8_in_train...,vac_only_no_8x8_in_train_bond_z_Z_1000,homo,1000,Z,True,models/MEGNet-defect-only/homo/vac_only_no_8x8...,0.0002,True,3200,113,0.014487,0.014891,0.078468,0.034509
3,vac_only_8x8_split,datasets/train_defects_vac_only_8x8_split.pick...,datasets/test_defects_vac_only_8x8_split.pickl...,vac_only_8x8_split_bond_z_Z_1000,formation_energy,1000,Z,True,models/MEGNet-defect-only/formation_energy/vac...,0.0002,True,3256,57,0.054261,0.074554,0.064895,0.057497
4,vac_only_no_8x8_in_train,datasets/train_defects_vac_only_no_8x8_in_trai...,datasets/test_defects_vac_only_no_8x8_in_train...,vac_only_no_8x8_in_train_bond_z_Z_400,formation_energy,400,Z,True,models/MEGNet-defect-only/formation_energy/vac...,0.0002,True,3200,113,0.097808,0.116989,0.296022,0.340954
5,vac_only_no_8x8_in_train,datasets/train_defects_vac_only_no_8x8_in_trai...,datasets/test_defects_vac_only_no_8x8_in_train...,vac_only_no_8x8_in_train_bond_z_Z_1000,formation_energy,1000,Z,True,models/MEGNet-defect-only/formation_energy/vac...,0.0002,True,3200,113,0.042911,0.0473,1.036864,1.122277
