In [1]:
%env CUDA_VISIBLE_DEVICES=

env: CUDA_VISIBLE_DEVICES=


In [2]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from megnet.models import MEGNetModel
from megnet_graphs_train import generate_expetiments, TARGETS

In [3]:
experiments = generate_expetiments()

In [4]:
save_path = os.path.join("datasets", "predicted_dichalcogenides_innopolis_202105_v3")
plots_path = os.path.join(save_path, "plots")

In [5]:
def get_prediction_column(target):
  return f"predicted_{target}"

In [6]:
columns_to_save = ["_id"] + list(TARGETS) + list(map(get_prediction_column, TARGETS))

In [7]:
def process_experiment(experiment):
  data = {}
  results = {}
  results.update(experiment.__dict__)
  data_path = experiment.train_path.replace("train", r"{}")
  for data_part in ("train", "test"):
    data[data_part] = pd.read_pickle(data_path.format(data_part))
    results[f"{data_part}_size"] = len(data[data_part])
  for target in TARGETS:
    prediction_column = get_prediction_column(target)
    model = MEGNetModel.from_file(os.path.join(experiment.model_path))
    for this_data in data.values():
      this_data.loc[:, prediction_column] = model.predict_structures(this_data.defect_representation)
    fig, ax = plt.subplots()
    for data_name, data_part in data.items():
      mae = np.mean(np.abs(data_part.loc[:, target]-data_part.loc[:, prediction_column]))
      results[f"{data_name}_mae"] = mae
      ax.scatter(data_part.loc[:, target], data_part.loc[:, prediction_column],
                 label=f"{data_name}, MAE={mae:.4f}",
                 alpha=0.5)
    ax.set_xlabel(f"DFT {target}, eV")
    ax.set_ylabel(f"Predicted {target}, eV")
    ax.legend()

    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
      ]

    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax.set_aspect('equal')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_title(f"{experiment.name}, train size={len(data['train'])}")
    fig.savefig(os.path.join(plots_path, f"{experiment.target}_{experiment.name}.pdf"),
                bbox_inches="tight",
                metadata={
                  "Author": "Nikita Kazeev",
                  "Title": f"MEGNet on defect-only representation, {experiment.target}, {experiment.name}",
                  "Keywords": "2D materials, machine learning, graph neural network, MEGNet"}
               )
  for data_name, data_part in data.items():
    data_part.to_csv(os.path.join(save_path, f"{experiment.name}_{data_name}.csv.gz"),
                     columns=columns_to_save)
  return results

In [8]:
from multiprocessing import Pool
with Pool(20) as p:
  results = p.map(process_experiment, experiments)

In [9]:
results_pd = pd.DataFrame.from_dict(data=results)
results_pd.to_csv(os.path.join(save_path, "summary.csv.gz"), index=False)
results_pd

Unnamed: 0,train_path,name,target,epochs,add_displaced_species,add_bond_z_coord,model_path,vacancy_only,train_size,test_size,train_mae,test_mae
0,datasets/train_defects_vac_only.pickle.gzip,vac_only_bond_z_werespecies,homo,1000,True,True,models/MEGNet-defect-only/homo/vac_only_bond_z...,True,2392,798,24.196569,24.184268
1,datasets/train_defects_vac_only.pickle.gzip,vac_only_werespecies,homo,1000,True,False,models/MEGNet-defect-only/homo/vac_only_weresp...,True,2392,798,24.036879,24.018488
2,datasets/train_defects_vac_only.pickle.gzip,vac_only_bond_z,homo,1000,False,True,models/MEGNet-defect-only/homo/vac_only_bond_z,True,2392,798,24.13071,24.116717
3,datasets/train_defects_vac_only.pickle.gzip,vac_only,homo,1000,False,False,models/MEGNet-defect-only/homo/vac_only,True,2392,798,24.08045,24.064449
4,datasets/train_defects.pickle.gzip,full_bond_z_werespecies,homo,1000,True,True,models/MEGNet-defect-only/homo/full_bond_z_wer...,False,2595,866,22.284629,22.690855
5,datasets/train_defects.pickle.gzip,full_werespecies,homo,1000,True,False,models/MEGNet-defect-only/homo/full_werespecies,False,2595,866,22.400908,22.816002
6,datasets/train_defects.pickle.gzip,full_bond_z,homo,1000,False,True,models/MEGNet-defect-only/homo/full_bond_z,False,2595,866,22.33481,22.746271
7,datasets/train_defects.pickle.gzip,full,homo,1000,False,False,models/MEGNet-defect-only/homo/full,False,2595,866,22.429906,22.847315
8,datasets/train_defects_vac_only.pickle.gzip,vac_only_bond_z_werespecies,formation_energy,1000,True,True,models/MEGNet-defect-only/formation_energy/vac...,True,2392,798,0.665733,0.727767
9,datasets/train_defects_vac_only.pickle.gzip,vac_only_werespecies,formation_energy,1000,True,False,models/MEGNet-defect-only/formation_energy/vac...,True,2392,798,0.680168,0.716939
