In [1]:
# Set CUDA_VISIBLE_DEVICES to an empty string before the ML imports below.
# NOTE(review): presumably this hides every GPU so inference runs on CPU - confirm.
%env CUDA_VISIBLE_DEVICES=

env: CUDA_VISIBLE_DEVICES=


In [2]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from megnet.models import MEGNetModel
from megnet_graphs_train import generate_expetiments, TARGETS

In [3]:
# Collection of experiment descriptions; each item is later handed to process_experiment.
# NOTE(review): the helper name is spelled "expetiments" in megnet_graphs_train;
# the typo has to be fixed there before it can be fixed here.
experiments = generate_expetiments()

In [4]:
# Root directory for the per-experiment prediction tables and the summary table.
save_path = os.path.join(
  "datasets",
  "predicted_dichalcogenides_innopolis_202105_v3",
)
# Parity plots are stored in a sub-directory of the prediction root.
plots_path = os.path.join(save_path, "plots")

In [5]:
def get_prediction_column(target):
  """Return the name of the column that holds model predictions for `target`.

  The name is the target name prefixed with ``predicted_``.
  """
  return "predicted_{}".format(target)

In [6]:
def process_experiment(experiment):
  """Evaluate one trained MEGNet model on its train and test splits.

  Loads both pickled splits and the saved model, stores the model predictions
  in a new column of each split, draws a DFT-vs-predicted scatter plot with a
  diagonal reference line (saved as a PDF under ``plots_path``) and writes one
  CSV per split with ids, targets and predictions under ``save_path``.

  Args:
    experiment: experiment description. The attributes ``train_path``,
      ``test_path``, ``model_path``, ``target`` and ``name`` are read here,
      and the whole ``__dict__`` is copied into the result.

  Returns:
    dict with every attribute of ``experiment`` plus ``train_size``,
    ``test_size``, ``train_mae`` and ``test_mae``.
  """
  # The function may run in a fresh worker before anything created the output
  # tree; plots_path lives inside save_path, so this creates both.
  os.makedirs(plots_path, exist_ok=True)

  results = dict(experiment.__dict__)

  # Read the paths stored on the experiment instead of deriving the test path
  # with str.replace("train", ...) + str.format, which rewrites every "train"
  # substring (directory names included) and breaks on braces in the path.
  data_paths = {"train": experiment.train_path, "test": experiment.test_path}
  data = {}
  for data_name, data_file in data_paths.items():
    # NOTE: unpickling can execute arbitrary code; only load trusted datasets.
    data[data_name] = pd.read_pickle(data_file)
    results[f"{data_name}_size"] = len(data[data_name])

  prediction_column = get_prediction_column(experiment.target)
  model = MEGNetModel.from_file(experiment.model_path)
  for this_data in data.values():
    this_data.loc[:, prediction_column] = model.predict_structures(this_data.defect_representation)

  fig, ax = plt.subplots()
  try:
    for data_name, data_part in data.items():
      mae = np.mean(np.abs(data_part.loc[:, experiment.target]-data_part.loc[:, prediction_column]))
      results[f"{data_name}_mae"] = mae
      ax.scatter(data_part.loc[:, experiment.target], data_part.loc[:, prediction_column],
                 label=f"{data_name}, MAE={mae:.4f}",
                 alpha=0.5)
    ax.set_xlabel(f"DFT {experiment.target}, eV")
    ax.set_ylabel(f"Predicted {experiment.target}, eV")
    ax.legend()

    # Common limits for both axes so the y = x line runs corner to corner.
    lims = [
        np.min([ax.get_xlim(), ax.get_ylim()]),
        np.max([ax.get_xlim(), ax.get_ylim()]),
      ]

    ax.plot(lims, lims, 'k-', alpha=0.75, zorder=0)
    ax.set_aspect('equal')
    ax.set_xlim(lims)
    ax.set_ylim(lims)
    ax.set_title(f"{experiment.name}, train size={len(data['train'])}")
    fig.savefig(os.path.join(plots_path, f"{experiment.target}_{experiment.name}.pdf"),
                bbox_inches="tight",
                metadata={
                  "Author": "Nikita Kazeev",
                  "Title": f"MEGNet on defect-only representation, {experiment.target}, {experiment.name}",
                  "Keywords": "2D materials, machine learning, graph neural network, MEGNet"}
               )
  finally:
    # pyplot keeps every figure alive until it is closed; without this, a
    # worker that handles many experiments accumulates figures in memory.
    plt.close(fig)

  columns_to_save = ["_id", experiment.target, prediction_column]
  for data_name, data_part in data.items():
    data_part.to_csv(os.path.join(save_path, f"{experiment.name}_{experiment.target}_{data_name}.csv.gz"),
                     columns=columns_to_save)
  return results

In [7]:
from multiprocessing import Pool

# Evaluate the experiments in parallel on 20 worker processes.
# Pool.map returns the per-experiment result dicts in the order of `experiments`.
with Pool(processes=20) as p:
  results = p.map(process_experiment, experiments)

In [8]:
# One row per experiment: its attributes plus the split sizes and MAEs.
results_pd = pd.DataFrame(results)
summary_file = os.path.join(save_path, "summary.csv.gz")
results_pd.to_csv(summary_file, index=False)
results_pd

Unnamed: 0,train_path,test_path,name,target,epochs,atom_features,add_bond_z_coord,model_path,vacancy_only,train_size,test_size,train_mae,test_mae
0,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_bond_z_Z,homo,1000,Z,True,models/MEGNet-defect-only/homo/vac_only_bond_z_Z,True,2392,798,0.005017,0.01579
1,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_Z,homo,1000,Z,False,models/MEGNet-defect-only/homo/vac_only_Z,True,2392,798,0.012815,0.023003
2,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_bond_z_embed,homo,1000,embed,True,models/MEGNet-defect-only/homo/vac_only_bond_z...,True,2392,798,0.006427,0.016545
3,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_embed,homo,1000,embed,False,models/MEGNet-defect-only/homo/vac_only_embed,True,2392,798,0.006017,0.018874
4,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_bond_z_werespecies,homo,1000,werespecies,True,models/MEGNet-defect-only/homo/vac_only_bond_z...,True,2392,798,0.006444,0.016809
5,datasets/train_defects_vac_only.pickle.gzip,datasets/test_defects_vac_only.pickle.gzip,vac_only_werespecies,homo,1000,werespecies,False,models/MEGNet-defect-only/homo/vac_only_weresp...,True,2392,798,0.007247,0.018487
6,datasets/train_defects.pickle.gzip,datasets/test_defects.pickle.gzip,full_bond_z_Z,homo,1000,Z,True,models/MEGNet-defect-only/homo/full_bond_z_Z,False,2595,866,0.007751,0.018705
7,datasets/train_defects.pickle.gzip,datasets/test_defects.pickle.gzip,full_Z,homo,1000,Z,False,models/MEGNet-defect-only/homo/full_Z,False,2595,866,0.006706,0.018682
8,datasets/train_defects.pickle.gzip,datasets/test_defects.pickle.gzip,full_bond_z_embed,homo,1000,embed,True,models/MEGNet-defect-only/homo/full_bond_z_embed,False,2595,866,0.006206,0.015564
9,datasets/train_defects.pickle.gzip,datasets/test_defects.pickle.gzip,full_embed,homo,1000,embed,False,models/MEGNet-defect-only/homo/full_embed,False,2595,866,0.00484,0.016415
