# Cross validation

In [2]:
import os
import sys
from fitting import FitModel
import numpy as np
import matplotlib.pyplot as plt
import json
import glob

In [3]:
def get_forces(fit_data, values, args):
    fit_data.init_potential(values, args)
    ip_forces = fit_data.get_forces()
    dft_forces = fit_data.expected_forces()
    return dft_forces, ip_forces

def chi_squared_error(dft_forces, ip_forces):
    return np.sum((dft_forces - ip_forces)**2)/ dft_forces.size

def create_directory(head_directory_name, structure_number):
    directory = os.path.join(head_directory_name, str(structure_number))
    os.makedirs(directory)
    return directory

### Set up parameters for LiNiO2 with core-shell O-O

In [4]:
params = {}
params['core_shell'] = { 'Li': False, 'Ni': False, 'O': True }
params['charges'] = {'Li': +1.0,
                     'Ni': +3.0,
                     'O': {'core':  -2.0, #+0.960,
                           'shell': 0.0}} #-2.960}}
params['masses'] = {'Li': 6.941,
                    'Ni': 58.6934,
                    'O': {'core': 14.3991,
                          'shell': 1.5999} }
params['cs_springs'] = {'O-O' : [20.0, 0.0]}

distribution = {}
distribution['Li-O'] = {'bpp' : [663.111, 0.119, 0.0],
                        'sd' : [80, 0.01, 0.01]}
distribution['Ni-O'] = {'bpp' : [1393.540, 0.218, 0.000],
                        'sd'  : [80, 0.01, 0.01]}
distribution['O-O'] = {'bpp' : [25804.807, 0.284, 0.0],
                       'sd'  : [200, 0.01, 5]}

### Define the directory paths and names/number of structures

In [8]:
# Define number of structures
tot_num_structures = 15
num_struct_to_fit = 1
num_of_fits = 1

# Define paths to poscar/outcar directories
poscar_directory = os.path.join('poscars','thermos')
outcar_directory = os.path.join('outcars','thermos')

# Create cross validation directory
head_directory_name = '{}_structure_fits'.format(num_struct_to_fit)
cv_directory_name = 'cross_validation'
head_output_directory = create_directory(head_directory_name, cv_directory_name)

FileExistsError: [Errno 17] File exists: '1_structure_fits/cross_validation'

In [6]:
def validation_sets(num_of_fits, tot_num_structures, num_struct_to_fit):
    sets_of_structures = []
    while len(sets_of_structures) < num_of_fits:
        struct_set = np.sort(np.random.randint(0,tot_num_structures, size=num_struct_to_fit), axis=0)
        if len(set(struct_set)) != num_struct_to_fit:
            continue
        if not any(np.array_equiv(struct_set, x) for x in sets_of_structures):
            sets_of_structures.append(struct_set) 
    return np.array(sets_of_structures)

### Calculates each structures potential with every other structure, returning the forces and error

In [5]:
for potential_file in sorted(glob.glob('{}/*/potentials.json'.format(head_directory_name))): #change 1 to * for all cross-validations
    with open(potential_file, 'r') as f:
        potentials = json.load(f)
    structure_nums = int(potential_file.replace('/potentials.json', '').replace('{}/'.format(head_directory_name),''))
    include_labels = list(potentials.keys())
    include_values = list(potentials.values())
    indv_output_directory = create_directory(head_output_directory, 'p{}'.format(structure_nums))
    
    
    #####In here goes the creation of the sets_of_structures list making sure not to repeat current structure####
    
    
    
    for fit, structs in enumerate(sets_of_structures): 
        for struct_num, struct in enumerate(structs):
            os.system('cp {}/POSCAR{} {}/POSCAR{}'.format(poscar_directory, struct+1, 'poscars', struct_num+1))
            os.system('cp {}/OUTCAR{} {}/OUTCAR{}'.format(outcar_directory, struct+1, 'outcars', struct_num+1))    
        fit_data = FitModel.collect_info(params, distribution, supercell=[2,2,2])
        dft_forces, ip_forces = get_forces(fit_data, include_values, include_labels)
        error = chi_squared_error(dft_forces, ip_forces)     
#         print('{}/p{}-s{}_dft_forces.dat'.format(indv_output_directory, structure_num, structure+1))
        np.savetxt('{}/p{}-s{}_dft_forces.dat'.format(indv_output_directory, structure_num, structure+1), dft_forces, fmt='%.10e', delimiter=' ')
        np.savetxt('{}/p{}-s{}_ip_forces.dat'.format(indv_output_directory, structure_num, structure+1), ip_forces, fmt='%.10e', delimiter=' ')
        with open('{}/p{}-s{}_error.dat'.format(indv_output_directory, structure_num, structure+1), 'w') as f:
            f.write(str(error))    
    


Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements: ['Li', 'Ni', 'O']
Found elements