In [333]:
import numpy as np
import inspect
import os
import operator

### General File Path Definition

Defines the path to the input VESTA file, which is expected to sit in the same directory as the IPython notebook.

In [334]:
# Resolve paths against the current working directory, which by default is the folder
# the notebook was started from. The previous inspect.getfile(inspect.currentframe())
# approach returns the synthetic file name IPython assigns to the compiled cell; newer
# kernels place that name in a temporary directory, which breaks the lookup.
file_dir = os.getcwd()
file_name = 'POSCAR_ZrB2_silicene_11x2_v2.vesta'  # Name of the VESTA file to read.
file_path = os.path.join(file_dir, file_name)  # Full path to the input file.

### File Read Test

In [335]:
# Sanity check: print the first line to confirm the file can be opened and read.
# The context manager closes the file even if reading raises.
with open(file_path, 'r') as file:
    print(file.readline())

#VESTA_FORMAT_VERSION 3.3.0



### Read File

Reads in file as a single string.

In [336]:
# Read the whole file into a single string; the context manager guarantees the
# handle is closed even if the read fails.
with open(file_path, 'r') as file:
    file_data = file.read()

Split the string into a list of lines at each newline character ('\n').

In [337]:
# Rebinds file_data from one string to a list of lines (split on '\n').
# Not re-runnable: a second execution fails because a list has no .split().
file_data = file_data.split('\n')  # Split the file by the newline command \n

Define python dictionary of keywords in vesta file that we will then get the indexes for.

In [338]:
# Section keywords of interest, each mapped to None until its line index
# in the file is looked up.
keyword_indx = dict.fromkeys([
    'CRYSTAL',
    'TITLE',
    'GROUP',
    'TRANM 0',
    'LTRANSL',
    'LORIENT',
    'LMATRIX',
    'CELLP',
    'STRUC',
    'THERI 0',
    'SHAPE',
    'BOUND',
    'SBOND',
    'SITET',
])

Find the indexes of the different keywords in the vesta file.

In [339]:
keyword_list = list(keyword_indx)  # Snapshot of the keywords, safe to iterate while updating the dict.
for keyword in keyword_list:
    # list.index returns the first line that equals the keyword exactly
    # and raises ValueError if no such line exists.
    keyword_indx[keyword] = file_data.index(keyword)

### Get CELLP data

In [340]:
# Lines from the CELLP keyword up to (not including) the STRUC keyword.
cellp_data = file_data[keyword_indx['CELLP']:keyword_indx['STRUC']]
# Keep the keyword line as text; parse every following line into a float array,
# using a double space as the separator.
cellp_data[1:] = [np.fromstring(row, dtype=float, sep='  ') for row in cellp_data[1:]]

In [341]:
cellp_data  # Display the parsed CELLP section: the keyword followed by the numeric rows.

['CELLP',
 array([ 60.426811,   6.34317 ,  30.      ,  90.      ,  90.      ,  90.      ]),
 array([ 0.,  0.,  0.,  0.,  0.,  0.])]

### STRUC Data

Separates out the structure data from file_data.

In [342]:
# Raw text lines from the STRUC keyword up to (not including) the THERI 0 keyword.
struc_data = file_data[keyword_indx['STRUC']:keyword_indx['THERI 0']]

Line wise splits single string by white space and then removes elements that are empty.

In [343]:
# Tokenise every line after the 'STRUC' keyword: split on single spaces and drop the
# empty strings produced by runs of spaces. Rows are plain Python lists, which avoids
# the repeated array copies of np.append and leaves no temporaries to delete (the
# old `del` raised NameError when there were no data rows).
struc_data[1:] = [
    [token for token in line.split(' ') if token != '']
    for line in struc_data[1:]
]

A function to check if a string can be converted to a float.

In [344]:
def str2float_check(string):
    """Return True if ``string`` can be converted to a float, otherwise False.

    Parameters
    ----------
    string : object
        Usually a text token; anything accepted by ``float()`` also returns True.

    Returns
    -------
    bool
        True when ``float(string)`` succeeds. False when it raises ValueError
        (non-numeric text) or TypeError (e.g. ``None``).
    """
    try:
        float(string)  # Only success matters; the converted value is discarded.
    except (ValueError, TypeError):
        # TypeError covers inputs such as None, which float() rejects outright.
        return False
    return True

Iterate over the structure data line by line, then element by element, converting every string that represents a number into a float.

In [345]:
# Convert numeric tokens to float and keep everything else as text, row by row
# (the 'STRUC' keyword at index 0 is left untouched). Non-numeric tokens are
# normalised to built-in str so numpy string scalars do not leak into the data.
# Building the rows with a comprehension leaves no temporary to delete, so the
# cell also works when there are no data rows.
struc_data[1:] = [
    [float(token) if str2float_check(token) else str(token) for token in row]
    for row in struc_data[1:]
]

#### Test of struc_data
Here we will quickly look at some of the lines in struc_data to make sure they look ok.

In [346]:
# Show the keyword plus the first four parsed rows as a quick sanity check.
struc_data[0:5]

['STRUC',
 [1.0, 'Zr', 'Zr1', 1.0, 0.0, 0.0, 0.066667, '1a', 1.0],
 [0.0, 0.0, 0.0, 0.0],
 [2.0, 'Zr', 'Zr2', 1.0, 0.909091, 0.0, 0.066667, '1a', 1.0],
 [0.0, 0.0, 0.0, 0.0]]

## Sorted Keywords
Sorts the VESTA file keywords by their line index in file_data.

In [347]:
# (keyword, line index) pairs ordered by where each keyword occurs in the file.
keywords_srt = sorted(keyword_indx.items(), key=lambda entry: entry[1])
keywords_srt

[('CRYSTAL', 3),
 ('TITLE', 5),
 ('GROUP', 8),
 ('TRANM 0', 13),
 ('LTRANSL', 15),
 ('LORIENT', 18),
 ('LMATRIX', 22),
 ('CELLP', 28),
 ('STRUC', 31),
 ('THERI 0', 777),
 ('SHAPE', 1151),
 ('BOUND', 1153),
 ('SBOND', 1156),
 ('SITET', 1160)]

## Define path to new file

In [348]:
new_file_name = 'test_vesta_file.vesta'  # Name the output file will be created with.
new_file_path = os.path.join(file_dir, new_file_name)  # Full output path, in the same directory as the input file.

### Generate Absolute coordinates from STRUC and CELLP
At the moment, all coordinates are relative to the supercell dimensions. In order to add more supercells, we must first recover the absolute coordinates, then iterate over the supercell, before re-converting all coordinates into relative form.

In [349]:
# Copy each parsed row (skipping the 'STRUC' keyword) so the original rows stay untouched.
abs_coord_data = [row[:] for row in struc_data[1:]]
# Every second row, starting with the first, carries coordinates in columns 4-6;
# scale them element-wise by the first three CELLP values.
for coord_row in abs_coord_data[::2]:
    coord_row[4:7] = np.multiply(cellp_data[1][0:3], coord_row[4:7])

### Create New CELLP data
Below we create new cell parameters by scaling the original a, b and c lengths by the chosen multipliers (2, 8 and 1 here).

In [350]:
# Number of repeats of the original cell along a, b and c.
a_multiplier = 2
b_multiplier = 8
c_multiplier = 1
mult_matrix = [a_multiplier, b_multiplier, c_multiplier]  # Per-axis scale factors.
cellp_data2 = list(cellp_data)  # Shallow copy of the CELLP rows.
cellp_data2[1] = cellp_data[1].copy()  # Independent array, so cellp_data keeps the original lengths.
cellp_data2[1][0:3] *= mult_matrix  # Scale the first three CELLP values to the supercell size.

### Create New STRUC elements with absolute coordinates
Next we need to use the absolute coordinate data to create absolute coordinates for our new atoms.

Below, I create a directory for our cell repeats. It is 3D, so we can access each set of structural data by its repetition along the axes. E.g the original data is at abs_data_directory[0][0][0], the first repeat in a only is at abs_data_directory[1][0][0] and so on.

In [351]:
# Nested lists indexed as [a-repeat][b-repeat][c-repeat]; each slot starts as its own empty list.
abs_data_directory = [
    [
        [[] for _ in range(c_multiplier)]
        for _ in range(b_multiplier)
    ]
    for _ in range(a_multiplier)
]


In [352]:
# Give every repeat its own row-by-row copy of abs_coord_data.
for i in range(a_multiplier):
    for j in range(b_multiplier):
        for k in range(c_multiplier):
            abs_data_directory[i][j][k].extend(row[:] for row in abs_coord_data)
            #each dimensional element in the directory is now a copy of abs_coord_data


In [353]:
base_lengths = cellp_data[1][0:3]  # First three CELLP values of the original cell.
for i in range(a_multiplier):
    for j in range(b_multiplier):
        for k in range(c_multiplier):
            # [i, j, k] counts how many whole cells this repeat is displaced along each axis.
            shift = np.multiply([i, j, k], base_lengths)
            # Only every second row (starting with the first) holds coordinates.
            for atom_row in abs_data_directory[i][j][k][::2]:
                atom_row[4:7] = shift + atom_row[4:7]

Now, we have a directory for each cell repeat. This will make repeating the structure for large numbers much easier.

In [354]:
# Row-by-row copy of abs_coord_data.
abs_coord_data2 = [row[:] for row in abs_coord_data]
# Displace the coordinate rows (every second row) by the first three CELLP values.
for coord_row in abs_coord_data2[::2]:
    coord_row[4:7] = cellp_data[1][0:3] + coord_row[4:7]

### Reformat the text data so VESTA can read it
VESTA expects a different integer in the first column of each atom line, so we need to renumber struc_data_new accordingly. It also expects differently labelled atoms, which we also need to change, because currently the atom labels are copies of the original. There are 373 atoms per supercell, so we need a*b*c*373 labels.

In [355]:
struc_data_new = ['STRUC']  # Section keyword first.
for i in range(a_multiplier):
    for j in range(b_multiplier):
        for k in range(c_multiplier):
            # Copy every row of this repeat except its final one.
            struc_data_new.extend(row[:] for row in abs_data_directory[i][j][k][:-1])


In [356]:
species_counts = {"Zr": 0, "B": 0, "Si": 0}  # Running count per recognised element.
# Atom rows sit at the odd indices of struc_data_new; number them from 1.
for serial, atom_row in enumerate(struc_data_new[1::2], start=1):
    atom_row[0] = serial  # Running atom number, stored as an integer.
    species = atom_row[1]
    if species in species_counts:
        species_counts[species] += 1
        # The label is the element symbol followed by its running count, e.g. "Zr12".
        atom_row[2] = species + str(species_counts[species])
del species_counts  # The counters are not needed after relabelling.

### Convert Absolute Coordinates into Relative Coordinates
Now, we convert our absolute coordinates back into relative coordinates.

In [357]:
# Atom rows sit at the odd indices; divide their coordinates element-wise
# by the first three values of the new CELLP row.
for atom_row in struc_data_new[1::2]:
    atom_row[4:7] = np.divide(atom_row[4:7], cellp_data2[1][0:3])

### Write VESTA Header

In [358]:
# This is the first write to the output file, so open it in 'w' mode: the file is
# created or truncated, and re-running the notebook no longer stacks a second copy
# onto an existing output. The later cells append to this fresh file.
with open(new_file_path, 'w') as new_file:
    new_file.write(file_data[0])  # Version header line.
    new_file.write('\n\n')  # End the header line and leave one blank line before the first keyword.

### Write CRYSTAL SECTION

Below I write the Crystal and Title sections, by creating a temporary list which I then iterate over and add element by element (each element is a line of text) to the new file

In [359]:
# Append the CRYSTAL section: every line from the CRYSTAL keyword up to (not including)
# the TITLE keyword. Looking the bounds up by name avoids relying on the position of
# each keyword in the sorted list; the context manager closes the file even on error.
with open(new_file_path, 'a') as new_file:
    for line in file_data[keyword_indx['CRYSTAL']:keyword_indx['TITLE']]:
        new_file.write('%s\n' % line)

### Write TITLE SECTION

In [360]:
new_file=open(new_file_path, 'a') #Looks for file with name/path new_file_path, either creates or opens in append mode.
temp_list = file_data[keywords_srt[1][1]:keywords_srt[2][1]] #defines a list of the data in the TITLE section
for i in range(0, len(temp_list)): #iterates over the length of the list
    new_file.write('%s\n' % temp_list[i]) #writes the TITLE section to the file line by line, and starts a new line.
del temp_list #deletes the temporary list.
new_file.close() #closes new file.

### Write Group up to CELLP

This time, instead of doing it section by section, I decided to write in all of the remaining sections before the cell parameters, using the same method as above.

In [361]:
new_file=open(new_file_path, 'a') #Looks for file with name/path new_file_path, either creates or opens in append mode.
temp_list = file_data[keywords_srt[2][1]:keywords_srt[7][1]] #defines a list of the lines between GROUP and CELLP
for i in range(0, len(temp_list)): #iterates over the length of the list
    new_file.write('%s\n' % temp_list[i]) #writes each line, and begins a new line.
del temp_list #deletes the temporary list.
new_file.close() #closes the file.

### Write cell parameters to new vesta file

Below, I write the cell parameters to the new text file. The "for" loops in the middle of the code re-format the cell parameters back into strings rounded to six decimal places, as in the original file. 
UPDATE: I am using cellp2 this time.

In [362]:
# Append the CELLP section using the supercell parameters in cellp_data2.
# The context manager closes the file even if formatting or writing fails.
with open(new_file_path, 'a') as new_file:
    new_file.write('%s\n' % cellp_data[0])  # CELLP keyword line.
    for row in cellp_data2[1:3]:  # The two numeric rows of the CELLP block.
        # '{:f}' renders each value with six decimal places; values are separated by
        # two spaces and the line starts with a single space.
        new_file.write(' %s\n' % '  '.join('{:f}'.format(value) for value in row))

### Write Structure parameters to new vesta file

 The following for loop iterates over struc_data ignoring 1st element, in steps of 2.
 Each atom on struc has two lines. The 1st contains atom number, type, and position, the 2nd contains unknown info, but is re-included for completeness. The first number in the loop needs to be input as an integer.
 UPDATE: The file can now be read in VESTA!


In [363]:
# Append the STRUC section. Rows come in pairs: an atom row (nine fields) at each odd
# index followed by a four-field row at the next even index. zip() stops at the last
# complete pair, so a trailing unpaired row is never read.
with open(new_file_path, 'a') as new_file:  # Closed automatically, even on error.
    new_file.write('%s\n' % struc_data_new[0])  # STRUC keyword line.
    for atom_row, extra_row in zip(struc_data_new[1::2], struc_data_new[2::2]):
        # The first field is written with %i, so it must be an integer-like value.
        new_file.write('  %i %s        %s  %s   %s   %s   %s    %s       %s\n' % tuple(atom_row[:9]))
        new_file.write('%s  %s  %s  %s\n' % tuple(extra_row[:4]))
    new_file.write('0 0 0 0 0 0 0\n')  # Closing row of zeros for the section.

### Write the rest of the file

UPDATE: I have been able to write the entire file, however in the STRUC section there seems to be a spacing problem- the additional zeros have been removed in conversion from float to string and vice versa, and I think that is causing a spacing problem in our text file.

In [364]:
# Append everything from the THERI 0 keyword to the end of the input, unchanged.
# THERI 0 is the upper bound used when the STRUC block was extracted, so looking it up
# by name (instead of position 9 in the sorted list) resumes exactly where STRUC ended.
with open(new_file_path, 'a') as new_file:  # Closed automatically, even on error.
    for line in file_data[keyword_indx['THERI 0']:]:
        new_file.write('%s\n' % line)
    

In [None]:
# Leftover scratch cell: opens the output file in append mode and closes it straight
# away without writing anything (append mode creates the file if it does not exist).
new_file = open(new_file_path, 'a')
new_file.close()

***

***

### Try to keep the file tidy and get rid of redundant testing cells. 