## Lesson about tabular data

In [2]:
import numpy

In [4]:
help(numpy.genfromtxt)

Help on function genfromtxt in module numpy:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3k.  The strings
        in a 

In [5]:
import os

In [6]:
distance_file = os.path.join('data', 'distance_data_headers.csv')

In [7]:
distance = numpy.genfromtxt(fname=distance_file, delimiter=',', dtype='unicode')

In [8]:
print(distance)

[['Frame' 'THR4_ATP' 'THR4_ASP' 'TYR6_ATP' 'TYR6_ASP']
 ['1' '8.9542' '5.8024' '11.5478' '9.9557']
 ['2' '8.6181' '6.0942' '13.9594' '11.6945']
 ...
 ['9998' '8.6625' '7.7306' '9.5469' '10.3063']
 ['9999' '9.2456' '7.8886' '9.8151' '10.7564']
 ['10000' '8.8135' '7.917' '9.9517' '10.7848']]


In [10]:
header = distance[0]
print (header)

['Frame' 'THR4_ATP' 'THR4_ASP' 'TYR6_ATP' 'TYR6_ASP']


In [11]:
data = distance[1:]
print(data)

[['1' '8.9542' '5.8024' '11.5478' '9.9557']
 ['2' '8.6181' '6.0942' '13.9594' '11.6945']
 ['3' '9.0066' '6.0637' '13.0924' '11.3043']
 ...
 ['9998' '8.6625' '7.7306' '9.5469' '10.3063']
 ['9999' '9.2456' '7.8886' '9.8151' '10.7564']
 ['10000' '8.8135' '7.917' '9.9517' '10.7848']]


In [12]:
print(data[0,1]) 

8.9542


In [13]:
print(data[1,0])

2


In [14]:
small_data = data[0:10,0:3]
print(small_data)

[['1' '8.9542' '5.8024']
 ['2' '8.6181' '6.0942']
 ['3' '9.0066' '6.0637']
 ['4' '9.2002' '6.0227']
 ['5' '9.1294' '5.9365']
 ['6' '9.0462' '6.2553']
 ['7' '8.8657' '5.9186']
 ['8' '9.3256' '6.2351']
 ['9' '9.4184' '6.1993']
 ['10' '9.06' '6.0478']]


In [15]:
print(small_data[5,:1])

['6']


In [16]:
print(small_data[5,:])

['6' '9.0462' '6.2553']


In [17]:
print(small_data[:,1:])

[['8.9542' '5.8024']
 ['8.6181' '6.0942']
 ['9.0066' '6.0637']
 ['9.2002' '6.0227']
 ['9.1294' '5.9365']
 ['9.0462' '6.2553']
 ['8.8657' '5.9186']
 ['9.3256' '6.2351']
 ['9.4184' '6.1993']
 ['9.06' '6.0478']]


In [19]:
thr4_atp = data[:,1:]
print(thr4_atp)


[['8.9542']
 ['8.6181']
 ['9.0066']
 ...
 ['8.6625']
 ['9.2456']
 ['8.8135']]


In [20]:
data = data.astype(numpy.float)

In [21]:
thr4_atp = data[:,1]
avg_thr4_atp = numpy.mean(thr4_atp)
print(avg_thr4_atp)

10.876950930000001


In [22]:
num_columns = len(header)
print(num_columns)

5


In [23]:
range(1,num_columns)

range(1, 5)

In [32]:
for i in range(1, num_columns):
    column = data[:,i]
    avg_col = numpy.mean(column)
    print(F'{header[i]} : {avg_col:.3f}')

THR4_ATP : 10.877
THR4_ASP : 7.342
TYR6_ATP : 11.210
TYR6_ASP : 10.993


## test

In [61]:
import os

In [67]:
pwd

'C:\\Users\\Aminul\\Desktop\\cms-worlshop'

In [62]:
water_data = os.path.join ('data', 'water.xyz')

In [68]:
water = numpy.genfromtxt(fname=water_data, dtype='unicode')


ValueError: Some errors were detected !
    Line #2 (got 3 columns instead of 1)
    Line #3 (got 4 columns instead of 1)
    Line #4 (got 4 columns instead of 1)
    Line #5 (got 4 columns instead of 1)

In [65]:
print(water)

TypeError: 'numpy.ndarray' object is not callable

In [52]:
O_coord = water[1,2:]
print(O_coord)


IndexError: too many indices for array

## test again

In [1]:
iport os

SyntaxError: invalid syntax (<ipython-input-1-252ce7951d3d>, line 1)

In [4]:
import os
import numpy

In [6]:
file_location = os.path.join('data','water.xyz')

In [7]:
print(file_location)

data\water.xyz


In [11]:
xyz_file = numpy.genfromtxt(fname=file_location, skip_header=2, dtype='unicode')
print(xyz_file)

[['O' '0.000000' '-0.007156' '0.965491']
 ['H1' '-0.000000' '0.001486' '-0.003471']
 ['H2' '0.000000' '0.931026' '1.207929']]


In [22]:
symbols = xyz_file[:,0]
coordinates = xyz_file[:,1:]
coordinates = coordinates.astype(float)

In [18]:
print (symbols)
print (coordinates)

['O' 'H1' 'H2']
[['0.000000' '-0.007156' '0.965491']
 ['-0.000000' '0.001486' '-0.003471']
 ['0.000000' '0.931026' '1.207929']]


## readline

In [15]:
h2o_xyz= open(file_location,'r')

In [16]:
data = h2o_xyz.readline()

In [17]:
print(data)

3



In [19]:
print (symbols)
print (coordinates)

['O' 'H1' 'H2']
[['0.000000' '-0.007156' '0.965491']
 ['-0.000000' '0.001486' '-0.003471']
 ['0.000000' '0.931026' '1.207929']]


In [30]:
def calculate_distance(atom1,atom2):
    x_distance = atomA[0]-atomB[0]
    y_distance = atomA[1]-atomB[1]
    z_distance = atomA[2]-atomB[2]
    distance = numpy.sqrt(x_distance**2 + y_distance**2 + z_distance**2)
    return distance

In [28]:
for numA, atomA in enumerate(coordinates):
    for numB, atomB in enumerate(coordinates):
        if numB < numA:
            x_distance = atomA[0]-atomB[0]
            y_distance = atomA[1]-atomB[1]
            z_distance = atomA[2]-atomB[2]
            distance = numpy.sqrt(x_distance**2 + y_distance**2 + z_distance**2)
            if distance>0 and distance<1.5:
                print(F'{symbols[numA]} to {symbols[numB]}: {distance:.3f}')

H1 to O: 0.969
H2 to O: 0.969


In [29]:
def function_name(parameters):
    #lines of code
    #lines of code
    #lines of code
    return value to return
    

SyntaxError: invalid syntax (<ipython-input-29-f9c92efbd983>, line 1)

In [49]:
def bond_check(bond_distance,minimum_value=0,maximum_value=2.0):
    if distance_AB>minimum_value and distance_AB<maximum_value:
        return True
    else:
        return False
        

In [50]:
for numA, atomA in enumerate(coordinates):
    for numB, atomB in enumerate(coordinates):
        if numB < numA:
            distance_AB = calculate_distance(atomA, atomB)
            if bond_check(distance_AB) is True:
                print(F'{symbols[numA]} to {symbols[numB]}: {distance_AB:.3f}')

H1 to O: 0.969
H2 to O: 0.969
H2 to H1: 1.527
