## Lesson about Tabular Data

In [1]:
import numpy

In [2]:
# Show the built-in documentation for numpy.genfromtxt, the reader used below
# to load the CSV file.
help(numpy.genfromtxt)

Help on function genfromtxt in module numpy:

genfromtxt(fname, dtype=<class 'float'>, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt='f%i', unpack=None, usemask=False, loose=True, invalid_raise=True, max_rows=None, encoding='bytes')
    Load data from a text file, with missing values handled as specified.
    
    Each line past the first `skip_header` lines is split at the `delimiter`
    character, and characters following the `comments` character are discarded.
    
    Parameters
    ----------
    fname : file, str, pathlib.Path, list of str, generator
        File, filename, list, or generator to read.  If the filename
        extension is `.gz` or `.bz2`, the file is first decompressed. Note
        that generators must return byte strings in Python 3k.  The strings
        in a 

In [7]:
import os

# Join directory and file name with os.path.join instead of hard-coding a
# path separator.
distance_file = os.path.join(
    'data',
    'distance_data_headers.csv',
)
print(distance_file)

data/distance_data_headers.csv


In [5]:
# This cell was originally executed before `os` had been imported, which
# raised a NameError. Importing here makes the cell independent of execution
# order. It repeats the assignment from the preceding cell and can be deleted.
import os
distance_file = os.path.join('data', 'distance_data_headers.csv')

In [8]:
# Read every field as text so the header row (column names) is kept intact
# alongside the numeric rows.
distances = numpy.genfromtxt(
    fname=distance_file,
    delimiter=',',
    dtype='unicode',
)
print(distances)

[['Frame' 'THR4_ATP' 'THR4_ASP' 'TYR6_ATP' 'TYR6_ASP']
 ['1' '8.9542' '5.8024' '11.5478' '9.9557']
 ['2' '8.6181' '6.0942' '13.9594' '11.6945']
 ...
 ['9998' '8.6625' '7.7306' '9.5469' '10.3063']
 ['9999' '9.2456' '7.8886' '9.8151' '10.7564']
 ['10000' '8.8135' '7.917' '9.9517' '10.7848']]


In [9]:
# Row 0 of the raw table holds the column names.
headers = distances[0, :]
print(headers)

['Frame' 'THR4_ATP' 'THR4_ASP' 'TYR6_ATP' 'TYR6_ASP']


In [10]:
# Everything after the header row is the actual data (still stored as text).
data = distances[1:, :]
print(data)

[['1' '8.9542' '5.8024' '11.5478' '9.9557']
 ['2' '8.6181' '6.0942' '13.9594' '11.6945']
 ['3' '9.0066' '6.0637' '13.0924' '11.3043']
 ...
 ['9998' '8.6625' '7.7306' '9.5469' '10.3063']
 ['9999' '9.2456' '7.8886' '9.8151' '10.7564']
 ['10000' '8.8135' '7.917' '9.9517' '10.7848']]


In [11]:
# Single element: row 0, column 1 (the THR4_ATP value of the first frame).
print(data[0, 1])

8.9542


In [12]:
# Single element: row 1, column 0 (the frame number of the second row).
print(data[1, 0])

2


In [13]:
# First 10 rows and first 3 columns (Frame, THR4_ATP, THR4_ASP).
small_data = data[:10, :3]
print(small_data)

[['1' '8.9542' '5.8024']
 ['2' '8.6181' '6.0942']
 ['3' '9.0066' '6.0637']
 ['4' '9.2002' '6.0227']
 ['5' '9.1294' '5.9365']
 ['6' '9.0462' '6.2553']
 ['7' '8.8657' '5.9186']
 ['8' '9.3256' '6.2351']
 ['9' '9.4184' '6.1993']
 ['10' '9.06' '6.0478']]


In [14]:
# Slicing the column axis (0:1) instead of indexing it keeps the result a
# one-element array rather than a bare value.
print(small_data[5, 0:1])

['6']


In [15]:
# The whole row at index 5 (all columns).
print(small_data[5])

['6' '9.0462' '6.2553']


In [16]:
# All rows, every column except column 0 (the frame number).
print(small_data[:, 1:])

[['8.9542' '5.8024']
 ['8.6181' '6.0942']
 ['9.0066' '6.0637']
 ['9.2002' '6.0227']
 ['9.1294' '5.9365']
 ['9.0462' '6.2553']
 ['8.8657' '5.9186']
 ['9.3256' '6.2351']
 ['9.4184' '6.1993']
 ['9.06' '6.0478']]


In [24]:
# Select the THR4_ATP column (index 1). Do not assign the result of print():
# print() returns None, which is what made numpy.mean fail with
# "unsupported operand type(s) for /: 'NoneType' and 'int'".
# `data` still holds text at this point, so convert the column to float
# before averaging; the conversion is a no-op if `data` is already numeric.
thr4_atp = data[:, 1].astype(float)
print(thr4_atp)
avg_thr4_atp = numpy.mean(thr4_atp)

In [25]:
# Convert the text table to floating point so numeric operations work.
# Use the builtin `float`: the `numpy.float` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
data = data.astype(float)

In [29]:
# Average of the THR4_ATP column (index 1) over all frames.
thr4_atp = data[:, 1]
avg_thr4_atp = thr4_atp.mean()
print(avg_thr4_atp)

10.876950930000001


In [30]:
# Number of columns in the table, taken from the header row's length.
num_columns = headers.shape[0]
print(num_columns)

5


In [31]:
# Column indices 1 .. num_columns - 1, i.e. every column except column 0.
range(1, num_columns)

range(1, 5)

In [33]:
# Report the mean of every column except column 0 (the frame number),
# pairing each column name with its values.
for label, values in zip(headers[1:num_columns], data[:, 1:num_columns].T):
    avg_col = numpy.mean(values)
    print(f'{label} : {avg_col:.3f}')

THR4_ATP : 10.877
THR4_ASP : 7.342
TYR6_ATP : 11.210
TYR6_ASP : 10.993
