In [1]:
import graphlab as gl  # library for SFrames
import graphlab.aggregate as agg # functions for groupby on the SFrames
import numpy as np
import matplotlib.pyplot as plt
import math
import os # to loop through all files in a folder


In [2]:
def _labels_from_filename(filename):
    """Return the (treatment, subject) labels encoded in a data file's name."""
    # Only the file name itself carries the encoding; ignore any directory part
    # so that a folder name can never influence the labels.
    name = os.path.basename(filename)

    # Substring tests use `in` rather than `find(...) > 0`, which silently
    # missed a marker located at the very start of the name (index 0).
    # '++' is tested before '+-': the first matching marker wins.
    if '++' in name:
        treatment = 'CytD'      # an earlier labelling used 'Antagonist'
    elif '+-' in name:
        treatment = 'mbCD'      # an earlier labelling used 'Nicotine'
    else:
        treatment = 'Control'

    subject = 'Extension' if 'E' in name else 'Varicosity'
    return treatment, subject


def Stat_It_Up(filename, radius=30, min_core_neighbors=20):
    """Cluster the points of one localisation file and summarise each cluster.

    The file is read as a tab-delimited table with a header row; its 'Xc' and
    'Yc' columns are used as point coordinates. The points are clustered with
    DBSCAN, and for every cluster the number of points plus the mean and
    standard deviation of the point-to-centroid distance are computed.

    Parameters
    ----------
    filename : str
        Path of the tab-delimited text file to process. Its base name also
        determines the 'Treatment' and 'Subject' labels.
    radius : number, optional
        Passed to ``gl.dbscan.create`` as ``radius`` (default 30).
    min_core_neighbors : int, optional
        Passed to ``gl.dbscan.create`` as ``min_core_neighbors`` (default 20).

    Returns
    -------
    SFrame
        One row per ``dbscan_id`` with the columns 'dbscan_id', 'Num_points',
        'mean_dist', 'stdev_dist', 'File', 'Treatment' and 'Subject', sorted
        by 'File' and then 'dbscan_id'.
    """
    # Import the text file as an SFrame.
    raw_points = gl.SFrame.read_csv(filename, delimiter='\t', header=True)

    # Shift the points so that the minimum of each coordinate is 0.
    # SArray.min() is evaluated by the SFrame engine instead of iterating the
    # column element by element through Python's builtin min().
    points = gl.SFrame({'X': raw_points['Xc'] - raw_points['Xc'].min(),
                        'Y': raw_points['Yc'] - raw_points['Yc'].min()})

    # Run DBSCAN while the frame still holds only X and Y; the row id is added
    # afterwards so it is not part of the data handed to the clustering.
    model = gl.dbscan.create(points, radius=radius,
                             min_core_neighbors=min_core_neighbors)

    # Give each point its own identification number and attach the cluster
    # assignment to it.
    # NOTE(review): this relies on the model's 'cluster_id' table being keyed
    # by a 'row_id' that matches add_row_number's numbering -- confirm.
    points = points.add_row_number('row_id')
    points = points.join(model['cluster_id'], on='row_id', how='left')
    points = points.rename({'cluster_id': 'dbscan_id'})

    # Centroid of each cluster: the average of all points in that cluster.
    # NOTE(review): points DBSCAN leaves unassigned (noise) presumably carry a
    # missing dbscan_id and would then be summarised together as one extra
    # group -- confirm whether downstream analysis filters that row out.
    centers = points.groupby(key_columns='dbscan_id',
                             operations={'center_X': agg.MEAN('X'),
                                         'center_Y': agg.MEAN('Y')})
    points = points.join(centers, on='dbscan_id', how='left')

    # Euclidean distance of every point from the centroid of its cluster.
    # Item assignment is used so the result does not depend on add_column
    # mutating the frame in place.
    points['Distance'] = ((points['X'] - points['center_X']) ** 2 +
                          (points['Y'] - points['center_Y']) ** 2) ** 0.5

    # Per cluster: number of points, mean distance and stdev of the distance.
    Size_Measure = points.groupby(key_columns='dbscan_id',
                                  operations={'stdev_dist': agg.STDV('Distance'),
                                              'Num_points': agg.COUNT('row_id'),
                                              'mean_dist': agg.MEAN('Distance')})

    # Tag every row with the source file and the labels derived from its name.
    treatment, subject = _labels_from_filename(filename)
    Size_Measure['File'] = filename
    Size_Measure['Treatment'] = treatment
    Size_Measure['Subject'] = subject

    # Keep the output in a stable, readable order.
    return Size_Measure.sort(['File', 'dbscan_id'])
    



In [3]:

# Build the master table of cluster statistics for every file in Cluster_Data.
# Start from an empty, explicitly typed SFrame so the per-file results can be
# appended to it.
Total = gl.SFrame({'dbscan_id' : gl.SArray(dtype = int), 'Num_points':gl.SArray(dtype = int),
                   'stdev_dist': gl.SArray(dtype = float),'mean_dist':gl.SArray(dtype = float),
                   'File':gl.SArray(dtype = str),'Treatment':gl.SArray(dtype = str),
                   'Subject':gl.SArray(dtype = str)})

data_dir = 'Cluster_Data'
# sorted() makes the processing (and therefore output row) order reproducible;
# os.listdir gives no ordering guarantee.
for f in sorted(os.listdir(data_dir)):
    # os.listdir returns bare names, so join them with the directory; the
    # files are then read from Cluster_Data itself rather than from whatever
    # happens to carry the same name in the working directory.
    path = os.path.join(data_dir, f)
    if not os.path.isfile(path):
        continue    # skip sub-directories and other non-file entries

    stats = Stat_It_Up(path)
    stats['File'] = f    # keep the bare file name in the output, as before
    Total = Total.append(stats)    # grow the master list

# Save the total data once, after all files are processed, instead of
# rewriting the growing table on every iteration.
Total.export_csv(filename = 'Cluster_Stats.txt', delimiter = '\t')



This non-commercial license of GraphLab Create is assigned to mdlycas@sund.ku.dk and will expire on March 08, 2017. For commercial licensing options, visit https://turi.com/buy/.


[INFO] graphlab.cython.cy_server: GraphLab Create v2.0.1 started. Logging: C:\Users\jdr248\AppData\Local\Temp\graphlab_server_1468496665.log.0


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.
[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,float,float,float,float,float,float,float,float,float,float,float,long,long,long,long,long,long]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


[INFO] graphlab.toolkits.clustering.dbscan: Identifying noise points and core points.


[INFO] graphlab.toolkits.clustering.dbscan: Constructing the core point similarity graph.


[INFO] graphlab.toolkits.clustering.dbscan: Processing boundary points.


In [4]:
# Load the combined cluster statistics table: tab-separated text whose first row is the header.
Raw_data = gl.SFrame.read_csv(
    'Cluster_Stats.txt',
    header=True,
    delimiter='\t',
)

------------------------------------------------------
Inferred types from first 100 line(s) of file as 
column_type_hints=[str,long,str,str,long,float,float]
If parsing fails due to incorrect types, you can correct
the inferred type list above and pass it to read_csv in
the column_type_hints argument
------------------------------------------------------


In [5]:
# Keep only rows with a non-negative dbscan_id (per the original note, this drops
# the non-clustered points).
Clusters = Raw_data[Raw_data['dbscan_id'] >= 0]

# One summary row per (Subject, Treatment) pair: mean and standard deviation of the
# cluster point counts and of the per-cluster 'stdev_dist' values, plus the number
# of clusters that went into each group.
Cluster_Results = Clusters.groupby(
    key_columns=['Subject', 'Treatment'],
    operations={
        'Mean Points': agg.MEAN('Num_points'),
        'Std Points': agg.STDV('Num_points'),
        'Std STD dis': agg.STDV('stdev_dist'),
        'Mean STD dis': agg.MEAN('stdev_dist'),
        'number of clust': agg.COUNT('Num_points'),
    },
)

# Standard error of the mean = standard deviation / sqrt(number of clusters in the group).
Cluster_Results['SEM points'] = Cluster_Results['Std Points'] / Cluster_Results['number of clust'] ** 0.5
Cluster_Results['SEM STD'] = Cluster_Results['Std STD dis'] / Cluster_Results['number of clust'] ** 0.5

# Present the summary ordered by subject, then treatment.
Cluster_Results = Cluster_Results.sort(['Subject', 'Treatment'])


In [6]:
# Order the clustered rows by subject, then treatment, and write them to a
# tab-separated text file.
Clusters = Clusters.sort(['Subject', 'Treatment'])
Clusters.export_csv('Clusters.txt', delimiter='\t')

In [None]:
# Preview the first four clustered rows as a quick sanity check.
Clusters.head(n=4)

In [None]:
# --- Row masks -------------------------------------------------------------
# Boolean masks over the rows of Clusters, one per subject / treatment label.
# They are defined before the plotting code so this cell runs on a fresh
# kernel (previously the histograms referenced the masks before they existed).
VMask = Clusters['Subject'] == 'Varicosity'
EMask = Clusters['Subject'] == 'Extension'
CMask = Clusters['Treatment'] == 'Control'
NMask = Clusters['Treatment'] == 'Nicotine'
AMask = Clusters['Treatment'] == 'Antagonist'

# Varicosity rows split by treatment.
VCMask = VMask & CMask
VNMask = VMask & NMask
VAMask = VMask & AMask  # fixed: was `VMask & VMask`, which selected every Varicosity row

# --- Histograms of points-per-cluster for the Varicosity groups -------------
num_points = Clusters['Num_points']

# All three histograms share one set of bin edges. As in the original cell, the
# edges stop at the largest Num_points value of the Varicosity/Nicotine group;
# larger values in the other groups fall outside the bins and are not counted.
bin_edges = np.arange(0, max(num_points[VNMask]) + 0.5, 0.5)

# NOTE(review): `normed=1` is the older matplotlib spelling of density
# normalisation; newer matplotlib releases expect `density=True` instead.
# Confirm against the installed matplotlib version before changing it.
n, bins, patches = plt.hist([num_points[VCMask]], bins=bin_edges, facecolor='green',
                            normed=1, alpha=0.5, label='Varicosity / Control')
n2, bins2, patches2 = plt.hist([num_points[VNMask]], bins=bin_edges, facecolor='red',
                               normed=1, alpha=0.5, label='Varicosity / Nicotine')
# Stored under its own names so the Nicotine results above are not overwritten.
n3, bins3, patches3 = plt.hist([num_points[VAMask]], bins=bin_edges, facecolor='blue',
                               normed=1, alpha=0.5, label='Varicosity / Antagonist')

plt.xlabel('Points per cluster (Num_points)')
plt.ylabel('Probability density')
plt.legend()
# Zoom in on clusters of 20-25 points, with the density axis capped at 0.2.
plt.axis([20, 25, 0, 0.2])
plt.show()

