# Graph Analyses

### Goals

1. Identify all CSV files and open them.
2. Print all the graphs.
3. Upload some peak-finding algorithms and test them.
4. Write a function that will display all graphs, with the peaks from an arbitrary peak-finding algorithm.

### Imports

In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

import subprocess

# limit how much of a DataFrame is rendered when it is displayed
pd.set_option('display.max_columns', 10) # display columns
pd.set_option('display.max_rows', 8)    # display rows

# plot in the notebook
%matplotlib inline

# create ipython editable plot in a separate window
#%matplotlib qt   

import matplotlib as mpl

## Read In Data And Plot

### Example: Read Data, Build Plot (With One File)

In [None]:
# Get Files
# List the csv files of the current directory without spawning a shell:
# the former `ls | grep` pipeline raised CalledProcessError when no csv file
# existed (grep exits with status 1 on "no match").
# Dot-files are skipped, as plain `ls` does. Names are sorted by code point,
# which may order mixed-case names differently than a locale-aware `ls`.
import os

list_files = sorted(
    name for name in os.listdir(".")
    if name.endswith(".csv") and not name.startswith(".")
)
list_files

In [None]:
# Read In Data
# The second entry of list_files is used as the worked example,
# so at least two csv files must have been found.
read_file = list_files[1]
df = pd.read_csv(read_file)
print("File Read: ",read_file)

In [None]:
# Display Data: preview the first 10 rows of the example file
df.head(10)

In [None]:
# Split the example file into its peak columns and its signal columns.
# The first two columns are treated as peak data, every remaining column
# as signal data.

peak_headers   = df.columns[:2].tolist()
signal_headers = df.columns[2:].tolist()

# Select each group of columns and discard every row that has a missing
# value in that group.
df_peak   = df.loc[:, peak_headers].dropna()
df_signal = df.loc[:, signal_headers].dropna()

In [None]:
# Show both frames together with their column dtypes, before any conversion
display(df_peak)
print(df_peak.dtypes,end="\n\n")
display(df_signal)
print(df_signal.dtypes,end="\n\n")

In [None]:
# Convert the data to floats
def convert(val):
    '''
    DESCRIPTION:
        - converts a value into a float.
        - a trailing SI-prefix character (n, u, m, k, M) is expanded into
          its power of ten, e.g. "1.5k" -> 1500.0
    RETURNS:
        - [float] the converted value
        - None, after printing a message, when the value cannot be converted
    '''

    # conversion dictionary
    conversions = {
                    'n':10**-9,
                    'u':10**-6,
                    'm':10**-3,
                    'k':10**3,
                    'M':10**6
                  }

    # Plain numbers convert directly. Trying this first also handles NaN:
    # it is spelled "nan", so its last character would otherwise be
    # mistaken for the nano prefix.
    try:
        return float(val)
    except (TypeError, ValueError):
        pass

    # Otherwise expect <number><prefix>. The emptiness check guards the
    # last-character lookup against empty strings.
    str_val = str(val).strip()
    if str_val and str_val[-1] in conversions:
        try:
            return float(str_val[:-1])*conversions[str_val[-1]]
        except ValueError:
            pass

    print("Failed to Implicitely Convert to Float: ",val)
    return None
    

In [None]:
# Apply convert() to every cell of both frames.
# DataFrame.applymap is deprecated since pandas 2.1 in favour of the
# equivalent DataFrame.map, so use whichever method this pandas provides.
_elementwise = "map" if hasattr(pd.DataFrame, "map") else "applymap"
df_signal = getattr(df_signal, _elementwise)(convert)
df_peak   = getattr(df_peak, _elementwise)(convert)

In [None]:
# Show dtypes and contents again, now that convert() has been applied
print(df_peak.dtypes)
display(df_peak)

print(df_signal.dtypes)
display(df_signal)

In [None]:
# Plot the example file: the signal as a line, the peaks as scatter points

%matplotlib inline

# set plot size
mpl.rcParams['figure.figsize'] = (11,7) 

# Signal Data: first signal column on the x-axis, second on the y-axis
x = Frequency = df_signal[signal_headers[0]]
y = Magnitude = df_signal[signal_headers[1]]

# Signal Plot
plt.plot(x, y, 
         linewidth=1, linestyle="-",
         color="red", label="Magnitude vs. Frequency")

# Peak Data: first peak column on the x-axis, second on the y-axis
x_peak = df_peak[peak_headers[0]]
y_peak = df_peak[peak_headers[1]]

# Peak Plot (no label is passed here, so only the signal line is labelled)
plt.scatter(x_peak, y_peak, color='blue')



plt.xlabel("Frequency")
plt.ylabel("Magnitude")
plt.title("Magnitude vs. Frequency")
plt.legend(loc="upper right")

plt.show()

In [None]:
# Remove the names created by the single-file example above
del x_peak, y_peak, x, y, df_peak, df_signal, peak_headers, signal_headers, df, list_files

### Read In all Data

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

class Point_Click_Graph():
    """
    * Builds graphs with clickable points from given files,
      and records the points that are clicked.

    * Only Applicable For:
            - Files that all have their data in the same column indices
            - All the columns in the file need to have their data converted,
              using the same convert function

    * Make More Applicable:
            - This class can be made to be applicable for more general forms of data,
              if some preprocessing is done on the files. In such a case,
                  - The convert function may not need to do anything,
                  - And all of the columns in the file can be made to represent the appropriate data,
                    which would make this class appropriate to use.
    """

    def __init__(self, file_names, signal_indices, point_indices, convert = None):
        """
        PARAMETERS:
            * :file_names: [Sequence] of [Strings] All the files that will be analyzed.
                           The first file is used to look up the column headers.
            * :signal/point indices: [Iterable] of 2 [Integers]:
                - [x_index, y_index]
                - each integer is the column index in the csv file that corresponds
                  to x/y signal/point data.
            * convert: [function] applied to every value of the selected columns
                                  after empty rows are dropped. When None, the values
                                  are kept exactly as pandas read them.
        """
        # provided data
        self.file_names     = file_names
        self.signal_indices = signal_indices
        self.point_indices  = point_indices
        self.convert = convert

        # synthesized data
        self.signal_headers, self.point_headers = self._get_headers()
        self.dict_data = self._store_file_data()

    @staticmethod
    def _map_elements(frame, func):
        '''
        RETURNS: [DataFrame]
            * frame with func applied to every cell.
        DataFrame.applymap is deprecated since pandas 2.1 in favour of
        DataFrame.map, so the method that this pandas provides is used.
        '''
        mapper = frame.map if hasattr(frame, "map") else frame.applymap
        return mapper(func)

    def _get_headers(self):
        '''
        RETURNS: 2-[Lists] of [Strings]
            * The headers, corresponding to the indices that were passed in.
        '''
        # nrows=0 parses only the header row; the data rows are not needed here
        headers = list(pd.read_csv(self.file_names[0], nrows=0))
        signal_headers = [headers[self.signal_indices[0]],
                          headers[self.signal_indices[1]]]
        point_headers  = [headers[self.point_indices[0]],
                          headers[self.point_indices[1]]]

        return signal_headers, point_headers

    def _store_file_data(self):
        '''
        DESCRIPTION:
            1. reads in all of the data, from all of the files
            2. splits the data into signal, and point dataframes
            3. drops the rows with missing values and converts the rest
        RETURNS: [Dictionary]:
            - KEY:   [String] file_name
            - VALUE: [Tuple] of [dataframes] -> (signal-DataFrame, point-DataFrame)
        '''

        file_to_data = {}

        for read_file in self.file_names:

            # Read Data
            df = pd.read_csv(read_file)

            # Separate DataFrames, dropping the empty rows of each
            df_signal = df[self.signal_headers].dropna(axis=0)
            df_peak   = df[self.point_headers].dropna(axis=0)

            # Convert Data with the function given to the constructor.
            # (This must be self.convert: a bare `convert` would resolve to
            # whatever module-level name happens to exist, or raise NameError.)
            if self.convert is not None:
                df_signal = self._map_elements(df_signal, self.convert)
                df_peak   = self._map_elements(df_peak, self.convert)

            # Store data in dictionary, with file key
            file_to_data[read_file] = (df_signal,df_peak)

        return file_to_data

    def _get_plot_data(self, file_name):
        '''
        RETURNS: [tuple] of 4 [Series]: (x, y, x_point, y_point)
            - the data from a single file that can then be plotted.
        '''

        # Signal to Plot
        df_signal = self.dict_data[file_name][0]
        x = df_signal[self.signal_headers[0]]
        y = df_signal[self.signal_headers[1]]

        # Points To Plot
        df_point = self.dict_data[file_name][1]
        x_point  = df_point[self.point_headers[0]]
        y_point  = df_point[self.point_headers[1]]

        return x,y,x_point,y_point

    def make_plots(self, signal_kwargs=None, point_kwargs=None):
        '''
        DESCRIPTION:
            - Plots the data from all of the files one at a time.
            - These plots have clickable points. Every time a click is made on
              a point, the position of the point is recorded. If the point is
              clicked on again, it will remove the record of that point.
        PARAMETERS:
            - signal_kwargs: [dict] extra keyword arguments for the signal line plot
            - point_kwargs:  [dict] extra keyword arguments for the point scatter plot
              (both default to no extra arguments)
        RETURN:
            - returns a dictionary:
                - KEY: name of the file
                - VALUES: [List] of [x-coordinate, y-coordinate] pairs
                          that describes the points that were clicked on.
                          (if a point is clicked again, it will be removed from
                          this list.)
        '''

        # None instead of {} as default: a dict default would be shared
        # between all calls
        signal_kwargs = {} if signal_kwargs is None else signal_kwargs
        point_kwargs  = {} if point_kwargs is None else point_kwargs

        # Dictionary that is returned
        important_points = {}

        for file_name in self.dict_data:

            important_points[file_name] = []

            #      --- Basic Plot Info ---
            fig, ax = plt.subplots()                # Create Plot
            plt.ion()                               # Interactive mode on

            #      --- Get Signal and Peak Data ---
            x, y, x_points, y_points = self._get_plot_data(file_name)

            # Plain arrays for the click handler: the pick event reports the
            # POSITION of a point, while the Series keep the row labels that
            # survived dropna and therefore must not be indexed with it.
            x_values = np.asarray(x_points)
            y_values = np.asarray(y_points)

            #      --- Make Plots ---
            # Signal Plot
            ax.plot(x, y, **signal_kwargs)

            # Point Plot
            coll = ax.scatter(x_points, y_points, color=["blue"]*len(x_points),
                              picker=5,label="Point Data",**point_kwargs)

            # Set Axes Info (axes reach 20% past the largest point;
            # skipped when there are no points to take a maximum of)
            if len(x_values):
                x_max, y_max = np.max(x_points), np.max(y_points)
                ax.set_xlim(0,x_max+0.2*x_max)
                ax.set_ylim(0,y_max+0.2*y_max)
            ax.set_xlabel("Frequency")
            ax.set_ylabel("Magnitude")
            ax.set_title(file_name)
            ax.legend(loc="upper right")

            print("File:",file_name)


            #      --- Clickable Event ---
            def on_pick(event):

                # position in the scatter data where the event occurred
                ind = event.ind[0]

                # coordinates of the point that was clicked
                x_val, y_val = x_values[ind], y_values[ind]

                # add point
                if [x_val,y_val] not in important_points[file_name]:

                    # color is specified by: RGBA tuple
                    # https://www.cgl.ucsf.edu/chimera/docs/ProgrammersGuide/Examples/footnotes/rgba.html
                    # NOTE(review): _facecolors/_edgecolors are private
                    # matplotlib attributes - confirm on matplotlib upgrades
                    important_points[file_name].append([x_val,y_val])
                    coll._facecolors[ind,:] = (1, 0, 0, 1)
                    coll._edgecolors[ind,:] = (1, 0, 0, 1)
                    print("\t  Picked  Point [ {:>3} ] at: [ {:06.2f}, {:06.5f} ]".format(ind,x_val,y_val))

                # remove point
                else:

                    important_points[file_name].remove([x_val,y_val])
                    coll._facecolors[ind,:] = (0, 0, 1, 1)
                    coll._edgecolors[ind,:] = (0, 0, 1, 1)
                    print("\t  Removed Point [ {:>3} ] at: [ {:06.2f}, {:06.5f} ]".format(ind,x_val,y_val))

                fig.canvas.draw()

            fig.canvas.mpl_connect('pick_event', on_pick)

            # Block until the figure is closed, so the files are handled one at a time
            plt.show(block=True)

        return important_points

        


In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import subprocess
%matplotlib qt

#      --- Getting Data For Point_Click_Plot Class ---
def get_files(directory='.'):
    '''
    Returns:
        - sorted list of the csv file names found in a given directory.
          Only the names are returned, not full paths; an empty list is
          returned when the directory holds no csv files.
    Parameters:
        - Directory: specifies the directory with the files you would like to analyze.
            - By Default: this method will use the current directory.
    '''

    # Local import, so this function does not depend on the cell's imports
    import os

    # List the directory directly instead of piping `ls` into `grep` through
    # a shell: that broke on directory names with spaces or shell
    # metacharacters, and raised CalledProcessError when nothing matched.
    # Dot-files are skipped, as plain `ls` does.
    return sorted(
        name for name in os.listdir(directory)
        if name.endswith('.csv') and not name.startswith('.')
    )

def convert(val):
    '''
    DESCRIPTION:
        - converts a value into a float.
        - a trailing SI-prefix character (n, u, m, k, M) is expanded into
          its power of ten, e.g. "1.5k" -> 1500.0
        - this function is passed into the Point_Click_Graph constructor
    RETURNS:
        - [float] the converted value
        - None, after printing a message, when the value cannot be converted
    '''

    # conversion dictionary
    conversions = {
                    'n':10**-9,
                    'u':10**-6,
                    'm':10**-3,
                    'k':10**3,
                    'M':10**6
                  }

    # Plain numbers convert directly. Trying this first also handles NaN:
    # it is spelled "nan", so its last character would otherwise be
    # mistaken for the nano prefix.
    try:
        return float(val)
    except (TypeError, ValueError):
        pass

    # Otherwise expect <number><prefix>. The emptiness check guards the
    # last-character lookup against empty strings.
    str_val = str(val).strip()
    if str_val and str_val[-1] in conversions:
        try:
            return float(str_val[:-1])*conversions[str_val[-1]]
        except ValueError:
            pass

    print("Failed to Implicitely Convert to Float: ",val)
    return None

#      --- Functions For Final Plot ---
def get_temps(dict_imp_peaks):
    '''
    DESCRIPTION:
        * Builds the list of temperatures that belongs to the recorded peaks:
          one entry per peak, in file order.
        * The temperature is parsed from the file name: it is the fifth
          "_"-separated field, with its last character removed.
        * This method is subject to change, if the file names are changed in the future.
    '''

    return [
        float(name.split('_')[4][:-1])
        for name, peaks in dict_imp_peaks.items()
        for _ in peaks
    ]
    
def important_peak_plot(dict_imp_peaks):
    '''
    DESCRIPTION:
        * Draws one composite figure out of all the peaks that were clicked:
          the x-coordinate of every peak against the temperature of its file.
        * Frequency vs. Temperature
    '''
    # one temperature and one frequency per recorded peak, in the same order
    Temp = get_temps(dict_imp_peaks)
    Freq = [peak[0] for peaks in dict_imp_peaks.values() for peak in peaks]

    # hollow blue circles
    marker_style = {
        "markeredgewidth": 2,
        "markeredgecolor": 'b',
        "markerfacecolor": 'None',
        "label": "Frequency vs Temp",
    }

    # create plot
    plt.figure()
    plt.plot(Temp, Freq, "o", **marker_style)

    plt.xlabel("Temperature [K]", fontsize=15)
    plt.ylabel("Freq [kHz]", fontsize=15)
    plt.title("$BalrO_{3}$ ba916 5/15/18", fontsize=20)
    plt.grid(True)

    plt.show()
    
# Build the grapher. Point_Click_Graph takes:
#    1. all the files to be parsed
#    2. the column indices of the signal data in the files (x,y)
#    3. the column indices of the peak   data in the files (x,y)
#    4. the convert function to be applied to the data
# and reads the files while it is being constructed.
pcg = Point_Click_Graph(
    get_files(),
    [2, 3],
    [0, 1],
    convert,
)

#      --- Set Plot Parameters ---
signal_kwargs = dict(
    linewidth=1,
    linestyle="-",
    color="green",
    label="Magnitude vs. Frequency",
)
peak_kwargs = dict()
mpl.rcParams['figure.figsize'] = (11, 7)  # Set Default Plot Size

# Show every file as a plot with clickable peaks. make_plots() returns a
# [dictionary] that maps each file name to the [list] of [x, y] positions
# of the peaks that were left selected.
dict_imp_peaks = pcg.make_plots(signal_kwargs, peak_kwargs)

important_peak_plot(dict_imp_peaks)

In [None]:
import matplotlib as mpl
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import subprocess

#       --- Get/Clean Data  ---

def get_files(directory='.'):
    '''
    Returns:
        - sorted list of the csv file names found in a given directory.
          Only the names are returned, not full paths; an empty list is
          returned when the directory holds no csv files.
    Parameters:
        - Directory: specifies the directory with the files you would like to analyze.
            - By Default: this method will use the current directory.
    '''

    # Local import, so this function does not depend on the cell's imports
    import os

    # List the directory directly instead of piping `ls` into `grep` through
    # a shell: that broke on directory names with spaces or shell
    # metacharacters, and raised CalledProcessError when nothing matched.
    # Dot-files are skipped, as plain `ls` does.
    return sorted(
        name for name in os.listdir(directory)
        if name.endswith('.csv') and not name.startswith('.')
    )

def get_headers(file_name, signal_x_ind, signal_y_ind, peak_x_ind, peak_y_ind):
    '''
    PARAMETERS:
        - file_name: the csv file whose header row is read
        - signal_x_ind, signal_y_ind: column indices of the signal x/y data
        - peak_x_ind, peak_y_ind:     column indices of the peak x/y data
    RETURNS:
        - (signal_headers, peak_headers): two lists with the [x, y] column
          labels of the signal data and of the peak data
    '''
    # nrows=0 parses only the header row; the data rows are not needed here
    headers = list(pd.read_csv(file_name, nrows=0))

    signal_headers = [headers[signal_x_ind], headers[signal_y_ind]]
    peak_headers   = [headers[peak_x_ind], headers[peak_y_ind]]

    return signal_headers, peak_headers

def convert(val):
    '''
    DESCRIPTION:
        - converts a value into a float.
        - a trailing SI-prefix character (n, u, m, k, M) is expanded into
          its power of ten, e.g. "1.5k" -> 1500.0
        - You can use pandas to run this function over every cell of a
          DataFrame, using "df.applymap(func)" ("df.map(func)" in pandas >= 2.1)
        - This is a helper function for store_file_data
    RETURNS:
        - [float] the converted value
        - None, after printing a message, when the value cannot be converted
    '''

    # conversion dictionary
    conversions = {
                    'n':10**-9,
                    'u':10**-6,
                    'm':10**-3,
                    'k':10**3,
                    'M':10**6
                  }

    # Plain numbers convert directly. Trying this first also handles NaN:
    # it is spelled "nan", so its last character would otherwise be
    # mistaken for the nano prefix.
    try:
        return float(val)
    except (TypeError, ValueError):
        pass

    # Otherwise expect <number><prefix>. The emptiness check guards the
    # last-character lookup against empty strings.
    str_val = str(val).strip()
    if str_val and str_val[-1] in conversions:
        try:
            return float(str_val[:-1])*conversions[str_val[-1]]
        except ValueError:
            pass

    print("Failed to Implicitely Convert to Float: ",val)
    return None

def store_file_data(file_names,signal_headers,peak_headers):
    '''
    DESCRIPTION:
        1. reads in all of the data, from all of the files
        2. splits the data into signal, and peak data,
           that are stored in dataframes.
        3. drops the rows with missing values and converts the rest
           with convert()
    PARAMETERS:
        - file_names:     the csv files to read
        - signal_headers: column labels of the signal data
        - peak_headers:   column labels of the peak data
    RETURNS: dictionary:
        - KEY:   file_name
        - VALUE: (signal-DataFrame, peak-DataFrame)
    '''

    def map_elements(frame):
        # DataFrame.applymap is deprecated since pandas 2.1 in favour of
        # DataFrame.map, so use whichever method this pandas provides
        mapper = frame.map if hasattr(frame, "map") else frame.applymap
        return mapper(convert)

    file_to_data = {}

    for read_file in file_names:

        # Read Data
        df = pd.read_csv(read_file)

        # Separate DataFrames by the headers that were passed in (they used
        # to be ignored in favour of fixed column positions), and drop the
        # rows that have a missing value in the selected columns
        df_signal = df[list(signal_headers)].dropna(axis=0)
        df_peak   = df[list(peak_headers)].dropna(axis=0)

        # Convert Data to Floats
        df_signal = map_elements(df_signal)
        df_peak   = map_elements(df_peak)

        file_to_data[read_file] = (df_signal,df_peak)

    return file_to_data

def get_plot_data(file_name, all_data, signal_headers, peak_headers):
    '''
    RETURNS
        - (x, y, x_peak, y_peak): the four columns of a single file
          that are needed to plot it.
    PARAMETERS
        - all_data maps a file name to (signal-DataFrame, peak-DataFrame);
          the first two entries of each header list name the x and y columns.
    '''

    entry = all_data[file_name]

    return (
        entry[0][signal_headers[0]],   # signal x
        entry[0][signal_headers[1]],   # signal y
        entry[1][peak_headers[0]],     # peak x
        entry[1][peak_headers[1]],     # peak y
    )

#       --- Build Graphs ---

def make_plots(all_data,signal_headers,peak_headers):
    '''
    DESCRIPTION:
        - Plots the data from all of the files one at a time.
        - These plots have clickable peaks. Every time a click is made on
          a peak, the position of the peak is recorded. If the peak is
          clicked on again, it will remove the record of that peak.
    PARAMETERS:
        - all_data: dictionary mapping a file name to
                    (signal-DataFrame, peak-DataFrame)
        - signal_headers / peak_headers: [x, y] column labels of the
                    signal / peak data
    RETURN:
        - returns a dictionary:
            - KEY: name of the file
            - VALUES: List of [x-coordinate, y-coordinate] pairs
                      that describes the peaks that were clicked on.
                      (if a peak is clicked again, it will be removed from
                      this list.)
    '''

    important_peaks = {}

    # Set Plot Size (the same for every file, so it is set once)
    mpl.rcParams['figure.figsize'] = (11,7)

    for file_name in all_data:

        important_peaks[file_name] = []

        # Create Plot Figure/Axes
        fig, ax = plt.subplots()

        # Get Signal and Peak Data
        x, y, x_peak, y_peak = get_plot_data(file_name,all_data,signal_headers,peak_headers)

        # Plain arrays for the click handler: the pick event reports the
        # POSITION of a peak, while the Series keep the row labels that
        # survived dropna and therefore must not be indexed with it.
        x_values = np.asarray(x_peak)
        y_values = np.asarray(y_peak)

        # Set Axis Limits
        # - (y-axis range is 20% greater than max data point in a plot;
        #    skipped when there are no peaks to take a maximum of)
        ax.set_xlim(0,1200000)
        if len(y_values):
            y_max = np.max(y_peak)
            ax.set_ylim(0,y_max+0.2*y_max)

        ax.set_xlabel("Frequency")
        ax.set_ylabel("Magnitude")
        ax.set_title(file_name)

        # Peak Plot
        coll = ax.scatter(x_peak, y_peak, color=["blue"]*len(x_peak),picker=5,label="Peak Data")

        # Signal Plot
        ax.plot(x, y, linewidth=1, linestyle="-",
                 color="red", label="Magnitude vs. Frequency")

        print("File:",file_name)
        def on_pick(event):

            # position in the scatter data where the event occurred
            ind = event.ind[0]

            # coordinates of the peak that was clicked
            x_val = x_values[ind]
            y_val = y_values[ind]

            # add point
            if [x_val,y_val] not in important_peaks[file_name]:

                # color is specified by: RGBA tuple
                # https://www.cgl.ucsf.edu/chimera/docs/ProgrammersGuide/Examples/footnotes/rgba.html
                # NOTE(review): _facecolors/_edgecolors are private
                # matplotlib attributes - confirm on matplotlib upgrades
                important_peaks[file_name].append([x_val,y_val])
                coll._facecolors[ind,:] = (1, 0, 0, 1)
                coll._edgecolors[ind,:] = (1, 0, 0, 1)

                print("\t  Picked  Peak [ {:>3} ] at point: [ {:06.2f}, {:06.5f} ]".format(ind,x_val,y_val))

            # remove point
            else:

                important_peaks[file_name].remove([x_val,y_val])
                coll._facecolors[ind,:] = (0, 0, 1, 1)
                coll._edgecolors[ind,:] = (0, 0, 1, 1)

                print("\t  Removed Peak [ {:>3} ] at point: [ {:06.2f}, {:06.5f} ]".format(ind,x_val,y_val))

            fig.canvas.draw()

        ax.legend(loc="upper right")

        fig.canvas.mpl_connect('pick_event', on_pick)

        # Block until the figure is closed, so the files are handled one at a time
        plt.show(block=True)

    return important_peaks

# 	--- Main Method  ---

def main():
    '''
    Placeholder entry point: it intentionally does nothing.
    '''


if __name__ == "__main__":
    main()



In [None]:
#       --- Get Data ---
# Uses the functions defined in the previous cell.
%matplotlib qt
all_files = get_files()
# Header labels are taken from the first file: signal x/y are columns 2/3,
# peak x/y are columns 0/1.
signal_headers, peak_headers = get_headers(all_files[0],2,3,0,1)
all_data  = store_file_data(all_files,signal_headers,peak_headers)

#       --- Make Plots ---
# One clickable figure per file; the result maps each file name to the
# list of peaks that were left selected.
important_peaks = make_plots(all_data,signal_headers,peak_headers)

#print(important_peaks)




In [None]:
#       --- Total Plot  ---
# Plots the x-coordinate of every selected peak against the temperature
# of the file that the peak came from.
%matplotlib qt
Freq = []
Temp  = []
for file_name in important_peaks:
    for peak in important_peaks[file_name]:
        # peak is an [x, y] pair; its x value is the frequency
        Freq.append(peak[0])
        # temperature: fifth "_"-separated field of the file name,
        # with its last character removed
        Temp.append(float(file_name.split('_')[4][:-1]))

print("Frequency:\n",Freq)
print("Temperature:\n",Temp)

plt.plot(Temp,Freq,"ro", linewidth=1,
              label="Frequency vs Temp")
plt.show()
    