Author: Daniel Yan

Date: 2018-07-16

Email: daniel.yan@vanderbilt.edu

Description: t-distributed stochastic neighbor embedding (t-SNE) analysis of the data, with scatter plot visualization.

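Preconditions: The data file is read with pandas `read_table`, so it must be tab-delimited with a header row. It must contain exactly one column named "label" holding the label of each row; every other column is treated as a feature. The data file must not contain a row-index column.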

Command Line Arguments:
First argument: Name of data file to read from. Include directory.
Second argument: Name of file to store scatterplot to. Include directory.
Third argument: Name of file to store tsne model to. Include directory.
Error will result if any argument is invalid.

In [None]:
# Libraries
import pandas as pd # Data
from pca import scatterplot_cords, label_coordinates # For creating scatterplot from labeled coordinates
from sklearn.externals import joblib # Save tsne model
from sklearn.manifold import TSNE
import sys # Command line arguments

In [None]:
if __name__ == "__main__":
    # Script entry point: read a labeled data file, embed its features with
    # t-SNE, save the fitted model, and write a scatter plot of the embedding.

    # Fail with a readable usage message instead of a bare IndexError when
    # arguments are missing.
    if len(sys.argv) < 4:
        sys.exit("Usage: {} <data_file> <scatterplot_file> <model_file>"
                 .format(sys.argv[0]))

    # Get data file to process
    data_file = sys.argv[1]
    # Get name of file to save scatterplot to.
    scatterplot_file = sys.argv[2]
    # Get name of file to save model to.
    model_file = sys.argv[3]

    # Load in data file
    print("Loading data file...")
    data_frame = pd.read_table(data_file)

    if "label" not in data_frame.columns:
        sys.exit('Data file must contain a column named "label".')

    # Get labels. Column selection uses [] -- .loc["label"] would look up a
    # *row* whose index is "label", which is not what is wanted here.
    labels_df = data_frame["label"]

    # Get features (everything except the label column)
    features_df = data_frame.drop(columns="label")

    # Create tsne and transform coordinates.
    # NOTE(review): two components are assumed because the result is drawn as
    # a scatter plot -- confirm that the pca helpers expect 2-D coordinates.
    n_components = 2
    tsne = TSNE(n_components=n_components, init="pca", random_state=0)
    features_transformed = tsne.fit_transform(features_df)

    # Persist the fitted model to the path given as the third argument.
    joblib.dump(tsne, model_file)

    # Label the transformed coordinates
    transformed_df = label_coordinates(
        transformed_coordinates=features_transformed,
        labels=labels_df,
    )

    # Get the list of unique labels in order of first appearance, so the
    # ordering is reproducible between runs (set() ordering of strings is not).
    labels_list = labels_df.unique().tolist()

    # Plot the transformed coordinates
    scatterplot_cords(
        df=transformed_df,
        file_name=scatterplot_file,
        labels_list=labels_list,
    )