In [17]:
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
import numpy as np
import time
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.graph_objs as go

from cvjson.cvj import CVJ
from resvm import get_data, group_categories


In [2]:
pyo.offline.init_notebook_mode(connected=True)

In [30]:
def threeD_data(data, labels):
    df = pd.DataFrame(data)
    df.columns = ["x", "y", "z"]

    df['labels'] = labels

    return df

def twoD_data(data, labels):
    df = pd.DataFrame(data)
    df.columns = ["x", "y"]

    df['labels'] = labels

    return df

def plot_3d(df, layout=None, file_name=None):
    
    labels = df["labels"].unique()
    traces = []
    for label in labels:
        traces.append(go.Scatter3d(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'], z=df[df['labels'] == label]['z'],
                        mode='markers', name=label))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "3D_plot"

    pyo.iplot(data, filename=file_name +".html")
    
def plot_2d(df, layout=None, file_name=None):
    
    labels = df["labels"].unique()
    traces = []
    for label in labels:
        traces.append(go.Scatter(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'],
                        mode='markers', name=label))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "2D_plot"

    pyo.iplot(data, filename=file_name +".html")
    
def tsne_3d_kmeans(kmeans_clf, df, layout=None, file_name=None):
    
    labels = kmeans_clf.fit(df.values).labels_

    df['labels'] = labels

    traces = []
    for label in labels:
        traces.append(go.Scatter3d(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'], z=df[df['labels'] == label]['z'],
                        mode='markers', name="Cluster_" + label))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "tsne_3d_kmeans"

    pyo.iplot(data, filename=file_name +".html")


def tsne_2d_kmeans(kmeans_clf, df, layout=None, file_name=None):

    labels = kmeans_clf.fit(df.values).labels_

    df['labels'] = labels

    traces = []
    for label in labels:
        traces.append(go.Scatter(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'],
                        mode='markers', name="Cluster_" +label))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "tsne_2d_kmeans"

    pyo.iplot(data, filename=file_name +".html")

def tsne_3d(df, cvj_obj, layout=None, file_name=None):

    labels = df['labels'].unique()

    traces = []
    for label in labels:
        traces.append(go.Scatter3d(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'], z=df[df['labels'] == label]['z'],
                        mode='markers', name=cvj_obj.get_class_id_2_name(label)))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "tsne_3d"


    pyo.iplot(data, filename=file_name +".html")


def tsne_2d(df, cvj_obj, layout=None, file_name=None):

    labels = df['labels'].unique()

    traces = []
    for label in labels:
        traces.append(go.Scatter(x=df[df['labels'] == label]['x'], y=df[df['labels'] == label]['y'],
                        mode='markers', name=cvj_obj.get_class_id_2_name(label)))

    data = go.Data(traces)

    if layout != None:
        data = go.Figure(data=data, layout=layout)

    if file_name == None:
        if layout != None:
            file_name = layout.title
        else:
            file_name = "tsne_2d"


    pyo.iplot(data, filename=file_name +".html")


In [4]:
train_labeled_features_path = "/home/ben/Desktop/mapped_features/train_mardct_coco_fine_ipatch/boat_mapped_features.hdf5" 
test_labeled_features_path = "/home/ben/Desktop/mapped_features/test_mardct_coco_fine_ipatch/boat_mapped_features.hdf5"


train_image_path = "/home/ben/Desktop/M12_Folder_Mimic/Datasets/Train/Images/Mardct/completed_train_refinement"
train_cvj = CVJ("/home/ben/Desktop/M12_Folder_Mimic/Datasets/Train/Cocoized/without_coco_categories/completed_train_refinement.json", train_image_path)


test_image_path = "/home/ben/Desktop/M12_Folder_Mimic/Datasets/Validation/Images/completed_refinement_test_images"
test_cvj = CVJ("/home/ben/Desktop/M12_Folder_Mimic/Datasets/Validation/Cocoized/without_coco_categories/completed_test_refinement.json", test_image_path)

train_real_noise = "/home/ben/Desktop/mapped_features/mardct_train_mapped_features/train_boat_near_negatives.hdf5"
test_real_noise = "/home/ben/Desktop/mapped_features/mardct_test_mapped_features/boatnear_negatives.hdf5"

# Training Data (Deep Features)

Full amount of data, including all classes.

Below getting TSNE matrices with normal labels for all classes with 3 components and 2 components

In [13]:
X, y, labels, counts = get_data(train_labeled_features_path)

threeD_model = TSNE(n_components=3, random_state=0)
threeD_tsne_matrix = threeD_model.fit_transform(X)
threeD_df = threeD_data(threeD_tsne_matrix, y)

twoD_model = TSNE(n_components=2, random_state=0)
twoD_tsne_matrix = twoD_model.fit_transform(X)
twoD_df = twoD_data(twoD_tsne_matrix, y)



A value error occurred in get_data() when reshaping the features.  Continuing.


In [12]:
tsne_3d(threeD_df, train_cvj)

## KMEANs 

Clustered on the original data and plotted using TSNE for reduction

In [31]:
labels = KMeans(n_clusters=5, random_state=0).fit(X).labels_
tsne_3d_kmeans(kmeans_clf, X)


ValueError: Length mismatch: Expected axis has 2048 elements, new values have 3 elements

Clustered on TSNE and plotted using TSNE for reduction

### Different groupings

Grouping the classes based on Dr.Tesics request

In [None]:
name_list = [["Mototopo"], ["VaporettoACTV"], ["Lanciafino10mMarrone", "Lanciafino10mBianca"], ["Barchino", "Patanella", "Sanpierota", "Cacciapesca", "Topa"], ["Gondola", "Sandoloaremi", "Caorlina"]]

id_list = []
for names in name_list:
    group = []
    for name in names:
        group.append(train_cvj.get_class_name_2_id()[name.lower()])
    id_list.append(group)
    
X_train, y_train = group_categories(X_train, y_train, id_list, noise=True)


In [None]:
threeD_model = TSNE(n_components=3, random_state=0)
threeD_tsne_matrix = threeD_model.fit_transform(X_train)
threeD_df = threeD_data(threeD_tsne_matrix, y_train)

twoD_model = TSNE(n_components=2, random_state=0)
twoD_tsne_matrix = twoD_model.fit_transform(X_train)
twoD_df = twoD_data(twoD_tsne_matrix, y_train)

# Testing Data (Deep Features)

In [None]:
# X, y, labels, counts = get_data(test_labeled_features_path)

# threeD_model = TSNE(n_components=3, random_state=0)
# threeD_tsne_matrix = threeD_model.fit_transform(X)
# threeD_df = threeD_data(threeD_tsne_matrix, y)

# twoD_model = TSNE(n_components=2, random_state=0)
# twoD_tsne_matrix = twoD_model.fit_transform(X)
# twoD_df = twoD_data(twoD_tsne_matrix, y)