In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sklearn as sk
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn import svm
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier

import plotly
import plotly.offline as py
py.init_notebook_mode(connected=True)
import plotly.graph_objs as go

In [3]:
# Load the dataset; the relative path is resolved against the current working directory.
df = pd.read_csv('final_dataset_1_3.csv')

In [4]:
# Number of rows per class_number value (class balance check).
print(df['class_number'].value_counts())

1    11540
3    11531
Name: class_number, dtype: int64


In [5]:
# Random subsample of at most 5000 rows.
# - random_state pins the draw so Restart & Run All yields the same rows.
# - min(...) keeps the cell working when the input has fewer than 5000 rows
#   (DataFrame.sample raises if n exceeds the frame length without replacement).
small_df_pd = df.sample(n=min(5000, len(df)), random_state=42)

In [26]:
# Frame used for training and evaluation (currently the full dataset).
classify_df_pd = df

# Zero-based position of the target column. The comments in the following cell
# call it 'class_number' — NOTE(review): confirm column 1 really is 'class_number'.
predict_col = 1

# 70/30 split, stratified on the target column so both parts keep the same
# class proportions. The fixed random_state makes the split, and therefore every
# score computed from it, reproducible across kernel restarts.
df_train, df_test = train_test_split(
    classify_df_pd,
    test_size=0.3,
    stratify=classify_df_pd.iloc[:, predict_col],
    random_state=42,
)

In [27]:
# Positions of the explanatory columns: 7 through 177 inclusive (zero-based).
col_train = np.r_[7:178]

# Feature matrices: the same positional column slice from each part of the split.
X_train = df_train.iloc[:, col_train]
X_test = df_test.iloc[:, col_train]

# Target vectors: the column at zero-based position `predict_col`
# (described by the author as 'class_number').
y_train = df_train.iloc[:, predict_col]
y_test = df_test.iloc[:, predict_col]

In [None]:
# Small random subsample for a quick SVR experiment; seeded so the result is
# reproducible. To run on everything instead, use: df_reduced = classify_df_pd
df_reduced = classify_df_pd.sample(100, random_state=42)

# 70/30 split of the subsample, stratified on the target column and seeded.
df_train_reduced, df_test_reduced = train_test_split(
    df_reduced,
    test_size=0.3,
    stratify=df_reduced.iloc[:, predict_col],
    random_state=42,
)

# Positions of the explanatory columns: 7 through 177 inclusive (zero-based).
col_train = np.r_[7:178]

# Target: the column at zero-based position `predict_col`
# (described by the author as 'class_number').
y_train_reduced = df_train_reduced.iloc[:, predict_col]
y_test_reduced = df_test_reduced.iloc[:, predict_col]

# Feature matrices: the positional column slice defined above.
X_train_reduced = df_train_reduced.iloc[:, col_train]
X_test_reduced = df_test_reduced.iloc[:, col_train]

# Candidate estimators. Only svr_poly is fitted in this cell; the others are
# constructed but not trained here.
svc_lin = svm.LinearSVC()
svc_nu = svm.NuSVC(gamma='auto')
svr_rbf = SVR(kernel='rbf', C=100, gamma=0.1, epsilon=.1)
svr_lin = SVR(kernel='linear', C=100, gamma='auto')
svr_poly = SVR(kernel='poly', C=100, gamma='auto', degree=3, epsilon=.1, coef0=1, cache_size=300)
svr_poly.fit(X_train_reduced, y_train_reduced)

# NOTE: SVR is a regressor, so .score() is the coefficient of determination (R^2)
# of the predicted values against the class labels, not a classification accuracy.
# It is therefore not directly comparable with a classifier's .score().
score = svr_poly.score(X_test_reduced, y_test_reduced)
print(score)

In [30]:
# AdaBoost over decision trees, trained on the full train split.
# The base estimator is passed positionally on purpose: the keyword was renamed
# from `base_estimator` to `estimator` in newer scikit-learn releases (and the
# old name was later removed), while the first positional slot is the base
# estimator in both old and new versions.
# random_state makes the fitted ensemble, and the printed score, reproducible.
ADBoost = AdaBoostClassifier(DecisionTreeClassifier(), n_estimators=10, random_state=42)
ADBoost.fit(X_train, y_train)

# Mean accuracy on the held-out test split.
print(ADBoost.score(X_test, y_test))

0.4885871135509968


In [9]:
# Data to visualise: the random subsample (assign `df` instead for the full dataset).
plot_df_pd = small_df_pd

# Columns placed on the three plot axes (alternatives noted by the author:
# 'a_900', 'a_180', 'a_490').
feature1 = 'ap_2'
feature2 = 'ap_3'
feature3 = 'ap_5'

# Count how many rows share each distinct (class_number, feature1, feature2, feature3)
# combination, then flatten the result into a regular frame with a 'cnt' column.
group_cols = ['class_number', feature1, feature2, feature3]
cnt_series = plot_df_pd.groupby(group_cols).size()
cnt_df = cnt_series.to_frame(name='cnt').reset_index()

In [10]:
# Marker colour per class: class 1 is red, class 3 is green.
color_map = {1: 'red', 3: 'green'}
colors = cnt_df['class_number']

# Bubble styling: marker area follows the row count of each combination,
# scaled so the largest bubble has a diameter of roughly 60 px.
bubble_marker = dict(
    size=cnt_df.cnt.array,
    sizemode='area',
    sizeref=2.*max(cnt_df.cnt.array)/(60.**2),
    sizemin=4,
    color=cnt_df.class_number.map(color_map),
    line=dict(color='white', width=0),
)

# One 3-D scatter trace with a point per (class_number, feature1, feature2, feature3) group.
bubbles = go.Scatter3d(
    x=cnt_df.loc[:, feature1].array,
    y=cnt_df.loc[:, feature2].array,
    z=cnt_df.loc[:, feature3].array,
    mode='markers',
    opacity=0.7,
    marker=bubble_marker,
)
fig = go.Figure(data=[bubbles])

# Axis settings share the same look; only the title differs per axis.
scene_axes = {
    f'{axis}axis': dict(title=feature, gridcolor='white')
    for axis, feature in zip('xyz', (feature1, feature2, feature3))
}
scene_axes['bgcolor'] = 'white'

fig.update_layout(
    title=f'{feature1}, {feature2} and {feature3}',
    scene=scene_axes,
    paper_bgcolor='white',
    plot_bgcolor='white',
)

# Save a standalone HTML copy next to the notebook, then render inline.
fig.write_html("class_number_coeffs.html")
fig.show()

In [9]:
# Columns placed on the three axes of this plot.
# NOTE: this rebinds feature1/feature2/feature3 used by the earlier bubble-plot cells.
feature1 = 'res_s=0'
feature2 = 'regulator'
feature3 = 'discriminant'


def _class_scatter(frame, class_value, rgb, features):
    """Build a 3-D marker trace for the rows of one class.

    Parameters
    ----------
    frame : DataFrame containing a 'class_number' column and the feature columns.
    class_value : value of 'class_number' to keep.
    rgb : colour string used for the markers.
    features : sequence of three column names mapped to the x, y and z axes.

    Returns
    -------
    go.Scatter3d named 'class_number_<class_value>'.
    """
    subset = frame[frame['class_number'] == class_value]
    x_col, y_col, z_col = features
    return go.Scatter3d(
        x=subset[x_col],
        y=subset[y_col],
        z=subset[z_col],
        mode='markers',
        marker=dict(size=3, color=rgb, line=dict(width=1)),
        name='class_number_{}'.format(class_value),
    )


# The dataset holds class numbers 1 and 3 (see the value_counts output and the
# {1: ..., 3: ...} colour map used for the bubble plot). The second trace
# previously filtered on class 2, which matches no rows and rendered empty.
dr1 = _class_scatter(plot_df_pd, 1, 'rgb(255,0,0)', (feature1, feature2, feature3))
dr2 = _class_scatter(plot_df_pd, 3, 'rgb(0,255,0)', (feature1, feature2, feature3))

data = [dr1, dr2]
layout = go.Layout(
    title='class_number',
    scene=dict(
        xaxis=dict(title=feature1),
        yaxis=dict(title=feature2),
        zaxis=dict(title=feature3),
    ),
)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig, filename='class_number')