## Preparing and saving data

In [1]:
import pandas as pd
import numpy as np
from scipy.io.arff import loadarff

In [2]:
# loadarff returns a (records, metadata) pair; only the records feed the frame.
arff_records, _arff_meta = loadarff("php9xWOpn.arff")
df_view = pd.DataFrame(arff_records)

In [3]:
# One-hot encode the non-numeric columns so every column becomes numeric.
df_numerical = pd.get_dummies(data=df_view)

In [4]:
# Name of the binary target column. The b'..' in the name comes from the
# byte-string class values that were one-hot encoded above.
target = "Class_b'2'"

# The two class dummies are complementary, so drop one and keep the other as
# the 0/1 target.
df_num = df_numerical.drop(columns=["Class_b'1'"])

# Z-score the feature columns only; the target must stay a raw 0/1 indicator.
df_features = df_num.drop(columns=[target])
# A constant column has std == 0 and would turn into NaN (0/0); dividing by 1
# instead leaves it at 0 after centring.
feature_std = df_features.std().replace(0, 1)
df_normalized = (df_features - df_features.mean()) / feature_std

# Re-attach the untouched target as the last column.
df_normalized[target] = df_num[target]


In [5]:
# Class balance: fraction of rows in the positive class and its complement.
target_values = df_normalized[target]
pos = target_values.mean()
neg = 1 - pos

In [6]:
# Number of rows in the normalized frame.
df_normalized.shape[0]

1941

In [6]:
# Export is currently disabled; uncomment the next line to write the normalized frame to data.csv.
# df_normalized.to_csv("data.csv", encoding="utf-8", index=False)

## Visualizing data

### T-SNE

In [8]:
# Print the column names, non-null counts and dtypes of the normalized frame.
df_normalized.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1941 entries, 0 to 1940
Data columns (total 34 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   V1          1941 non-null   float64
 1   V2          1941 non-null   float64
 2   V3          1941 non-null   float64
 3   V4          1941 non-null   float64
 4   V5          1941 non-null   float64
 5   V6          1941 non-null   float64
 6   V7          1941 non-null   float64
 7   V8          1941 non-null   float64
 8   V9          1941 non-null   float64
 9   V10         1941 non-null   float64
 10  V11         1941 non-null   float64
 11  V12         1941 non-null   float64
 12  V13         1941 non-null   float64
 13  V14         1941 non-null   float64
 14  V15         1941 non-null   float64
 15  V16         1941 non-null   float64
 16  V17         1941 non-null   float64
 17  V18         1941 non-null   float64
 18  V19         1941 non-null   float64
 19  V20         1941 non-null  

In [9]:
from sklearn.manifold import TSNE

# Embed the feature columns only. The scatter plot is coloured by `target`, so
# leaving the label in the input would leak it into the embedding and make the
# classes look more separable than the features alone justify.
tsne = TSNE(n_components=2, random_state=42)  # fixed seed for a repeatable layout
X_tsne = tsne.fit_transform(df_normalized.drop(columns=[target]))



In [10]:
import plotly.express as px

In [13]:
# Pass the class as string labels so plotly draws two discrete colours with a
# legend instead of a continuous colour bar over a 0/1 numeric column.
class_labels = df_normalized[target].astype(int).astype(str)

fig = px.scatter(x=X_tsne[:, 0], y=X_tsne[:, 1], color=class_labels)
fig.update_layout(
    title="t-SNE visualization of Custom Classification dataset",
    xaxis_title="First t-SNE",
    yaxis_title="Second t-SNE",
    legend_title_text=target,
)
fig.show()