In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import DBSCAN, KMeans
from sklearn.compose import make_column_transformer
from sklearn.datasets import load_sample_image, make_blobs
from sklearn.decomposition import PCA
from sklearn.ensemble import BaggingClassifier, BaggingRegressor, GradientBoostingClassifier, GradientBoostingRegressor, RandomForestClassifier, RandomForestRegressor, StackingClassifier, StackingRegressor
from sklearn.impute import KNNImputer, SimpleImputer
from sklearn.inspection import permutation_importance
from sklearn.linear_model import Lasso, LinearRegression, LogisticRegression, Ridge
from sklearn.manifold import Isomap, LocallyLinearEmbedding, MDS
from sklearn.metrics import auc, confusion_matrix, ConfusionMatrixDisplay, davies_bouldin_score, rand_score, roc_auc_score, roc_curve, silhouette_samples, silhouette_score 
from sklearn.model_selection import train_test_split, cross_validate, GridSearchCV
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder, PolynomialFeatures, StandardScaler
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor, plot_tree
import statsmodels.api as sm

<h1>Test-Training-Split for V1</h1>

In [9]:
# Read the prepared V1 dataset; the path is relative to the notebook's working directory.
df1 = pd.read_csv(filepath_or_buffer='v1_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df1.head(n=5)

Unnamed: 0,size_dog,eye_circularity,skull_circularity,ear_size,eyes_distance,ear_form,hair_tone,nose_elongatedness,weight_dog,muzzle_form,skin_tone_main,skin_pattern_complexity,body_length,tail_length,hair_curlness,hair_length,leg_front_number,leg_back_number,breuni_class
0,95.0,48.0,83.0,178.0,72.0,10.0,162.0,42.0,20.0,159.0,176.0,379.0,184.0,70.0,6.0,16.0,187.0,197.0,fashion_lifestyle_elite
1,91.0,41.0,84.0,141.0,57.0,9.0,149.0,45.0,19.0,143.0,170.0,330.0,158.0,72.0,9.0,14.0,189.0,199.0,fashion_lifestyle_elite
2,104.0,50.0,106.0,209.0,66.0,10.0,207.0,32.0,23.0,158.0,223.0,635.0,220.0,73.0,14.0,9.0,188.0,196.0,main_stream
3,93.0,41.0,82.0,159.0,63.0,9.0,144.0,46.0,19.0,143.0,160.0,309.0,127.0,63.0,6.0,10.0,199.0,207.0,fashion_lifestyle_elite
4,85.0,44.0,70.0,205.0,103.0,52.0,149.0,45.0,19.0,144.0,241.0,325.0,188.0,127.0,9.0,11.0,180.0,183.0,trendy


In [5]:
# Pick the two columns to split.
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df1 preview is not used here — confirm this is the intended X / y pairing.
X = df1['size_dog']
y = df1['eye_circularity']

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')


X_train : 
559    101.0
615     88.0
658     91.0
302     94.0
671    103.0
Name: size_dog, dtype: float64
(634,)

X_test : 
39     81.0
250    95.0
314    90.0
96     89.0
198    81.0
Name: size_dog, dtype: float64
(212,)

y_train : 
559    56.0
615    34.0
658    39.0
302    37.0
671    41.0
Name: eye_circularity, dtype: float64
(634,)

y_test : 
39     45.0
250    38.0
314    42.0
96     42.0
198    46.0
Name: eye_circularity, dtype: float64
(212,)


<h1>Test-Training-Split for V2</h1>

In [8]:
# Read the prepared V2 dataset; the path is relative to the notebook's working directory.
df2 = pd.read_csv(filepath_or_buffer='v2_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df2.head(n=5)

Unnamed: 0,0,1,2,3,4,breuni_class
0,0.329746,-0.215835,0.997248,0.171485,0.081947,fashion_lifestyle_elite
1,-1.594478,-0.422979,-0.370002,0.231497,0.691322,fashion_lifestyle_elite
2,3.760799,0.186851,0.087971,1.20277,0.729704,main_stream
3,-1.742311,-2.825632,0.111181,0.374927,-0.370714,fashion_lifestyle_elite
4,0.549668,4.764098,11.689234,0.16383,3.25721,trendy


In [7]:
# Pick the two columns to split (the column labels are the strings "0" and "1").
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df2 preview is not used here — confirm this is the intended X / y pairing.
X = df2["0"]
y = df2["1"]

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')

X_train : 
559    4.652916
615   -3.282399
658   -2.422918
302   -1.350330
671    0.579896
Name: 0, dtype: float64
(634,)

X_test : 
39    -1.573252
250   -3.384509
314   -2.280153
96    -1.829323
198   -1.816749
Name: 0, dtype: float64
(212,)

y_train : 
559    0.448437
615   -3.986502
658   -3.319598
302   -2.613045
671   -2.836430
Name: 1, dtype: float64
(634,)

y_test : 
39     1.361365
250    1.530229
314    2.482860
96     0.661732
198    2.635628
Name: 1, dtype: float64
(212,)


<h1>Test-Training-Split for V3</h1>

In [11]:
# Read the prepared V3 dataset; the path is relative to the notebook's working directory.
df3 = pd.read_csv(filepath_or_buffer='v3_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df3.head(n=5)

Unnamed: 0,0,1,breuni_class
0,-57.332839,12.411256,fashion_lifestyle_elite
1,-116.911811,-9.414613,fashion_lifestyle_elite
2,211.526976,7.91104,main_stream
3,-141.291488,20.490169,fashion_lifestyle_elite
4,-98.423682,50.854161,trendy


In [20]:
# Pick the two columns to split (the column labels are the strings "0" and "1").
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df3 preview is not used here — confirm this is the intended X / y pairing.
X = df3["0"]
y = df3["1"]

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')

X_train : 
559    277.125232
615   -170.807851
658   -163.376149
302    -59.899219
671     15.431938
Name: 0, dtype: float64
(634,)

X_test : 
39    -104.117002
250   -200.848381
314   -111.874448
96    -136.870923
198   -102.251767
Name: 0, dtype: float64
(212,)

y_train : 
559    21.737060
615    19.546498
658    18.334842
302    45.470949
671    37.899561
Name: 1, dtype: float64
(634,)

y_test : 
39      3.606437
250   -18.040219
314   -34.589879
96     -4.601024
198   -36.636067
Name: 1, dtype: float64
(212,)


<h1>Test-Training-Split for V4</h1>

In [12]:
# Read the prepared V4 dataset; the path is relative to the notebook's working directory.
df4 = pd.read_csv(filepath_or_buffer='v4_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df4.head(n=5)

Unnamed: 0,data_to_neuron,neuron_to_neighbour,breuni_class
0,0.255732,0.314266,fashion_lifestyle_elite
1,0.234393,0.314266,fashion_lifestyle_elite
2,0.195993,0.314266,main_stream
3,0.354151,0.314266,fashion_lifestyle_elite
4,0.305593,0.314266,trendy


In [19]:
# Pick the two columns to split.
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df4 preview is not used here — confirm this is the intended X / y pairing.
X = df4["data_to_neuron"]
y = df4["neuron_to_neighbour"]

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')

X_train : 
559    0.227847
615    0.163304
658    0.180338
302    0.119380
671    0.215355
Name: data_to_neuron, dtype: float64
(634,)

X_test : 
39     0.195729
250    0.118763
314    0.005090
96     0.243865
198    0.077941
Name: data_to_neuron, dtype: float64
(212,)

y_train : 
559    0.215863
615    0.110354
658    0.158346
302    0.218103
671    0.250047
Name: neuron_to_neighbour, dtype: float64
(634,)

y_test : 
39     0.142201
250    0.127004
314    0.114538
96     0.045021
198    0.077130
Name: neuron_to_neighbour, dtype: float64
(212,)


<h1>Test-Training-Split for V5</h1>

In [13]:
# Read the prepared V5 dataset; the path is relative to the notebook's working directory.
df5 = pd.read_csv(filepath_or_buffer='v5_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df5.head(n=5)

Unnamed: 0,0,1,breuni_class
0,-61.777979,78.625284,fashion_lifestyle_elite
1,-163.733415,30.556835,fashion_lifestyle_elite
2,339.649554,30.058836,main_stream
3,-193.022682,-4.488933,fashion_lifestyle_elite
4,-238.553359,136.040289,trendy


In [21]:
# Pick the two columns to split (the column labels are the strings "0" and "1").
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df5 preview is not used here — confirm this is the intended X / y pairing.
X = df5["0"]
y = df5["1"]

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')

X_train : 
559    433.518314
615   -222.223804
658   -199.635812
302    -53.483889
671     56.290710
Name: 0, dtype: float64
(634,)

X_test : 
39    -150.364355
250   -275.504601
314   -186.446824
96    -199.675221
198   -168.090015
Name: 0, dtype: float64
(212,)

y_train : 
559     4.874678
615   -59.166614
658   -52.422755
302    10.304122
671     8.328843
Name: 1, dtype: float64
(634,)

y_test : 
39     53.106404
250   -76.459464
314    52.714302
96     21.848732
198    63.212555
Name: 1, dtype: float64
(212,)


<h1>Test-Training-Split for V6</h1>

In [14]:
# Read the prepared V6 dataset; the path is relative to the notebook's working directory.
df6 = pd.read_csv(filepath_or_buffer='v6_data_dog_prepared.csv')
# Preview the first five rows as this cell's output.
df6.head(n=5)

Unnamed: 0,0,1,breuni_class
0,0.034879,-0.023564,fashion_lifestyle_elite
1,0.034879,-0.024841,fashion_lifestyle_elite
2,0.034879,0.053092,main_stream
3,0.034879,-0.025692,fashion_lifestyle_elite
4,0.034879,-0.022901,trendy


In [22]:
# Pick the two columns to split (the column labels are the strings "0" and "1").
# NOTE(review): both are feature columns; the `breuni_class` column shown in
# the df6 preview is not used here — confirm this is the intended X / y pairing.
# NOTE(review): every recorded row of column "0" shows the same value — check
# whether this column carries any information.
X = df6["0"]
y = df6["1"]

# Shuffled, seeded split. No test_size is passed, so scikit-learn's default
# proportion applies.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    shuffle=True,
    random_state=42,
)

# Report the first rows and the shape of each part, separated by a blank line.
print('X_train : ', X_train.head(), X_train.shape, '', sep='\n')
print('X_test : ', X_test.head(), X_test.shape, '', sep='\n')
print('y_train : ', y_train.head(), y_train.shape, '', sep='\n')
print('y_test : ', y_test.head(), y_test.shape, sep='\n')

X_train : 
559    0.034879
615    0.034879
658    0.034879
302    0.034879
671    0.034879
Name: 0, dtype: float64
(634,)

X_test : 
39     0.034879
250    0.034879
314    0.034879
96     0.034879
198    0.034879
Name: 0, dtype: float64
(212,)

y_train : 
559    0.059222
615   -0.026054
658   -0.026229
302   -0.014876
671    0.007602
Name: 1, dtype: float64
(634,)

y_test : 
39    -0.023110
250   -0.029636
314   -0.022946
96    -0.024781
198   -0.023027
Name: 1, dtype: float64
(212,)
