In [None]:
import pandas as pd 
import numpy as np
from sklearn.datasets import load_iris, load_wine
from sklearn.preprocessing import StandardScaler

In [None]:
def dataPreprocessing(path='penguins.csv'):
    """Load the penguins CSV and split it into features and labels.

    Rows containing any missing value, and rows whose ``sex`` is the
    placeholder ``'.'``, are discarded. ``species`` is mapped to integer
    codes (Adelie=0, Gentoo=1, Chinstrap=2); ``sex`` and ``island`` are
    replaced by 0/1 indicator columns.

    Parameters
    ----------
    path : str, optional
        Location of the CSV file. Defaults to ``'penguins.csv'``, the path
        this function always read before the parameter existed.

    Returns
    -------
    X : pandas.DataFrame
        All columns except ``species``, ``island`` and ``sex``, followed by
        the ``sex`` indicators (first level dropped) and one indicator
        column per island.
    y : pandas.Series
        The integer-coded ``species`` column.
    """
    penguins_raw = pd.read_csv(path)

    # Build new frames instead of mutating in place so each step is explicit.
    penguins_clean = penguins_raw.dropna()
    penguins_clean = penguins_clean[penguins_clean['sex'] != '.'].reset_index(drop=True)
    penguins_clean = penguins_clean.assign(
        species=penguins_clean['species'].map({'Adelie': 0, 'Gentoo': 1, 'Chinstrap': 2})
    )

    # dtype=int keeps the indicators numeric on every pandas version; recent
    # releases default to bool, which turns a later `.values` call into an
    # object array and writes True/False to CSV.
    sex_dummies = pd.get_dummies(penguins_clean['sex'], drop_first=True, dtype=int)
    island_dummies = pd.get_dummies(penguins_clean['island'], dtype=int)

    pgn = pd.concat([penguins_clean, sex_dummies, island_dummies], axis=1)
    pgn = pgn.drop(['island', 'sex'], axis=1)

    X = pgn.drop('species', axis=1)
    y = pgn['species']

    return X, y

In [None]:
def process_feature_names(str):
    """Return ``str`` with parentheses removed and words joined by underscores.

    Both "(" and ")" are deleted, the remainder is split on whitespace, and
    the resulting pieces are joined with "_".
    """
    # Mapping a code point to None deletes that character during translate().
    without_parens = str.translate({ord("("): None, ord(")"): None})
    words = without_parens.split()
    return "_".join(words)

In [None]:
# Load the two bundled scikit-learn datasets.
wine = load_wine()
iris = load_iris()

# Strip parentheses from the iris feature names and join the words with "_".
iris.feature_names = list(map(process_feature_names, iris.feature_names))

iris_X = pd.DataFrame(data=iris.data, columns=iris.feature_names)
iris_y = pd.DataFrame(data=iris.target, columns=['target'])
wine_X = pd.DataFrame(data=wine.data, columns=wine.feature_names)
wine_y = pd.DataFrame(data=wine.target, columns=['target'])

# Penguins come from the local CSV via dataPreprocessing().
penguins_X, penguins_y = dataPreprocessing()
penguins_cols = list(map(str.lower, penguins_X.columns.values.tolist()))
# Relabel the columns on a copy rather than rebuilding the frame from
# `.values`: a rebuild forces every column to one common dtype (object when
# the indicator columns are bool), whereas relabelling keeps each dtype.
penguins_X = penguins_X.copy()
penguins_X.columns = penguins_cols

# fit_transform() refits the scaler on each call, so every dataset is
# standardised with its own statistics even though one scaler is reused.
scaler = StandardScaler()
iris_X_scaled = scaler.fit_transform(iris_X)
wine_X_scaled = scaler.fit_transform(wine_X)
penguins_X_scaled = scaler.fit_transform(penguins_X)

In [None]:
# Wrap each scaled result in a DataFrame carrying the matching column labels.
iris_X_scaled, wine_X_scaled, penguins_X_scaled = (
    pd.DataFrame(data=scaled_values, columns=column_labels)
    for scaled_values, column_labels in (
        (iris_X_scaled, iris.feature_names),
        (wine_X_scaled, wine.feature_names),
        (penguins_X_scaled, penguins_cols),
    )
)

In [None]:
# (file name, table) pairs, written in this order without the index column.
csv_exports = [
    ('x_wine.csv', wine_X),
    ("x_wine_scaled.csv", wine_X_scaled),
    ("y_wine.csv", wine_y),
    ('x_iris.csv', iris_X),
    ("x_iris_scaled.csv", iris_X_scaled),
    ("y_iris.csv", iris_y),
    ('x_penguins.csv', penguins_X),
    ("x_penguins_scaled.csv", penguins_X_scaled),
    ("y_penguins.csv", penguins_y),
]
for csv_name, table in csv_exports:
    # The DataFrame wrapper is kept so a Series is written like a one-column frame.
    pd.DataFrame(table).to_csv(csv_name, index=False)

In [None]:
from sklearn import cluster, datasets

In [None]:
n_samples = [156, 277, 421]  # samples per class
random_state = 170

# make 3-class dataset for classification
centers = [[19.5, 53.8], [24.5, 57.6], [31.5, 61.1]]

# First pair of features: default-spread blobs pushed through a 2x2 linear map.
X, y = datasets.make_blobs(
    n_samples=n_samples, centers=centers, random_state=random_state
)
transformation = [[0.4, -0.7], [-0.95, 3.4]]
X = np.dot(X, transformation)

# Second pair of features: same centers and seed, wider per-class spread,
# different linear map. `y` is reassigned from this call.
X_new, y = datasets.make_blobs(
    n_samples=n_samples, centers=centers, cluster_std=[2.4, 3.6, 3.6], random_state=random_state)
transformation_new = [[-0.5, 0.7], [0.95, -3.4]]
X_new = np.dot(X_new, transformation_new)

# Stack both pairs into four columns and mix them with a 4x4 matrix.
X = np.concatenate((X, X_new), axis=1)
new_transformation = [[0.4, -0.7, -2.8, 0.3], [-0.95, 3.4, 0.5, 0.7], [0.8, 0.7, 0.95, -3.4], [0.4, -2.7, -1.8, 0.5]]
X = np.dot(X, new_transformation)

# Fifth column: Gaussian noise (mean 14, std 3.5) drawn independently of the
# blobs. It comes from a generator seeded with `random_state` so the
# generated dataset is identical on every run; the previous unseeded
# np.random.normal call produced different values each time.
noise_rng = np.random.default_rng(random_state)
X_no_use = noise_rng.normal(14, 3.5, size=(sum(n_samples), 1))
X = np.concatenate((X, X_no_use), axis=1)

# Blend columns 1 and 4 (0-based) into each other; columns 0, 2 and 3 pass
# through unchanged.
transformation_age_weight = [
    [1, 0, 0, 0, 0],
    [0, 0.8, 0, 0, 0.3],
    [0, 0, 1, 0, 0],
    [0, 0, 0, 1, 0],
    [0, 0.2, 0, 0, 0.9],
]
X = np.dot(X, transformation_age_weight)

In [None]:
# Column labels shared by the raw and the standardised blob features.
blob_columns = ['temperature', 'weight', 'tail_length', 'altitude', 'age']

blob_X = pd.DataFrame(X, columns=blob_columns)
# Standardise with a fresh scaler, then restore the column labels.
blob_X_scaled = pd.DataFrame(
    StandardScaler().fit_transform(blob_X),
    columns=blob_columns,
)
blob_y = pd.DataFrame(y, columns=['species'])

In [None]:
# Write the blob features, their standardised version and the labels to CSV
# (no index column), in this order.
for blob_csv_name, blob_table in (
    ('x_blobs.csv', blob_X),
    ("x_blobs_scaled.csv", blob_X_scaled),
    ("y_blobs.csv", blob_y),
):
    pd.DataFrame(blob_table).to_csv(blob_csv_name, index=False)