In [None]:
# Import all required libraries
import pandas as pd
import numpy as np
from sklearn.datasets import make_classification 

In [None]:
# Create a simulated feature matrix and output vector with 1000 samples.
# A fixed random_state makes make_classification return the same data on every
# execution, so all later cells (and any re-run) see a consistent dataset.
features, label = make_classification(
    # 1000 samples (rows)
    n_samples=1000,
    # ten features (columns) in total
    n_features=10,
    # five features that actually predict the output's classes
    n_informative=5,
    # five features built as random linear combinations of the informative ones
    # (redundant, not independent noise)
    n_redundant=5,
    # five output classes
    n_classes=5,
    # per-class proportions, e.g. [0.2, 0.3, 0.5] for three classes;
    # None makes the classes balanced
    weights=None,
    # fixed seed for reproducibility
    random_state=42,
)

# Quick sanity check of the generated arrays: a small slice plus the shapes
print(f"First 10 samples:\n{features[:10]}")
print(f"Dataset shape: {features.shape}")
print(f"First 10 output:\n{label[:10]}")
print(f"Label shape: {label.shape}")

First 10 samples:
[[ 1.16606419e+00 -1.66810333e-01 -4.04697600e-01 -1.18989926e+00
  -2.67577871e-01 -5.19075116e-02 -1.30853404e-01  6.04296079e-01
  -5.29527011e-01 -2.16859384e-01]
 [ 1.53214018e+00 -1.12815406e+00  1.72440452e+00  4.47041512e-01
  -1.21249127e+00  2.14796478e+00  3.80493951e-01  1.73483568e+00
  -1.12207736e+00  1.56691846e+00]
 [-1.07759220e+00  1.09655782e+00 -1.35727304e+00 -5.43781447e-02
  -2.28636667e-01 -1.70759559e+00  1.68686965e+00 -2.08319355e+00
   1.35366727e+00 -5.09307136e-01]
 [ 2.18145905e+00  1.02748101e+00 -1.71890143e+00 -2.54786711e+00
  -1.74607231e+00 -1.44482860e+00  1.96768754e+00 -3.17675319e-01
  -7.12899505e-01 -4.70101648e-01]
 [ 9.38528731e-01 -3.16797259e-01  3.14666865e+00  2.54684769e+00
  -5.12762148e-01  2.89740019e-01 -2.66233174e+00  1.41925991e+00
   6.50786639e-02 -6.63175297e-01]
 [-2.73357540e+00  1.59586043e+00 -6.72796199e-01  9.56942707e-01
   1.44473454e+00 -8.87569077e-01 -5.35608312e-01 -1.30873707e+00
  -4.20914719e-

In [None]:
# Preview the feature matrix as a table: rows 0-9, one column per feature
pd.DataFrame(features).iloc[:10]

Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,1.635849,0.516414,-5.159295,0.021534,0.743662,0.739299,3.682534,-0.53113,2.839652,4.764157
1,2.157856,0.563183,-3.064599,0.798861,0.042857,-1.872478,0.924443,2.564987,1.272741,0.545223
2,-1.038238,1.498518,-0.465559,-0.980932,1.004402,-0.445746,0.528436,-0.728779,-1.174071,-0.569227
3,0.065288,0.095659,-2.349467,0.898223,1.958676,-0.859957,2.466349,-0.817023,-0.422576,3.35348
4,-0.444639,-0.749461,0.988647,1.040685,-0.401939,-0.332187,0.028299,0.293128,-0.510441,-0.290405
5,1.208026,-2.989,0.750827,1.431481,-0.728526,1.883828,-0.152353,-0.781466,1.864619,3.187954
6,-0.29996,3.584484,-3.715586,-2.80505,2.801199,-0.646535,1.678731,-1.395677,-0.688798,0.918349
7,-0.076112,2.103308,-2.369527,1.308731,-1.823127,-3.108007,2.05831,4.069181,0.08038,-3.531274
8,-1.173783,-0.269613,0.942181,2.330227,-2.480956,-1.514385,1.109262,2.504943,-0.444428,-2.919484
9,-0.209182,2.181856,-0.720624,-0.607371,-1.260444,-1.658532,-0.133117,2.47602,-0.219152,-4.030565


In [None]:
# Preview the class labels as a single-column table: rows 0-9
pd.DataFrame(label[:10])

Unnamed: 0,0
0,1
1,0
2,2
3,4
4,4
5,2
6,0
7,2
8,2
9,4


In [None]:
# Create a dataframe holding both the features and the label.
# Stacking the arrays with np.hstack would force a single dtype and silently turn
# the integer class labels into floats (1 -> 1.0), so the label is appended as its
# own column instead and keeps its integer dtype.
# Column labels stay positional: 0..n_features-1 for the features and n_features
# for the label, matching the layout the hstack-based frame had.
df = pd.concat(
    [pd.DataFrame(features), pd.Series(label, name=features.shape[1])],
    axis=1,
)
df.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10
0,1.166064,-0.16681,-0.404698,-1.189899,-0.267578,-0.051908,-0.130853,0.604296,-0.529527,-0.216859,1.0
1,1.53214,-1.128154,1.724405,0.447042,-1.212491,2.147965,0.380494,1.734836,-1.122077,1.566918,0.0
2,-1.077592,1.096558,-1.357273,-0.054378,-0.228637,-1.707596,1.68687,-2.083194,1.353667,-0.509307,2.0
3,2.181459,1.027481,-1.718901,-2.547867,-1.746072,-1.444829,1.967688,-0.317675,-0.7129,-0.470102,4.0
4,0.938529,-0.316797,3.146669,2.546848,-0.512762,0.28974,-2.662332,1.41926,0.065079,-0.663175,4.0
