### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Number of samples (rows) generated for every feature vector
m_samples = 20

# Seed NumPy's global RNG so the random draws made through np.random repeat
# on every Restart & Run All (pandas' .sample() also falls back to this
# global RNG when no random_state is passed).
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

#### Generate numeric columns 

In [3]:
# Uniform floats in [0, 50), one per sample, held in a 1-D Series
numeric_1 = pd.Series(np.random.random(size=m_samples) * 50)

In [4]:
# Uniform floats in [0, m_samples), one per sample, held in a 1-D Series
numeric_2 = pd.Series(np.random.random(size=m_samples) * m_samples)

In [5]:
# Uniform floats in [0, m_samples), one per sample, held in a 1-D Series
numeric_3 = pd.Series(np.random.random(size=m_samples) * m_samples)

##### Add NaNs to `numeric_2` and `numeric_3`

In [6]:
# Uniform floats in [0, 1) (not integers, despite the name); used to pick which rows get NaN
random_ints = np.random.random(size=m_samples)

In [7]:
# Replace with NaN every entry whose draw fell below 0.2 (about 20% of rows on average)
numeric_2 = numeric_2.mask(random_ints < 0.2)

In [8]:
# Fresh uniform draws in [0, 1); entries whose draw fell below 0.8
# (about 80% of rows on average) are replaced with NaN
random_ints = np.random.random(size=m_samples)
numeric_3 = numeric_3.mask(random_ints < 0.8)

#### Generate datetime columns 

In [9]:
# Evenly spaced timestamps starting at the current time (so the values differ
# on every run), 1,703,587 seconds apart.
# The step is passed as a timedelta rather than the '1703587S' alias string:
# the uppercase 'S' unit is deprecated in recent pandas in favour of 's'.
datetimes_1 = pd.date_range(
    dt.datetime.now(),
    periods=m_samples,
    freq=dt.timedelta(seconds=1703587),
)

# Shuffle the rows, renumber them from 0, and keep the result as a
# one-column DataFrame (column label 0) rather than a Series.
datetimes_1 = (
    pd.Series(datetimes_1)
    .sample(frac=1)
    .reset_index(drop=True)
    .to_frame()
)

In [10]:
# Fixed starting point for the second datetime column
datetime = dt.datetime(year=2000, month=3, day=21, hour=22, minute=2, second=23)

# Evenly spaced timestamps 21,173,587 seconds apart.
# The step is passed as a timedelta rather than the '21173587S' alias string:
# the uppercase 'S' unit is deprecated in recent pandas in favour of 's'.
datetimes_2 = pd.date_range(
    datetime,
    periods=m_samples,
    freq=dt.timedelta(seconds=21173587),
)

# Shuffle the rows, renumber them from 0, and keep the result as a
# one-column DataFrame (column label 0) rather than a Series.
datetimes_2 = (
    pd.Series(datetimes_2)
    .sample(frac=1)
    .reset_index(drop=True)
    .to_frame()
)

In [11]:
# Spot-check: weekday number (Monday=0 ... Sunday=6) of the first shuffled timestamp
datetimes_1.iat[0, 0].weekday()

2

#### Generate boolean-like columns

In [12]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Build the string labels in one step: draws >= 0.5 become 'True', the rest 'False'.
# Writing strings into a float Series instead relies on implicit dtype
# upcasting, which pandas has deprecated.
boolean_like = pd.Series(np.where(random_ints >= 0.5, 'True', 'False'))

In [13]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Build the lowercase string labels in one step: draws >= 0.5 become 'true', the rest 'false'.
# Writing strings into a float Series instead relies on implicit dtype
# upcasting, which pandas has deprecated.
boolean_like_2 = pd.Series(np.where(random_ints >= 0.5, 'true', 'false'))

In [14]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Build the uppercase string labels in one step: draws >= 0.5 become 'TRUE', the rest 'FALSE'.
# Writing strings into a float Series instead relies on implicit dtype
# upcasting, which pandas has deprecated.
boolean_like_3 = pd.Series(np.where(random_ints >= 0.5, 'TRUE', 'FALSE'))

In [15]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Mixed-case labels: draws >= 0.8 -> 'True', draws in [0.5, 0.8) -> 'TRUE',
# everything below 0.5 -> 'False'. np.select takes the first matching
# condition, so the stricter threshold is listed first.
# Writing strings into a float Series instead relies on implicit dtype
# upcasting, which pandas has deprecated.
boolean_like_4 = pd.Series(
    np.select(
        [random_ints >= 0.8, random_ints >= 0.5],
        ['True', 'TRUE'],
        default='False',
    )
)

#### Generate true Boolean column

In [16]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# The comparison already yields a bool array, so the Series gets a real bool
# dtype: True where the draw is >= 0.5, False otherwise. Writing True/False
# into a float Series instead relies on deprecated implicit upcasting and
# leaves an object-dtype column.
boolean = pd.Series(random_ints >= 0.5)

#### Generate nominal column

In [17]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Three unordered categories: draws > 0.67 -> 'Lizard', draws in [0.33, 0.67]
# -> 'Cat', draws < 0.33 -> 'Dog'. np.select takes the first matching
# condition, so the stricter threshold is listed first.
# Writing strings into a float Series instead relies on implicit dtype
# upcasting, which pandas has deprecated.
nominal = pd.Series(
    np.select(
        [random_ints > 0.67, random_ints >= 0.33],
        ['Lizard', 'Cat'],
        default='Dog',
    )
)

#### Generate ordinal column

In [18]:
# Uniform floats in [0, 1), one per sample
random_ints = np.random.random(size=m_samples)

# Start the Series from the raw draws
ordinal_1 = pd.Series(data=random_ints)

In [19]:
# Map each draw to a Likert label in 0.2-wide bands. np.select takes the first
# matching condition, so the highest band is listed first.
# The default covers every draw at or below 0.2, including an exact 0.0, which
# a `> 0` comparison would miss and leave as a raw float.
# Building the labels in one step also avoids writing strings into a float
# Series, which relies on implicit dtype upcasting that pandas has deprecated.
ordinal_1 = pd.Series(
    np.select(
        [
            random_ints > 0.8,
            random_ints > 0.6,
            random_ints > 0.4,
            random_ints > 0.2,
        ],
        [
            'strongly agree',
            'agree',
            'neither agree nor disagree',
            'disagree',
        ],
        default='strongly disagree',
    )
)

### Stitch together final dataframe

In [20]:
# Start from an empty frame that shares the datetime index, then attach the
# datetime and numeric features in a single chained call
df = pd.DataFrame(index=datetimes_1.index).assign(
    datetimes_1=datetimes_1,
    datetimes_2=datetimes_2,
    numeric_1=numeric_1,
    numeric_2=numeric_2,
    numeric_3=numeric_3,
)

In [21]:
# Attach the four string-encoded Boolean columns
df = df.assign(
    boolean_like_1=boolean_like,
    boolean_like_2=boolean_like_2,
    boolean_like_3=boolean_like_3,
    boolean_like_4=boolean_like_4,
)

In [22]:
# Attach the true Boolean column
df = df.assign(boolean=boolean)

In [23]:
# Attach the nominal (unordered categorical) column
df = df.assign(nominal=nominal)

In [24]:
# Attach the ordinal (ordered categorical) column
df = df.assign(ordinal_1=ordinal_1)

### Save to current directory

In [25]:
# Write the frame (index included) to dataset.csv in the working directory
df.to_csv('dataset.csv')

In [31]:
# Display the assembled frame as this cell's output
df

Unnamed: 0,datetimes_1,datetimes_2,numeric_1,numeric_2,numeric_3,boolean_like_1,boolean_like_2,boolean_like_3,boolean_like_4,boolean,nominal,ordinal_1
0,2019-09-25 13:55:32.331191,2002-11-27 04:14:51,46.552984,13.739006,,True,True,False,False,True,Dog,agree
1,2019-07-08 17:03:04.331191,2012-12-20 03:31:36,25.588787,,,False,False,True,False,False,Dog,strongly agree
2,2019-06-18 23:49:57.331191,2008-12-10 18:12:54,25.487432,6.182592,,False,True,False,False,True,Dog,disagree
3,2019-12-13 10:48:00.331191,2000-03-21 22:02:23,4.535463,15.181189,9.776362,False,False,False,True,False,Dog,strongly disagree
4,2019-10-15 07:08:39.331191,2010-12-15 22:52:15,24.660904,1.107595,,True,False,False,False,False,Lizard,neither agree nor disagree
5,2020-03-21 00:53:35.331191,2006-12-06 13:33:33,22.562598,7.249762,,True,False,False,False,False,Dog,agree
6,2019-05-10 13:23:43.331191,2007-08-08 15:06:40,16.756842,7.094363,4.042631,True,True,True,False,False,Cat,strongly agree
7,2020-02-10 14:27:21.331191,2002-03-27 02:41:44,25.524859,,,False,True,True,True,False,Cat,neither agree nor disagree
8,2020-03-01 07:40:28.331191,2000-11-21 23:35:30,22.102289,14.610025,,True,True,False,False,True,Lizard,strongly agree
9,2020-01-02 04:01:07.331191,2004-03-31 07:21:05,36.366723,1.534639,,True,True,True,False,False,Dog,strongly disagree
