### Import Libraries

In [1]:
import pandas as pd
import numpy as np
import datetime as dt

In [2]:
# Define number of samples for each feature vector
m_samples = 100

# Seed NumPy's global RNG so the np.random draws made after this cell are
# repeatable from one fresh-kernel run to the next.
SEED = 42
np.random.seed(SEED)

#### Generate numeric columns 

In [3]:
# One uniform draw in [0, 1) per sample, scaled by 1200 and wrapped in a Series
numeric_1 = pd.Series(np.random.random(m_samples) * 1200)

In [4]:
# One uniform draw in [0, 1) per sample, scaled by the sample count
numeric_2 = pd.Series(np.random.random(m_samples) * m_samples)

In [5]:
# One uniform draw in [0, 1) per sample, scaled by the sample count
numeric_3 = pd.Series(np.random.random(m_samples) * m_samples)

##### Add NaNs to `numeric_2` and `numeric_3`

In [6]:
# Despite the name, these are uniform floats in [0, 1), one per sample
random_ints = np.random.random(m_samples)

In [7]:
# Replace with NaN every entry whose paired random draw falls below 0.4
numeric_2 = numeric_2.mask(random_ints < 0.4)

In [8]:
# Fresh uniform draws in [0, 1); entries paired with a draw below 0.9 become NaN
random_ints = np.random.random(m_samples)
numeric_3 = numeric_3.mask(random_ints < 0.9)

#### Generate datetime columns 

In [9]:
# Evenly spaced timestamps starting at the current time, 1703587 seconds apart.
# The step is passed as a Timedelta instead of the 'S' frequency alias, which
# recent pandas releases deprecate.
datetimes_1 = pd.date_range(
    dt.datetime.now(),
    periods=m_samples,
    freq=pd.Timedelta(seconds=1703587),
)
# Shuffle the timestamps so they are no longer in chronological order
datetimes_1 = pd.Series(datetimes_1).sample(frac=1)
# Renumber the rows from 0; the result is a one-column DataFrame (column label 0)
datetimes_1 = datetimes_1.reset_index().drop(columns='index')

In [10]:
# Fixed starting point for the second datetime feature
datetime = dt.datetime(year=2000, month=3, day=21, hour=22, minute=2, second=23)
# Evenly spaced timestamps 30173587 seconds apart. The step is passed as a
# Timedelta instead of the 'S' frequency alias, which recent pandas releases
# deprecate.
datetimes_2 = pd.date_range(
    datetime,
    periods=m_samples,
    freq=pd.Timedelta(seconds=30173587),
)
# Shuffle the timestamps so they are no longer in chronological order
datetimes_2 = pd.Series(datetimes_2).sample(frac=1)
# Renumber the rows from 0; the result is a one-column DataFrame (column label 0)
datetimes_2 = datetimes_2.reset_index().drop(columns='index')

In [11]:
# Spot check: weekday number of the first shuffled timestamp
datetimes_1.iat[0, 0].weekday()

0

#### Generate boolean-like columns

In [12]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# Build the string labels in one step: 'True' where the draw is >= 0.5,
# 'False' otherwise. Creating the labels directly avoids writing strings into
# a float64 Series, which recent pandas flags as an incompatible-dtype assignment.
boolean_like = pd.Series(np.where(random_ints >= 0.5, 'True', 'False'))

In [13]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# Build the lowercase string labels in one step: 'true' where the draw is
# >= 0.5, 'false' otherwise. Creating the labels directly avoids writing strings
# into a float64 Series, which recent pandas flags as an incompatible-dtype assignment.
boolean_like_2 = pd.Series(np.where(random_ints >= 0.5, 'true', 'false'))

In [14]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# Build the uppercase string labels in one step: 'TRUE' where the draw is
# >= 0.5, 'FALSE' otherwise. Creating the labels directly avoids writing strings
# into a float64 Series, which recent pandas flags as an incompatible-dtype assignment.
boolean_like_3 = pd.Series(np.where(random_ints >= 0.5, 'TRUE', 'FALSE'))

In [15]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# Mixed-case string labels, first matching condition wins:
#   draw >= 0.8        -> 'True'
#   0.5 <= draw < 0.8  -> 'TRUE'
#   draw < 0.5         -> 'False'
# np.select builds the labels directly, so no strings are written into a
# float64 Series (an incompatible-dtype assignment in recent pandas).
boolean_like_4 = pd.Series(
    np.select(
        [random_ints >= 0.8, random_ints >= 0.5],
        ['True', 'TRUE'],
        default='False',
    )
)

#### Generate true Boolean column

In [16]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# True where the draw is >= 0.5, False otherwise. Building the Series from the
# comparison gives a genuine bool dtype, rather than writing Python bools into
# a float64 Series and relying on pandas to upcast it.
boolean = pd.Series(random_ints >= 0.5)

#### Generate nominal column

In [18]:
# Generate array of uniform random floats in [0, 1) (the name is historical)
random_ints = np.random.random(size=m_samples)

# Map each draw to an animal label, first matching condition wins:
#   draw > 0.8          -> 'Iguana'
#   0.4 < draw <= 0.8   -> 'Cat'
#   0.2 <= draw <= 0.4  -> 'Mouse'
#   draw < 0.2          -> 'Dog'
# np.select builds the labels directly, so no strings are written into a
# float64 Series (an incompatible-dtype assignment in recent pandas).
nominal = pd.Series(
    np.select(
        [random_ints > 0.8, random_ints > 0.4, random_ints >= 0.2],
        ['Iguana', 'Cat', 'Mouse'],
        default='Dog',
    )
)

#### Generate ordinal column

In [19]:
# Uniform random floats in [0, 1), one per sample (the name is historical)
random_ints = np.random.random(m_samples)

# Start the ordinal Series from the raw draws
ordinal_1 = pd.Series(data=random_ints)

In [20]:
# Map each draw to a Likert-style label, first matching condition wins:
#   draw > 0.8 -> 'strongly agree'
#   draw > 0.6 -> 'agree'
#   draw > 0.4 -> 'neither agree nor disagree'
#   draw > 0.2 -> 'disagree'
#   otherwise  -> 'strongly disagree'
# Using a default for the lowest band also covers a draw of exactly 0.0, which
# a `> 0` mask would leave as a raw float, and avoids writing strings into a
# float64 Series (an incompatible-dtype assignment in recent pandas).
ordinal_1 = pd.Series(
    np.select(
        [
            random_ints > 0.8,
            random_ints > 0.6,
            random_ints > 0.4,
            random_ints > 0.2,
        ],
        ['strongly agree', 'agree', 'neither agree nor disagree', 'disagree'],
        default='strongly disagree',
    ),
    index=ordinal_1.index,
)

### Stitch together final dataframe

In [21]:
# Start from an empty frame that shares the datetime row index, then attach the
# datetime and numeric features in a single chained step (same column order).
df = pd.DataFrame(index=datetimes_1.index).assign(
    datetimes_1=datetimes_1,
    datetimes_2=datetimes_2,
    numeric_1=numeric_1,
    numeric_2=numeric_2,
    numeric_3=numeric_3,
)

In [22]:
# Attach the four Boolean-like string features as new columns
df = df.assign(
    boolean_like_1=boolean_like,
    boolean_like_2=boolean_like_2,
    boolean_like_3=boolean_like_3,
    boolean_like_4=boolean_like_4,
)

In [23]:
# Attach the true Boolean feature as a new column
df = df.assign(boolean=boolean)

In [24]:
# Attach the nominal (unordered category) feature as a new column
df = df.assign(nominal=nominal)

In [25]:
# Attach the ordinal (ordered category) feature as a new column
df = df.assign(ordinal_1=ordinal_1)

### Save to current directory

In [26]:
# Write the assembled frame (row index included) to the working directory
df.to_csv('dataset_test.csv')

In [27]:
# Preview the first rows only instead of rendering the whole frame
df.head(10)

Unnamed: 0,datetimes_1,datetimes_2,numeric_1,numeric_2,numeric_3,boolean_like_1,boolean_like_2,boolean_like_3,boolean_like_4,boolean,nominal,ordinal_1
0,2022-12-05 00:00:19.201864,2060-06-16 11:48:44,1085.363773,,,False,true,FALSE,False,False,Mouse,neither agree nor disagree
1,2019-11-07 02:32:40.201864,2014-07-25 09:19:08,924.379152,42.915310,,False,false,TRUE,TRUE,True,Mouse,strongly disagree
2,2019-07-31 12:27:05.201864,2055-09-05 08:03:09,566.488961,34.995786,,True,true,FALSE,TRUE,True,Dog,disagree
3,2023-07-29 14:37:43.201864,2090-02-05 15:55:21,822.036241,,18.199072,True,false,TRUE,TRUE,False,Cat,strongly agree
4,2020-08-09 03:36:18.201864,2087-03-25 23:16:00,679.786375,,,False,false,TRUE,TRUE,True,Iguana,strongly disagree
5,2023-11-24 21:56:25.201864,2045-02-27 18:58:52,575.920156,18.179089,,True,false,FALSE,True,False,Dog,strongly disagree
6,2019-09-28 16:06:26.201864,2046-02-12 00:31:59,268.480528,,,True,true,TRUE,TRUE,True,Iguana,strongly disagree
7,2024-06-09 02:07:35.201864,2058-07-19 00:42:30,893.373673,9.859781,,False,true,TRUE,TRUE,True,Iguana,strongly agree
8,2022-10-06 20:20:58.201864,2031-10-10 13:15:14,145.758175,,,False,true,TRUE,TRUE,True,Cat,strongly agree
9,2021-03-14 01:00:35.201864,2070-12-23 00:53:01,172.357125,89.465236,,True,true,FALSE,False,True,Cat,disagree
