#### Feature Engineering  

Dataset: 
- _fs_clean.csv_

Author: Luis Sergio Pastrana Lemus  
Date: 2025-09-09

# Feature engineering – Food Supplier Dataset

## __1. Libraries__.

In [1]:
from pathlib import Path
import sys

# Define project root dynamically, gets the current directory from which the notebook belongs and moves one level upper
project_root = Path.cwd().parent

# Add src to sys.path if it is not already
if str(project_root) not in sys.path:

    sys.path.append(str(project_root))

# Import function directly (more controlled than import *)
from src import *

from functools import partial
from IPython.display import display, HTML
import numpy as np
import os
import pandas as pd

## __2. Path to Data file__.

In [2]:
# Build route to data file and upload
data_file_path = project_root / "data" / "processed" / "clean"
df_fs_clean = load_dataset_from_csv(data_file_path, "fs_clean.csv", sep=',', header='infer')

In [3]:
# Format notebook output
format_notebook()

## __Functions__.

In [4]:
# Function 

## 3 __Casting to data types__.

### 3.1 Casting to string data type.

In [5]:
# Casting dtypes
# df_fs 'eventname' to category
df_fs_clean.loc[:, 'eventname'] = df_fs_clean['eventname'].astype('category')
df_fs_clean['eventname'].dtype

# df_fs 'eventtimestamp' to datetime
df_fs_clean.loc[:, :] = normalize_datetime(df_fs_clean, include=['datetime'], unix_unit='s')
df_fs_clean.dtypes


eventname                    object
deviceidhash                  int64
datetime        datetime64[ns, UTC]
expid                         int64
dtype: object

## 4. Feature Engineering.

### 4.1 Datasets.

#### 4.1.1 fs_clean.

In [6]:
df_fs_clean

Unnamed: 0,eventname,deviceidhash,datetime,expid
0,mainscreenappear,4575588528974610257,2019-07-25 04:43:36+00:00,246
1,mainscreenappear,7416695313311560658,2019-07-25 11:11:42+00:00,246
2,paymentscreensuccessful,3518123091307005509,2019-07-25 11:28:47+00:00,248
3,cartscreenappear,3518123091307005509,2019-07-25 11:28:47+00:00,248
4,paymentscreensuccessful,6217807653094995999,2019-07-25 11:48:42+00:00,248
...,...,...,...,...
243708,mainscreenappear,4599628364049201812,2019-08-07 21:12:25+00:00,247
243709,mainscreenappear,5849806612437486590,2019-08-07 21:13:59+00:00,246
243710,mainscreenappear,5746969938801999050,2019-08-07 21:14:43+00:00,246
243711,mainscreenappear,5746969938801999050,2019-08-07 21:14:58+00:00,246


In [7]:
# Add date column
df_fs_clean['date'] = df_fs_clean['datetime'].dt.date

# Add time column
df_fs_clean['time'] = df_fs_clean['datetime'].dt.time

df_fs_clean

Unnamed: 0,eventname,deviceidhash,datetime,expid,date,time
0,mainscreenappear,4575588528974610257,2019-07-25 04:43:36+00:00,246,2019-07-25,04:43:36
1,mainscreenappear,7416695313311560658,2019-07-25 11:11:42+00:00,246,2019-07-25,11:11:42
2,paymentscreensuccessful,3518123091307005509,2019-07-25 11:28:47+00:00,248,2019-07-25,11:28:47
3,cartscreenappear,3518123091307005509,2019-07-25 11:28:47+00:00,248,2019-07-25,11:28:47
4,paymentscreensuccessful,6217807653094995999,2019-07-25 11:48:42+00:00,248,2019-07-25,11:48:42
...,...,...,...,...,...,...
243708,mainscreenappear,4599628364049201812,2019-08-07 21:12:25+00:00,247,2019-08-07,21:12:25
243709,mainscreenappear,5849806612437486590,2019-08-07 21:13:59+00:00,246,2019-08-07,21:13:59
243710,mainscreenappear,5746969938801999050,2019-08-07 21:14:43+00:00,246,2019-08-07,21:14:43
243711,mainscreenappear,5746969938801999050,2019-08-07 21:14:58+00:00,246,2019-08-07,21:14:58


In [8]:
project_root = Path.cwd().parent
processed_path = project_root / "data" / "processed" / "feature" / "fs_feature.csv"

df_fs_clean.to_csv(processed_path, index=False)