#### Business Analysis

Dataset: 

- _fs_norm.csv_

Author: Luis Sergio Pastrana Lemus  
Date: 2025-09-09

# Business Analysis – Food Supplier Dataset

## __1. Libraries__.

In [1]:
from pathlib import Path
import sys

# Define project root dynamically, gets the current directory from which the notebook belongs and moves one level upper
project_root = Path.cwd().parent

# Add src to sys.path if it is not already
if str(project_root) not in sys.path:

    sys.path.append(str(project_root))

# Import function directly (more controlled than import *)
from src import *


from IPython.display import display, HTML
import os
import pandas as pd

## __2. Path to Data file__.

In [2]:
# Build route to data file and upload
data_file_path = project_root / "data" / "processed" / "clean"

df_fs = load_dataset_from_csv(data_file_path, "fs_norm.csv", sep=',', header='infer')

## __3. Exploratory Business Analysis__.

### 3.0 Casting Data types.

In [3]:
# Casting dtypes
# df_fs 'eventname' to category
df_fs.loc[:, 'eventname'] = df_fs['eventname'].astype('category')
df_fs['eventname'].dtype

df_fs.loc[:, :] = normalize_datetime(df_fs, include=['datetime'])

# dfs 'date' and 'time' to datetime
df_fs['date'] = pd.to_datetime(df_fs['date']).dt.date
df_fs['time'] = pd.to_datetime(df_fs['time'], format='%H:%M:%S').dt.time

df_fs.dtypes

eventname               object
deviceidhash             int64
datetime        datetime64[...
expid                    int64
date                    object
time                    object
dtype: object

In [4]:
df_fs.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 240887 entries, 0 to 240886
Data columns (total 6 columns):
 #   Column        Non-Null Count   Dtype              
---  ------        --------------   -----              
 0   eventname     240887 non-null  object             
 1   deviceidhash  240887 non-null  int64              
 2   datetime      240887 non-null  datetime64[ns, UTC]
 3   expid         240887 non-null  int64              
 4   date          240887 non-null  object             
 5   time          240887 non-null  object             
dtypes: datetime64[ns, UTC](1), int64(2), object(3)
memory usage: 11.0+ MB


In [5]:
df_fs

Unnamed: 0,eventname,deviceidhash,datetime,expid,date,time
0,tutorial,37374620466...,2019-08-01 0...,246,2019-08-01,00:07:28
1,mainscreena...,37374620466...,2019-08-01 0...,246,2019-08-01,00:08:00
2,mainscreena...,37374620466...,2019-08-01 0...,246,2019-08-01,00:08:55
3,offersscree...,37374620466...,2019-08-01 0...,246,2019-08-01,00:08:58
4,mainscreena...,14338408838...,2019-08-01 0...,247,2019-08-01,00:08:59
...,...,...,...,...,...,...
240882,mainscreena...,45996283640...,2019-08-07 2...,247,2019-08-07,21:12:25
240883,mainscreena...,58498066124...,2019-08-07 2...,246,2019-08-07,21:13:59
240884,mainscreena...,57469699388...,2019-08-07 2...,246,2019-08-07,21:14:43
240885,mainscreena...,57469699388...,2019-08-07 2...,246,2019-08-07,21:14:58


### 3.1  Events Funnel Analysis.

#### 3.1.1 Review the events listed in the logs and their frequency of occurrence. Sort them by frequency.

In [6]:
# Events frequency and sorting
df_fs_events = df_fs.groupby(['date', df_fs['datetime'].dt.hour]).size()
df_fs_events.name = 'events'
df_fs_events = df_fs_events.reset_index()
df_fs_events['datetime'] = df_fs_events['date'].astype(str) + '_' + df_fs_events['datetime'].astype(str)
df_fs_events = df_fs_events.drop(columns='date')
df_fs_events

Unnamed: 0,datetime,events
0,2019-08-01_0,192
1,2019-08-01_1,228
2,2019-08-01_2,328
3,2019-08-01_3,657
4,2019-08-01_4,837
...,...,...
161,2019-08-07_17,1906
162,2019-08-07_18,1679
163,2019-08-07_19,1507
164,2019-08-07_20,949


In [7]:
plot_vertical_bar_plotpx(df_fs_events, x='datetime', y='events', title='Events Frequency', xlabel='Date', ylabel='Events', sort=True)

#### 3.1.2 Find the number of users who performed each of these actions. Sort the events by the number of users. Calculate the percentage of users who performed the action at least once.

In [8]:
# Show users per actions
df_fs_events = df_fs.groupby('eventname').agg(events=('eventname', 'count'), users=('deviceidhash', 'nunique')).sort_values(by='users', ascending=False).reset_index()
df_fs_events

Unnamed: 0,eventname,events,users
0,mainscreena...,117328,7419
1,offersscree...,46333,4593
2,cartscreena...,42303,3734
3,paymentscre...,33918,3539
4,tutorial,1005,840


In [9]:
plot_horizontal_bar_plotpx(df_fs_events, x='users', y='eventname', title='Conversion Funnel', xlabel='Users', ylabel='Events', sort=True)

In [10]:
# Show percentage of users that perfomed the action at least once
df_fs_events['conversion_event'] = ((df_fs_events['users'] / df_fs_events['events']) * 100).round(3)
df_fs_events

Unnamed: 0,eventname,events,users,conversion_event
0,mainscreena...,117328,7419,6.323
1,offersscree...,46333,4593,9.913
2,cartscreena...,42303,3734,8.827
3,paymentscre...,33918,3539,10.434
4,tutorial,1005,840,83.582


`LSPL`
- Order in which the events occurred: mainscreenappear, offersscreenappear, cartscreenappear, paymentscreensuccessful, tutorial.
- tutorial event seems not to be part of the same sequence, proper sequence should be clarified ans confirmed by the customer(mainscreenappear, offersscreenappear, cartscreenappear, paymentscreensuccessful)

#### 3.1.2 Use the event funnel to find the percentage of users who move from one stage to the next.

In [11]:
# Show Total convesion rate
df_fs_events['totalconversionrate'] = ((df_fs_events['users'] / df_fs_events.loc[0, 'users']) * 100).round(3)
df_fs_events

Unnamed: 0,eventname,events,users,conversion_event,totalconversionrate
0,mainscreena...,117328,7419,6.323,100.0
1,offersscree...,46333,4593,9.913,61.909
2,cartscreena...,42303,3734,8.827,50.33
3,paymentscre...,33918,3539,10.434,47.702
4,tutorial,1005,840,83.582,11.322


In [12]:
# Show Total convesion next stage rate
df_fs_events['conversionstagerate'] = ((df_fs_events['users'] / df_fs_events['users'].shift(1)) * 100).round(3)
df_fs_events

Unnamed: 0,eventname,events,users,conversion_event,totalconversionrate,conversionstagerate
0,mainscreena...,117328,7419,6.323,100.0,
1,offersscree...,46333,4593,9.913,61.909,61.909
2,cartscreena...,42303,3734,8.827,50.33,81.298
3,paymentscre...,33918,3539,10.434,47.702,94.778
4,tutorial,1005,840,83.582,11.322,23.736


In [13]:
df_fs_events['droprate'] = (100 - df_fs_events['conversionstagerate']).round(3)
df_fs_events

Unnamed: 0,eventname,events,users,conversion_event,totalconversionrate,conversionstagerate,droprate
0,mainscreena...,117328,7419,6.323,100.0,,
1,offersscree...,46333,4593,9.913,61.909,61.909,38.091
2,cartscreena...,42303,3734,8.827,50.33,81.298,18.702
3,paymentscre...,33918,3539,10.434,47.702,94.778,5.222
4,tutorial,1005,840,83.582,11.322,23.736,76.264


In [14]:
plot_horizontal_bar_plotpx(df_fs_events, x='totalconversionrate', y='eventname', title='Conversion Rate', xlabel='Conversion Rate', ylabel='Events', sort=True)

In [15]:
plot_horizontal_bar_plotpx(df_fs_events, x='conversionstagerate', y='eventname', title='Conversion Rate per Stage', xlabel='Conversion Rate', ylabel='Events', sort=True)

In [16]:
plot_horizontal_bar_plotpx(df_fs_events, x='droprate', y='eventname', title='Drop Rate per Stage', xlabel='Drop Rate', ylabel='Events', sort=True)

In [18]:
# Percentage of users who complete the entire journey from their first interaction to making a purchase
display(HTML(f"> User rate through complete funnel: {((df_fs_events.loc[3, 'users'] / df_fs_events.loc[0, 'users']) * 100):.3f}%"))