# Adding project root folder to 'sys.path'

In [1]:
import sys
from pathlib import Path

In [2]:
# NOTE: The explanation below is written as a raw string (r'''...''') so that the
# backslashes in the Windows path are kept literally; in a normal string the "\U" in
# "e:\Users" would be parsed as the start of a Unicode escape sequence.
# The string is a bare expression and the last statement of the cell, so the notebook
# echoes its repr as the cell output.
# NOTE(review): the example import in the text names "agno_app.data_loader", while the
# import cell further down uses "agno_app.data_load_and_clean" - confirm which is current.

r'''
When we write this

"from agno_app.data_loader import load_raw_marketing_data"

Python searches for a folder or module named agno_app in a list of directories called sys.path.

sys.path = list of directories where Python looks for imports (including "." which means current
working directory)

If agno_app is not inside any directory listed in sys.path, Python raises

ModuleNotFoundError: No module named 'agno_app'

When we run this notebook, we are inside "Agno_Customer_Personality_Analysis_Agent/notebooks/"

But, agno_app is in "Agno_Customer_Personality_Analysis_Agent/agno_app/"

So, we need to add the parent directory of "notebooks" (i.e. the project root -

 e:\Users\Anirudh\Job\Projects\Agno_Customer_Personality_Analysis_Agent) to sys.path 

'''

'\nWhen we write this\n\n"from agno_app.data_loader import load_raw_marketing_data"\n\nPython searches for a folder or module named agno_app in a list of directories called sys.path.\n\nsys.path = list of directories where Python looks for imports (including "." which means current\nworking directory)\n\nIf agno_app is not inside any directory listed in sys.path, Python raises\n\nModuleNotFoundError: No module named \'agno_app\'\n\nWhen we run this notebook, we are inside "Agno_Customer_Personality_Analysis_Agent/notebooks/"\n\nBut, agno_app is in "Agno_Customer_Personality_Analysis_Agent/agno_app/"\n\nSo, we need to add the parent directory of "notebooks" (i.e. the project root -\n\n e:\\Users\\Anirudh\\Job\\Projects\\Agno_Customer_Personality_Analysis_Agent) to sys.path \n\n'

In [3]:
def find_project_root(start: Path, marker: str = "agno_app") -> Path:
    """Return the nearest directory at or above ``start`` that contains ``marker``.

    The notebook is normally launched from the "notebooks" folder, in which case the
    result is its parent. Searching upward also covers kernels whose working directory
    is the project root itself or a sub-folder of "notebooks".

    Args:
        start: Directory to start searching from (usually ``Path.cwd()``).
        marker: Name of a directory that identifies the project root.

    Returns:
        The first directory (``start`` or one of its ancestors) that contains a
        ``marker`` directory. When none does, ``start.parent`` is returned, which is
        what this cell used unconditionally before.
    """
    start = start.absolute()
    for candidate in (start, *start.parents):
        if (candidate / marker).is_dir():
            return candidate
    # No ancestor holds the package: keep the previous assumption (parent of "notebooks").
    return start.parent


# Project root = folder that holds the "agno_app" package (normally the parent of "notebooks")
PROJECT_ROOT = find_project_root(Path.cwd())

# Add project root to sys.path if not already there
# (inserted at position 0 so it is searched before the other sys.path entries)
if str(PROJECT_ROOT) not in sys.path:
    sys.path.insert(0, str(PROJECT_ROOT))

print("Using PROJECT_ROOT:", PROJECT_ROOT)

Using PROJECT_ROOT: e:\Users\Anirudh\Job\Projects\Agno_Customer_Personality_Analysis_Agent


# Checking 'data_load_and_clean' file

## 1. Checking the 'load_raw_marketing_data' function

In [4]:
# This import is placed after the setup cell on purpose: it only resolves once
# PROJECT_ROOT has been added to sys.path.
from agno_app.data_load_and_clean import load_raw_marketing_data

# Load the data returned by load_raw_marketing_data() (a pandas DataFrame, per the
# recorded output) and print its summary: column names, non-null counts and dtypes.
df = load_raw_marketing_data()
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2240 entries, 0 to 2239
Data columns (total 29 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   ID                   2240 non-null   int64  
 1   Year_Birth           2240 non-null   int64  
 2   Education            2240 non-null   object 
 3   Marital_Status       2240 non-null   object 
 4   Income               2216 non-null   float64
 5   Kidhome              2240 non-null   int64  
 6   Teenhome             2240 non-null   int64  
 7   Dt_Customer          2240 non-null   object 
 8   Recency              2240 non-null   int64  
 9   MntWines             2240 non-null   int64  
 10  MntFruits            2240 non-null   int64  
 11  MntMeatProducts      2240 non-null   int64  
 12  MntFishProducts      2240 non-null   int64  
 13  MntSweetProducts     2240 non-null   int64  
 14  MntGoldProds         2240 non-null   int64  
 15  NumDealsPurchases    2240 non-null   i

In [5]:
# Preview the first rows of the loaded frame (displayed as the cell's last expression).
df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,Graduation,Single,58138.0,0,0,04-09-2012,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,Graduation,Single,46344.0,1,1,08-03-2014,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,Graduation,Together,71613.0,0,0,21-08-2013,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,Graduation,Together,26646.0,1,0,10-02-2014,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,PhD,Married,58293.0,1,0,19-01-2014,94,173,...,5,0,0,0,0,0,0,3,11,0


In [6]:
# Normalise the two categorical text columns (trim surrounding whitespace, lower-case)
# so that value comparisons are not affected by case or padding.
for text_column in ("Marital_Status", "Education"):
    df[text_column] = df[text_column].str.strip().str.lower()

# Preview the frame after normalisation.
df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,NumWebVisitsMonth,AcceptedCmp3,AcceptedCmp4,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Z_CostContact,Z_Revenue,Response
0,5524,1957,graduation,single,58138.0,0,0,04-09-2012,58,635,...,7,0,0,0,0,0,0,3,11,1
1,2174,1954,graduation,single,46344.0,1,1,08-03-2014,38,11,...,5,0,0,0,0,0,0,3,11,0
2,4141,1965,graduation,together,71613.0,0,0,21-08-2013,26,426,...,4,0,0,0,0,0,0,3,11,0
3,6182,1984,graduation,together,26646.0,1,0,10-02-2014,26,11,...,6,0,0,0,0,0,0,3,11,0
4,5324,1981,phd,married,58293.0,1,0,19-01-2014,94,173,...,5,0,0,0,0,0,0,3,11,0


# Checking Data understanding & cleaning tool

In [7]:
from agno_app.data_load_and_clean import get_final_dataset

In [8]:
# Build the dataset returned by get_final_dataset() and preview its first rows.
# Per the recorded output it carries derived columns such as Total_Purchases,
# Total_Children, CustomerTenureDays, TotalSpend and IsHighValue.
final_df = get_final_dataset()
final_df.head()

Unnamed: 0,ID,Year_Birth,Education,Marital_Status,Income,Kidhome,Teenhome,Dt_Customer,Recency,MntWines,...,AcceptedCmp5,AcceptedCmp1,AcceptedCmp2,Complain,Response,Total_Purchases,Total_Children,CustomerTenureDays,TotalSpend,IsHighValue
0,5524,1957,graduate,single,58138.0,0,0,2012-09-04,58,635,...,0,0,0,0,1,29,0,663,1617,True
1,2174,1954,graduate,single,46344.0,1,1,2014-03-08,38,11,...,0,0,0,0,0,9,2,113,27,False
2,4141,1965,graduate,together,71613.0,0,0,2013-08-21,26,426,...,0,0,0,0,0,24,0,312,776,False
3,6182,1984,graduate,together,26646.0,1,0,2014-02-10,26,11,...,0,0,0,0,0,12,1,139,53,False
4,5324,1981,phd,married,58293.0,1,0,2014-01-19,94,173,...,0,0,0,0,0,19,1,161,422,False


# Checking 'data_tools.py' file

In [9]:
# Sanity check before importing from `tools`: show the parent of the current working
# directory, which is the value the setup cell assigns to PROJECT_ROOT.
print(Path.cwd().parent)

e:\Users\Anirudh\Job\Projects\Agno_Customer_Personality_Analysis_Agent


In [11]:
from tools.data_tools import _global_stats_impl, _segment_stats_impl, _top_customers_by_spend_impl

In [13]:
# Print a header followed by the dictionary returned by _global_stats_impl().
# NOTE(review): in the recorded output every key is prefixed with "\n"
# (e.g. '\nn_customers'), unlike the keys shown for _segment_stats_impl - check how the
# keys are built in tools.data_tools, or re-run this cell if the module was since fixed.
print("Global Stats:\n")
print(_global_stats_impl())

Global Stats:

{'\nn_customers': 2240, '\navg_income': 52237.98, '\navg_total_spend': 605.8, '\navg_recency_days': 49.11, '\navg_customer_tenure_days': 353.58, '\npct_high_value_customers': 20.04}


In [20]:
# Filters for the segment under inspection: married customers with children,
# restricted to high-value customers.
segment_filters = {
    "marital_status": "married",
    "has_children": True,
    "high_value_only": True,
}

print("\nSegment Stats (Married, Has Children, High Value Only):\n")
print(_segment_stats_impl(**segment_filters))


Segment Stats (Married, Has Children, High Value Only):

{'n_customers': 57, 'avg_income': 71948.96, 'avg_total_spend': 1456.81, 'avg_recency_days': 52.53, 'avg_customer_tenure_days': 474.6, 'pct_high_value_customers': 100.0}


In [14]:
# Print a header followed by the result of _top_customers_by_spend_impl(5).
# NOTE(review): in the recorded output the first two entries (customer_id 5350 and 5735)
# are identical in every field except customer_id - possibly duplicate records in the
# dataset; verify against the source data.
print("\nTop 5 Customers by Spend:\n")
print(_top_customers_by_spend_impl(5))


Top 5 Customers by Spend:

{'customers': [{'customer_id': 5350, 'income': 90638.0, 'total_spend': 2525.0, 'total_children': 0, 'recency_days': 29.0, 'customer_tenure_days': 136.0}, {'customer_id': 5735, 'income': 90638.0, 'total_spend': 2525.0, 'total_children': 0, 'recency_days': 29.0, 'customer_tenure_days': 136.0}, {'customer_id': 1763, 'income': 87679.0, 'total_spend': 2524.0, 'total_children': 0, 'recency_days': 62.0, 'customer_tenure_days': 337.0}, {'customer_id': 4580, 'income': 75759.0, 'total_spend': 2486.0, 'total_children': 0, 'recency_days': 46.0, 'customer_tenure_days': 247.0}, {'customer_id': 4475, 'income': 69098.0, 'total_spend': 2440.0, 'total_children': 0, 'recency_days': 82.0, 'customer_tenure_days': 498.0}]}
