In [2]:
# Show the kernel's current working directory (the notebook starts inside `notebooks/`).
pwd()

'c:\\Users\\asus\\Documents\\Projects\\ECommerce Customer Churn Prediction\\notebooks'

In [3]:
import os

# Move from `notebooks/` up to the project root so that `src.*` imports and the
# relative `config/` and `data/` paths resolve.
# Guarded on the folder name so that re-running this cell is a no-op instead of
# climbing one more directory level on every execution.
if os.path.basename(os.getcwd()) == "notebooks":
    os.chdir(os.pardir)

In [3]:
import pandas as pd
import numpy as np
import pyodbc
from sklearn.preprocessing import StandardScaler
from src.configuration.config import ConfigurationManager



In [8]:
import os
import pandas as pd
import pyodbc
from src.logger import logger
import sqlite3

class DataTransformationStorage:
    """Engineer churn features from the prepared CSV and persist them to SQLite.

    Settings come from ``ConfigurationManager().get_data_storage_config()``.
    The attributes read by this class are:

    * ``db_path`` - location of the SQLite database file,
    * ``processed`` - directory that contains the prepared CSV,
    * ``data_table_name`` - name of the table the result is written to.
    """

    # File name of the prepared dataset inside ``config.processed``.
    PREPARED_FILE_NAME = "churn_data_prepared.csv"

    def __init__(self):
        """Load the data-storage configuration and log it for traceability."""
        self.config = ConfigurationManager().get_data_storage_config()
        logger.info("Data storage config: %s", self.config)

    def store_in_sql(self, df):
        """Write ``df`` to the configured SQLite table, replacing any existing table.

        Args:
            df: DataFrame to persist; its index is not written.

        Raises:
            sqlite3.Error: if the database cannot be opened or written.
        """
        conn = sqlite3.connect(self.config.db_path)
        try:
            # ``if_exists="replace"`` drops the table and recreates it from the
            # frame's own dtypes. A hand-written ``CREATE TABLE IF NOT EXISTS``
            # issued beforehand would be discarded immediately, so none is issued.
            df.to_sql(
                self.config.data_table_name,
                conn,
                if_exists="replace",
                index=False,
            )
            conn.commit()
        finally:
            # Always release the file handle, even when the write fails.
            conn.close()

    def data_transformation(self):
        """Read the prepared CSV, add the engineered features and store the result.

        Adds two columns:

        * ``TotalSpend`` = ``CashbackAmount`` * ``Tenure``
          (CashbackAmount is used as a proxy for spend).
        * ``ActivityFrequency`` = ``NumberOfDeviceRegistered`` / ``Tenure``,
          with a Tenure of exactly 0 replaced by 1 to avoid division by zero.

        Raises:
            FileNotFoundError: if the prepared CSV does not exist.
            KeyError: if one of the required columns is missing.
        """
        # os.path.join works whether or not ``processed`` ends with a separator.
        source_path = os.path.join(self.config.processed, self.PREPARED_FILE_NAME)
        df = pd.read_csv(source_path)

        # NOTE(review): the stored output inspected later in this notebook shows
        # negative, fractional Tenure values, which suggests Tenure may already be
        # standardised at this point. If so, the zero guard below almost never
        # triggers and the ratio explodes for values close to zero - confirm
        # against the upstream preparation step.
        df["TotalSpend"] = df["CashbackAmount"] * df["Tenure"]
        df["ActivityFrequency"] = df["NumberOfDeviceRegistered"] / df["Tenure"].replace(0, 1)

        self.store_in_sql(df)
        
        
        

In [10]:
# Instantiate the pipeline (this loads the storage config) and run it: read the
# prepared CSV, add the engineered features and write the result to SQLite.
obj=DataTransformationStorage()
obj.data_transformation()

[2025-03-01 13:10:17,064 : INFO: common: yaml file config\config.yaml loaded successfully!]
[2025-03-01 13:10:17,080 : INFO: common: yaml file config\params.yaml loaded successfully!]
[2025-03-01 13:10:17,096 : INFO: common: yaml file config\schema.yaml loaded successfully!]
DataStorageConnectionConfig(db_path='config/churn_data.db', processed='data/processed/', data_table_name='transformed_churn_data')


In [15]:
# Confirm the working directory is the project root before using relative data paths.
pwd()

'c:\\Users\\asus\\Documents\\Projects\\ECommerce Customer Churn Prediction'

In [7]:
from pathlib import Path

import pandas as pd

# Load the transformed churn features from the processed-data folder, resolved
# against the current working directory, and display the frame.
parquet_path = Path.cwd() / "data" / "processed" / "transformed_churn_data.parquet"
df = pd.read_parquet(parquet_path)
df

Unnamed: 0,Churn,Tenure,NumberOfDeviceRegistered,PreferedOrderCat,SatisfactionScore,MaritalStatus,Complain,DaySinceLastOrder,CashbackAmount,CustomerID,TotalSpend,ActivityFrequency,event_timestamp
0,1,-0.733989,3,2,2,2,1,0.151436,-0.351465,50001,0.257972,-4.087252,2025-03-01 22:35:06+00:00
1,1,-0.135704,4,3,3,2,1,-1.249003,-1.144715,50002,0.155342,-29.476013,2025-03-01 22:35:06+00:00
2,1,-0.135704,4,3,3,2,1,-0.408739,-1.157316,50003,0.157052,-29.476013,2025-03-01 22:35:06+00:00
3,1,-1.212618,4,2,5,2,0,-0.408739,-0.877047,50004,1.063523,-3.298648,2025-03-01 22:35:06+00:00
4,1,-1.212618,3,3,5,2,0,-0.408739,-0.967895,50005,1.173687,-2.473986,2025-03-01 22:35:06+00:00
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5625,0,-0.016046,2,2,1,1,0,-0.128651,-0.538854,55626,0.008647,-124.638638,2025-03-01 22:35:06+00:00
5626,0,0.342925,5,0,5,1,0,-0.408739,0.969195,55627,0.332361,14.580442,2025-03-01 22:35:06+00:00
5627,0,-1.092961,2,2,4,1,1,-0.128651,0.186920,55628,-0.204296,-1.829892,2025-03-01 22:35:06+00:00
5628,0,1.539497,5,2,4,1,0,1.271788,0.034083,55629,0.052471,3.247814,2025-03-01 22:35:06+00:00


In [8]:
# Summarise the loaded frame: column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5630 entries, 0 to 5629
Data columns (total 13 columns):
 #   Column                    Non-Null Count  Dtype              
---  ------                    --------------  -----              
 0   Churn                     5630 non-null   int64              
 1   Tenure                    5630 non-null   float64            
 2   NumberOfDeviceRegistered  5630 non-null   int64              
 3   PreferedOrderCat          5630 non-null   int64              
 4   SatisfactionScore         5630 non-null   int64              
 5   MaritalStatus             5630 non-null   int64              
 6   Complain                  5630 non-null   int64              
 7   DaySinceLastOrder         5630 non-null   float64            
 8   CashbackAmount            5630 non-null   float64            
 9   CustomerID                5630 non-null   int64              
 10  TotalSpend                5630 non-null   float64            
 11  ActivityFrequency

In [30]:
# checking feature store
from feast import Entity, Feature, FeatureView, Field, FileSource, ValueType
from feast.types import Float64, Int64

In [18]:
# Entity definition: the customer that each feature row describes.
# The entity is named "CustomerID" and carries 64-bit integer values.
customer = Entity(
    description="Customer ID",
    value_type=ValueType.INT64,
    name="CustomerID",
)


In [23]:
# Define the data source: the transformed churn features stored as parquet.
# The path is resolved against the current working directory, so this cell must
# run while the kernel is at the project root.
#
# `timestamp_field` is the current name of the event-time column argument;
# `event_timestamp_column` is its deprecated predecessor.
# NOTE(review): some Feast releases may accept the old keyword without using
# it - confirm against the installed version.
data_source = FileSource(
    path=str(Path.cwd() / "data" / "processed" / "transformed_churn_data.parquet"),
    timestamp_field="event_timestamp",
)
data_source

<feast.infra.offline_stores.file_source.FileSource at 0x27f900bfcd0>

In [32]:
# Feature view exposing per-customer churn features from the parquet source.
#
# All three columns are float64 in the parquet file (see the df.info() output),
# so they are declared as Float64 rather than Int64.
# TotalSpend and ActivityFrequency are included because the online lookup later
# in this notebook requests them from the "customer_features" view.
customer_features = FeatureView(
    name="customer_features",  # Name of this feature view
    entities=[customer],
    ttl=None,
    schema=[
        Field(name="Tenure", dtype=Float64),
        Field(name="TotalSpend", dtype=Float64),
        Field(name="ActivityFrequency", dtype=Float64),
    ],
    online=True,
    source=data_source,
)

In [13]:
# Show the working directory (the recorded output is the Feast repo folder).
# NOTE(review): this cell's execution count (13) is later than that of the
# os.chdir cell below (12), so it was run after it. On a fresh top-to-bottom run
# it would presumably print the project root instead - consider moving this cell
# below the chdir cell.
pwd()

'c:\\Users\\asus\\Documents\\Projects\\ECommerce Customer Churn Prediction\\churn_feature_store\\feature_repo'

In [12]:
# Enter the Feast repository so that FeatureStore(repo_path=".") finds
# feature_store.yaml. The relative chdir only works from the project root, so it
# is skipped when the kernel is already inside the repo; this makes the cell
# safe to re-run. Any other starting directory still raises FileNotFoundError.
if os.path.basename(os.getcwd()) != "feature_repo":
    os.chdir(os.path.join("churn_feature_store", "feature_repo"))

In [None]:
from feast import FeatureStore

# feature_store.yaml is expected in the current working directory.
store = FeatureStore(repo_path=".")

# Online lookup of two engineered features for a single customer.
feature_refs = [
    "customer_features:TotalSpend",
    "customer_features:ActivityFrequency",
]
entity_rows = [{"CustomerID": 50001}]  # Replace with a valid CustomerID from your data

response = store.get_online_features(features=feature_refs, entity_rows=entity_rows)
online_features = response.to_dict()

print(online_features)

{'CustomerID': [50001], 'TotalSpend': [0.25797182415006437], 'ActivityFrequency': [-4.0872522866689085]}


  entity = cls(
  entity = cls(
  entity = cls(
