In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import joblib  # for saving the model

# Load the campaign dataset and work on a copy so the loaded frame stays untouched.
# NOTE(review): another cell in this notebook reads the same file name without the
# 'data/' prefix -- confirm which location is the correct one.
full_featured_df = pd.read_csv('data/Full_CRM_Campaign_Dataset_with_Shoe_Categories.csv')
df = full_featured_df.copy()

# GDPPerCapita is deliberately left out of the model inputs.
df = df.drop(columns=['GDPPerCapita'])

# Define targets and features: the four KPI columns are predicted jointly, and every
# remaining column is used as an input.
targets = ['ROI (%)', 'ConversionRate', 'CTR', 'RevenuePerUser']
# Index.difference sorts its result by default, so the feature order is alphabetical
# rather than the CSV's column order.
features = df.columns.difference(targets)

X = df[features]
y = df[targets]

# Separate categorical and numeric columns.
categorical_cols = X.select_dtypes(include=['object', 'bool']).columns.tolist()
# numeric_cols is not consumed by the pipeline below (non-categorical columns reach the
# model through remainder='passthrough'); it is kept as a notebook-level variable.
numeric_cols = X.select_dtypes(include=['int64', 'float64']).columns.tolist()

# Preprocessing: one-hot encode the categorical columns and pass everything else
# through unchanged. handle_unknown='ignore' keeps predict() from failing on category
# values that were not present at fit time.
preprocessor = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_cols)
], remainder='passthrough')

# Full modeling pipeline: preprocessing followed by a multi-output random forest.
pipeline = Pipeline(steps=[
    ('preprocessor', preprocessor),
    ('regressor', RandomForestRegressor(n_estimators=100, random_state=42, n_jobs=-1))
])

# Train-test split: hold out 20% of the rows for validation (fixed seed for repeatability).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the model on the training portion only.
pipeline.fit(X_train, y_train)

# Validate on the held-out rows before persisting. Pipeline.score applies the fitted
# preprocessing and delegates to the regressor's score, which is R^2 averaged
# uniformly over the target columns.
holdout_r2 = pipeline.score(X_test, y_test)
print("Hold-out R^2 (mean over targets): {:.3f}".format(holdout_r2))

# Save the entire pipeline (preprocessing + model) so raw rows can be scored later.
joblib.dump(pipeline, 'crm_kpi_predictor.pkl')
print("Model saved as crm_kpi_predictor.pkl")


Model saved as crm_kpi_predictor.pkl


In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import joblib  # for saving the model

# Read the campaign CSV from the working directory, then end the cell with the
# column index so the notebook displays the available column labels.
full_featured_df = pd.read_csv(
    filepath_or_buffer='Full_CRM_Campaign_Dataset_with_Shoe_Categories.csv',
)
full_featured_df.columns

Index(['Country', 'Segment', 'Channel', 'CampaignType', 'DiscountValue',
       'PersonalizationLevel', 'MarketType', 'DayOfWeek', 'Month', 'IsWeekend',
       'ProductCategory', 'DiscountType', 'LanguageStyle', 'VisualIntensity',
       'IsRetargeting', 'IsExclusive', 'GDPPerCapita', 'AvgCustomerAge',
       'GenderFemaleRatio', 'Impressions', 'ROI (%)', 'ConversionRate', 'CTR',
       'RevenuePerUser'],
      dtype='object')