In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import pickle


In [10]:
# Load the synthetic data
df = pd.read_csv('synthetic_bank_customers.csv')
df['Target'].count

<bound method Series.count of 0                         Family
1                         Family
2                       Students
3                 Small business
4                 Small business
                 ...            
995                       Family
996               Small business
997                       Family
998               Small business
999    High Networth Individuals
Name: Target, Length: 1000, dtype: object>

In [11]:
# Encode categorical variables
label_encoders = {}
for column in ['Gender', 'Location', 'Online Behavior', 'Interests', 'Values', 'Lifestyle', 'Target']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Scale the data
scaler = StandardScaler()
df[['Age', 'Transaction History']] = scaler.fit_transform(df[['Age', 'Transaction History']])

# Ensure the scaler is fit on all features
feature_columns = ['Age', 'Transaction History', 'Gender', 'Location', 'Online Behavior', 'Interests', 'Values', 'Lifestyle']
scaler.fit(df[feature_columns])

# Split the data
X = df.drop(columns=['Customer ID', 'Name', 'Target'])
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Update the scaler to fit on the entire training set features
scaler.fit(X_train[feature_columns])

In [12]:
# Train the model
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)
# Predict on the test set
y_pred = clf.predict(X_test)

In [13]:
# Save the model and the encoders
with open('model.pkl', 'wb') as model_file:
    pickle.dump(clf, model_file)

with open('label_encoders.pkl', 'wb') as le_file:
    pickle.dump(label_encoders, le_file)

with open('scaler.pkl', 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)


# Load the model and other necessary objects
with open('model.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

with open('label_encoders.pkl', 'rb') as le_file:
    label_encoders = pickle.load(le_file)

with open('scaler.pkl', 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)

# Check the label encoders' unique values
for column, le in label_encoders.items():
    print(f"{column} encoder classes: {le.classes_}")

Gender encoder classes: ['Female' 'Male' 'Non-binary']
Location encoder classes: ['Adelaide' 'Brisbane' 'Hobart' 'Melbourne' 'Perth' 'Sydney']
Online Behavior encoder classes: ['Active' 'Passive']
Interests encoder classes: ['Fashion' 'Finance' 'Music' 'Sports' 'Tech' 'Travel']
Values encoder classes: ['Convenience' 'Customer Service' 'Innovation' 'Low Fees' 'Security']
Lifestyle encoder classes: ['Business Owner' 'Family-Oriented' 'Retired' 'Single' 'Student']
Target encoder classes: ['Family' 'Frequent traveller' 'High Networth Individuals' 'Single'
 'Small business' 'Students' 'Tech savvy customer' 'Young clients']


In [14]:
importances = model.feature_importances_
feature_importance_df = pd.DataFrame({'Feature': X.columns, 'Importance': importances})
feature_importance_df = feature_importance_df.sort_values(by='Importance', ascending=False)
print(feature_importance_df)

               Feature  Importance
7            Lifestyle    0.324614
3  Transaction History    0.197658
0                  Age    0.156485
5            Interests    0.096439
2             Location    0.073374
6               Values    0.069308
1               Gender    0.051755
4      Online Behavior    0.030366


In [82]:
# Removed Gender, Values & online behavior based on feature importance
# Encode categorical variables 
label_encoders = {}
for column in ['Location',  'Interests', 'Lifestyle', 'Target']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders[column] = le

# Scale the data
scaler = StandardScaler()
df[['Age', 'Transaction History']] = scaler.fit_transform(df[['Age', 'Transaction History']])

In [83]:

# Ensure the scaler is fit on all features
feature_columns = ['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
scaler.fit(df[feature_columns])

# Split the data
X = df.drop(columns=['Customer ID', 'Name', 'Target'])
y = df['Target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Update the scaler to fit on the entire training set features
scaler.fit(X_train[feature_columns])

In [84]:
from sklearn.model_selection import GridSearchCV
from sklearn.ensemble import RandomForestClassifier

# Define the parameter grid
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2', None],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize the RandomForestClassifier
clf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')
grid_search.fit(X_train, y_train)

# Best estimator
best_clf = grid_search.best_estimator_


In [18]:
# Predict on the test set using the best estimator
y_pred = best_clf.predict(X_test)

# Evaluate the model
from sklearn.metrics import accuracy_score, classification_report

accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))

Accuracy: 0.71
Classification Report:
              precision    recall  f1-score   support

           0       0.96      0.73      0.83        59
           1       0.67      0.50      0.57        12
           2       0.88      0.90      0.89        49
           3       0.44      0.70      0.54        23
           4       0.55      0.88      0.67        33
           5       1.00      0.75      0.86         4
           6       0.33      0.07      0.12        14
           7       1.00      0.17      0.29         6

    accuracy                           0.71       200
   macro avg       0.73      0.59      0.60       200
weighted avg       0.75      0.71      0.70       200



In [19]:
## Can try other techniques Cross-Validation with Stratified K-Fold
## Ensure the training and validation sets maintain the same distribution of target classes.
from sklearn.model_selection import StratifiedKFold
skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


In [22]:
## Ensemble Methods Combine the predictions of multiple models.
from sklearn.ensemble import GradientBoostingClassifier, StackingClassifier
from xgboost import XGBClassifier
from lightgbm import LGBMClassifier

# Define the ensemble estimators
estimators = [
    ('rf', best_clf),
    ('xgb', XGBClassifier(random_state=42)),
    ('lgbm', LGBMClassifier(random_state=42))
]

# Initialize and train the StackingClassifier
stack_clf = StackingClassifier(estimators=estimators, final_estimator=GradientBoostingClassifier(random_state=42))
stack_clf.fit(X_train, y_train)
y_pred = stack_clf.predict(X_test)

# Evaluate the model
from sklearn.metrics import accuracy_score, classification_report
accuracy = accuracy_score(y_test, y_pred)
print(f'Accuracy: {accuracy:.2f}')
print('Classification Report:')
print(classification_report(y_test, y_pred))




[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000916 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 346
[LightGBM] [Info] Number of data points in the train set: 800, number of used features: 8
[LightGBM] [Info] Start training from score -1.108663
[LightGBM] [Info] Start training from score -2.421932
[LightGBM] [Info] Start training from score -1.566618
[LightGBM] [Info] Start training from score -2.021173
[LightGBM] [Info] Start training from score -1.939680
[LightGBM] [Info] Start training from score -3.688879
[LightGBM] [Info] Start training from score -3.188104
[LightGBM] [Info] Start training from score -3.506558
[LightGBM] [Info] Auto-choosing col-wise multi-threading, the overhead of testing was 0.000188 seconds.
You can set `force_col_wise=true` to remove the overhead.
[LightGBM] [Info] Total Bins 305
[LightGBM] [Info] Number

In [97]:
## pickle the new model with accuracy of 71% 
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
# Encode categorical variables

label_encoders_new = {}
for column in ['Location', 'Interests', 'Lifestyle', 'Target']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders_new[column] = le

# Prepare features for scaling
features_for_scaling = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]

# Scale the data
scaler_new = StandardScaler()
scaled_features = scaler_new.fit_transform(features_for_scaling)

# Replace scaled values in the DataFrame
df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']] = scaled_features

# Splitting into features and target
X = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]
y = df['Target']

# Fit a RandomForestClassifier (Example)
# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')
grid_search.fit(X_train, y_train)

# Best estimator
best_clf = grid_search.best_estimator_

best_clf.fit(X, y)

# Save the model and the encoders
with open('model_new.pkl', 'wb') as model_file:
    pickle.dump(best_clf, model_file)
with open('label_encoders_new.pkl', 'wb') as le_file:
    pickle.dump(label_encoders_new, le_file)
with open('scaler_new.pkl', 'wb') as scaler_file:
    pickle.dump(scaler_new, scaler_file)

# Load the new model and other necessary objects
with open('model_new.pkl', 'rb') as model_file:
    model_new = pickle.load(model_file)
with open('label_encoders_new.pkl', 'rb') as le_file:
    label_encoders_new = pickle.load(le_file)
with open('scaler_new.pkl', 'rb') as scaler_file:
    scaler_new = pickle.load(scaler_file)




In [6]:
import openai
import pickle
import numpy as np
from docx import Document
import re

# Set your Azure OpenAI endpoint and API key
openai.api_type = "azure"
openai.api_base = "https://nat-instance.openai.azure.com/"
openai.api_version = "2024-05-01-preview"
openai.api_key = '30xR3O0n6r5Z2Fyven6NUiaEcFL70s60iVhgIc7n96Hn6gsSp4hKJQQJ99AJACYeBjFXJ3w3AAABACOG5LtX'


In [134]:
# Function to predict customer category using new features
# Function to handle unseen labels
def safe_transform(encoder, value):
    try:
        return encoder.transform([value])[0]
    except ValueError:
        # If the value is unseen, return a default or average encoding
        return np.median(encoder.transform(encoder.classes_))

# Function to predict customer category using new features
# Function to predict customer category using new features
def predict_customer_category(data):
    encoded_data = {}
    for column, le in label_encoders_new.items():
        if column in data:
            encoded_data[column] = safe_transform(le, data[column])
    print(f"Encoded data: {encoded_data}")

    features = pd.DataFrame([[
        data['Age'],
        data['Transaction History'],
        encoded_data.get('Location', data['Location']),
        encoded_data.get('Interests', data['Interests']),
        encoded_data.get('Lifestyle', data['Lifestyle'])
    ]], columns=['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle'])

    print(f"Features before scaling: {features}")
    features = scaler_new.transform(features)
    print(f"Features after scaling: {features}")

    prediction_numeric = model_new.predict(features)[0]
    prediction_label = label_encoders_new['Target'].inverse_transform([prediction_numeric])[0]
    print(f"Decoded Prediction: {prediction_label}")
    return prediction_label


# Map products and offers to target audiences
def get_product_details(target_audience):
    print("Inside get_product_details and target_audience is:", target_audience)
    products = {
        'Young clients': ("Youth Savings Account", "No monthly fees for the first year"),
        'Family': ("Family Package Account", "Lower interest rates on family loans"),
        'High Networth Individuals': ("Premium Credit Card", "Exclusive rewards and travel benefits"),
        'Small business': ("Business Checking Account", "Free transactions for the first six months"),
        'Students': ("Student Banking Account", "Cashback on all purchases"),
        'Tech savvy customer': ("Tech Savvy Account", "High interest on digital transactions"),
        'Frequent traveller': ("Travel Credit Card", "Double points on travel expenses")
    }
    return products.get(target_audience, ("General Banking Account", "Competitive interest rates"))

def generate_campaign_email(target_audience):
    # Ensure target_audience is decoded if it's numeric
    if isinstance(target_audience, (int, np.integer)):
        target_audience = label_encoders_new['Target'].inverse_transform([target_audience])[0]
    print(f"Target Audience after decoding: {target_audience}")
    
    product_name, offer_details = get_product_details(target_audience)
    print(f"Product: {product_name}, Offer: {offer_details}")  # Debugging statement
    
    prompt = f"""
    Create a detailed marketing email for the following Australian Bank Campaign:
    Product: {product_name}
    Offer: {offer_details}
    Target Audience: {target_audience}
    
    The email should be engaging, persuasive, and tailored to the interests and motivations of {target_audience}. Highlight the benefits of the product, the offer details, and include a call-to-action.
    """
    
    response = openai.Completion.create(
        engine="gpt-35-turbo",  # Use the appropriate engine deployed in your Azure OpenAI resource
        prompt=prompt,
        max_tokens=300
    )
    
    email_content = response.choices[0].text.strip()
    clean_content = re.sub('<[^<]+?>', '', email_content)  # Remove HTML tags
    
    print("Email content:", clean_content)  # Debugging statement
    return clean_content


# Function to save email content to a Microsoft Word document
def save_to_word(email_content, filename="campaign_email.docx"):
    doc = Document()
    doc.add_heading('Campaign Email', 0)

    # Adding paragraphs for email content
    doc.add_paragraph(email_content)

    doc.save(filename)
    print(f"Email content saved to {filename}")
    



In [137]:
# Example input data
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 20,
    'Transaction History': 5000,
    'Location': 'Adelaide',
    'Interests': 'Sports',
    'Lifestyle': 'Single'
}

In [138]:
# Predict customer category
predicted_category = predict_customer_category(sample_data)
print(f"Predicted category: {predicted_category}")

# Generate campaign email based on predicted category
email_content = generate_campaign_email(predicted_category)
print(f"\nGenerated Campaign Email:\n{email_content}")

# Optionally save the email content to a Word document
#save_to_word(email_content, "campaign_email.docx")

Encoded data: {'Location': np.float64(2.5), 'Interests': np.float64(2.5), 'Lifestyle': np.float64(2.0)}
Features before scaling:    Age  Transaction History  Location  Interests  Lifestyle
0   20                 5000       2.5        2.5        2.0
Features after scaling: [[ 2.00000000e+01  5.00000000e+03 -4.09407180e-02 -7.46851315e-03
   3.26021415e-01]]
Decoded Prediction: 2
Predicted category: 2
Target Audience after decoding: 2
Inside get_product_details and target_audience is: 2
Product: General Banking Account, Offer: Competitive interest rates




Email content: ## Structuring the E-mail

1. Subject Line — Offering you an Asymmetric Advantage Over Your Expenses

2. Salutation — Hi Amit,

3. Opening Paragraph — I’m thrilled to know that you’ve been giving careful consideration to your finances. We are delighted to introduce you to our General Banking Account with competitive interest rates to help you navigate your finances and enjoy swift access to unconditional banking service.

4. Second Paragraph —Unequivocally, one of the best ways you can show commitment to your finances is by choosing a bank account with deposit rates and fees that work for you personally. Our General Banking Account offers a highly competitive interest rate to help you grow your savings faster; you can access a rate that is nearly 9x the average interest rates on savings account. 

5. Third Paragraph — Our banking platform is embedded with first-class audible security and runs on world class infrastructure that assures you the peace of mind you need to fo

In [None]:
## Below code without email template 
import pandas as pd
import numpy as np
import re
import openai
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from docx import Document

# Load the dataset
df = pd.read_csv('synthetic_bank_customers.csv')

# Encode categorical variables
label_encoders_new = {}
for column in ['Location', 'Interests', 'Lifestyle', 'Target']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders_new[column] = le

# Prepare features for scaling
features_for_scaling = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]

# Scale the data
scaler_new = StandardScaler()
scaled_features = scaler_new.fit_transform(features_for_scaling)

# Replace scaled values in the DataFrame
df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']] = scaled_features

# Splitting into features and target
X = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]
y = df['Target']

# Define the parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2', None],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize the RandomForestClassifier
clf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')
grid_search.fit(X, y)

# Best estimator
best_clf = grid_search.best_estimator_

# Save the model and the encoders
with open('model_new.pkl', 'wb') as model_file:
    pickle.dump(best_clf, model_file)
with open('label_encoders_new.pkl', 'wb') as le_file:
    pickle.dump(label_encoders_new, le_file)
with open('scaler_new.pkl', 'wb') as scaler_file:
    pickle.dump(scaler_new, scaler_file)

# Load the new model and other necessary objects
with open('model_new.pkl', 'rb') as model_file:
    model_new = pickle.load(model_file)
with open('label_encoders_new.pkl', 'rb') as le_file:
    label_encoders_new = pickle.load(le_file)
with open('scaler_new.pkl', 'rb') as scaler_file:
    scaler_new = pickle.load(scaler_file)

# Function to handle unseen labels
def safe_transform(encoder, value):
    try:
        return encoder.transform([value])[0]
    except ValueError:
        # If the value is unseen, return a default or average encoding
        return np.median(encoder.transform(encoder.classes_))

# Function to predict customer category using new features
def predict_customer_category(data):
    encoded_data = {}
    for column, le in label_encoders_new.items():
        if column in data:
            encoded_data[column] = safe_transform(le, data[column])
    print(f"Encoded data: {encoded_data}")

    features = pd.DataFrame([[
        data['Age'],
        data['Transaction History'],
        encoded_data.get('Location', data['Location']),
        encoded_data.get('Interests', data['Interests']),
        encoded_data.get('Lifestyle', data['Lifestyle'])
    ]], columns=['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle'])

    print(f"Features before scaling: {features}")
    features = scaler_new.transform(features)
    print(f"Features after scaling: {features}")

    prediction_numeric = model_new.predict(features)[0]
    prediction_label = label_encoders_new['Target'].inverse_transform([prediction_numeric])[0]
    print(f"Decoded Prediction: {prediction_label}")
    return prediction_label

# Map products and offers to target audiences
def get_product_details(target_audience):
    print("Inside get_product_details and target_audience is:", target_audience)
    products = {
        'Young clients': ("Youth Savings Account", "No monthly fees for the first year"),
        'Family': ("Family Package Account", "Lower interest rates on family loans"),
        'High Networth Individuals': ("Premium Credit Card", "Exclusive rewards and travel benefits"),
        'Small business': ("Business Checking Account", "Free transactions for the first six months"),
        'Students': ("Student Banking Account", "Cashback on all purchases"),
        'Tech savvy customer': ("Tech Savvy Account", "High interest on digital transactions"),
        'Frequent traveller': ("Travel Credit Card", "Double points on travel expenses")
    }
    return products.get(target_audience, ("General Banking Account", "Competitive interest rates"))

# Function to generate campaign email using Azure OpenAI
def generate_campaign_email(target_audience):
    # Ensure target_audience is decoded if it's numeric
    if isinstance(target_audience, (int, np.integer)):
        target_audience = label_encoders_new['Target'].inverse_transform([target_audience])[0]
    print(f"Target Audience after decoding: {target_audience}")
    
    product_name, offer_details = get_product_details(target_audience)
    print(f"Product: {product_name}, Offer: {offer_details}")  # Debugging statement
    
    prompt = f"""
    Create a detailed marketing email for the following Australian Bank Campaign:
    Product: {product_name}
    Offer: {offer_details}
    Target Audience: {target_audience}
    
    The email should be engaging, persuasive, and tailored to the interests and motivations of {target_audience}. Highlight the benefits of the product, the offer details, and include a call-to-action.
    """
    
    response = openai.Completion.create(
        engine="gpt-35-turbo",  # Use the appropriate engine deployed in your Azure OpenAI resource
        prompt=prompt,
        max_tokens=300
    )
    
    email_content = response.choices[0].text.strip()
    clean_content = re.sub('<[^<]+?>', '', email_content)  # Remove HTML tags
    
    print("Email content:", clean_content)  # Debugging statement
    return clean_content

# Function to save email content

In [1]:
## Below code with email template 
import pandas as pd
import numpy as np
import re
import openai
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV
from docx import Document

# Load the dataset
df = pd.read_csv('synthetic_bank_customers.csv')

# Encode categorical variables
label_encoders_new = {}
for column in ['Location', 'Interests', 'Lifestyle', 'Target']:
    le = LabelEncoder()
    df[column] = le.fit_transform(df[column])
    label_encoders_new[column] = le

# Prepare features for scaling
features_for_scaling = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]

# Scale the data
scaler_new = StandardScaler()
scaled_features = scaler_new.fit_transform(features_for_scaling)

# Replace scaled values in the DataFrame
df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']] = scaled_features

# Splitting into features and target
X = df[['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']]
y = df['Target']

# Define the parameter grid for GridSearchCV
param_grid = {
    'n_estimators': [100, 200, 300],
    'max_features': ['sqrt', 'log2', None],
    'max_depth': [None, 10, 20, 30],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4]
}

# Initialize the RandomForestClassifier
clf = RandomForestClassifier(random_state=42)

# Initialize GridSearchCV
grid_search = GridSearchCV(estimator=clf, param_grid=param_grid, cv=5, scoring='accuracy', n_jobs=-1, error_score='raise')
grid_search.fit(X, y)

# Best estimator
best_clf = grid_search.best_estimator_

# Save the model and the encoders
with open('model_new.pkl', 'wb') as model_file:
    pickle.dump(best_clf, model_file)
with open('label_encoders_new.pkl', 'wb') as le_file:
    pickle.dump(label_encoders_new, le_file)
with open('scaler_new.pkl', 'wb') as scaler_file:
    pickle.dump(scaler_new, scaler_file)

# Load the new model and other necessary objects
with open('model_new.pkl', 'rb') as model_file:
    model_new = pickle.load(model_file)
with open('label_encoders_new.pkl', 'rb') as le_file:
    label_encoders_new = pickle.load(le_file)
with open('scaler_new.pkl', 'rb') as scaler_file:
    scaler_new = pickle.load(scaler_file)

# Function to handle unseen labels
def safe_transform(encoder, value):
    try:
        return encoder.transform([value])[0]
    except ValueError:
        # If the value is unseen, return a default or average encoding
        return np.median(encoder.transform(encoder.classes_))

# Function to predict customer category using new features
def predict_customer_category(data):
    encoded_data = {}
    for column, le in label_encoders_new.items():
        if column in data:
            encoded_data[column] = safe_transform(le, data[column])
    print(f"Encoded data: {encoded_data}")

    features = pd.DataFrame([[
        data['Age'],
        data['Transaction History'],
        encoded_data.get('Location', data['Location']),
        encoded_data.get('Interests', data['Interests']),
        encoded_data.get('Lifestyle', data['Lifestyle'])
    ]], columns=['Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle'])

    print(f"Features before scaling: {features}")
    features = scaler_new.transform(features)
    print(f"Features after scaling: {features}")

    prediction_numeric = model_new.predict(features)[0]
    prediction_label = label_encoders_new['Target'].inverse_transform([prediction_numeric])[0]
    print(f"Decoded Prediction: {prediction_label}")
    return prediction_label

# Map products and offers to target audiences
def get_product_details(target_audience):
    print("Inside get_product_details and target_audience is:", target_audience)
    products = {
        'Young clients': ("Youth Savings Account", "No monthly fees for the first year"),
        'Family': ("Family Package Account", "Lower interest rates on family loans"),
        'High Networth Individuals': ("Premium Credit Card", "Exclusive rewards and travel benefits"),
        'Small business': ("Business Checking Account", "Free transactions for the first six months"),
        'Students': ("Student Banking Account", "Cashback on all purchases"),
        'Tech savvy customer': ("Tech Savvy Account", "High interest on digital transactions"),
        'Frequent traveller': ("Travel Credit Card", "Double points on travel expenses")
    }
    return products.get(target_audience, ("General Banking Account", "Competitive interest rates"))

# Define email templates
email_templates = {
    'Young clients': """
Hello there!

As you start your financial journey, we're here to help you build a solid foundation. Check out our special savings accounts and investment options tailored just for you. {product_name} offers {offer_details}. Let's grow together!

Best regards,
Your Bank
""",
    'Family': """
Dear Valued Customer,

Family means everything, and we want to support yours. Discover our family-friendly financial products, from home loans to education savings plans. {product_name} offers {offer_details}. We're here to help you secure your family's future.

Warm regards,
Your Bank
""",
    'High Networth Individuals': """
Esteemed Client,

Your achievements deserve personalized financial solutions. Explore our exclusive wealth management services designed to help you maximize your investments and safeguard your assets. {product_name} offers {offer_details}.

Sincerely,
Your Bank
""",
    'Small business': """
Dear Business Owner,

Running a small business is challenging, but we're here to support you. Our tailored business accounts, loans, and advisory services are designed to help you succeed. {product_name} offers {offer_details}. Let's grow your business together!

Best wishes,
Your Bank
""",
    'Students': """
Hello,

Balancing studies and finances can be tough. That's why we've created special student accounts and loan options to help you manage your money and focus on your education. {product_name} offers {offer_details}. Let's succeed together!

Best,
Your Bank
""",
    'Tech savvy customer': """
Hey there,

Stay ahead with our cutting-edge digital banking solutions. From secure online transactions to innovative financial apps, we have everything you need to manage your money efficiently. {product_name} offers {offer_details}.

Cheers,
Your Bank
""",
    'Frequent traveller': """
Dear Globetrotter,

Travel the world with confidence using our travel-friendly banking services. Enjoy low-fee international transactions, travel insurance, and 24/7 global support. {product_name} offers {offer_details}. Your financial peace of mind is our priority.

Bon voyage,
Your Bank
"""
}

# Function to generate campaign email using OpenAI
def generate_campaign_email(target_audience):
    # Ensure target_audience is decoded if it's numeric
    if isinstance(target_audience, (int, np.integer)):
        target_audience = label_encoders_new['Target'].inverse_transform([target_audience])[0]
    print(f"Target Audience after decoding: {target_audience}")

    product_name, offer_details = get_product_details(target_audience)
    print(f"Product: {product_name}, Offer: {offer_details}")  # Debugging statement

    email_template = email_templates.get(target_audience, email_templates['Family'])  # Default to 'Family' template if not found
    
    # Define the prompt for OpenAI using the selected template
    prompt = f"""
    Create a professional and engaging marketing email for the following Australian Bank Campaign:
    {email_template.format(product_name=product_name, offer_details=offer_details)}
    """

    response = openai.Completion.create(
        engine="gpt-35-turbo",  # Use the appropriate engine deployed in your Azure OpenAI resource
        prompt=prompt,
        max_tokens=300
    )

    email_content = response.choices[0].text.strip()
    clean_content = re.sub('<[^<]+?>', '', email_content)  # Remove HTML tags

    print("Email content:", clean_content)  # Debugging statement
    return clean_content



# Function to save email content

In [2]:
# Example input data 1 (family)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 65,
    'Transaction History': 25000,
    'Location': 'Sydney',
    'Interests': 'Fashion',
    'Lifestyle': 'Family-Oriented'
}

In [3]:
# Example input data 2 (student)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 19,
    'Transaction History': 25000,
    'Location': 'Perth',
    'Interests': 'Tech',
    'Lifestyle': 'Student'
}

In [None]:
# Example input data 3 (Small business)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 65,
    'Transaction History': 11000,
    'Location': 'Brisbane',
    'Interests': 'Travel',
    'Lifestyle': 'Business Owner'
}

In [157]:
# Example input data 4 (Young clients)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 25,
    'Transaction History': 23000,
    'Location': 'Melbourne',
    'Interests': 'Tech',
    'Lifestyle': 'Single'
}

In [None]:
# Example input data 5 (Frequent traveller)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 45,
    'Transaction History': 23000,
    'Location': 'Hobart',
    'Interests': 'Travel',
    'Lifestyle': 'Single'
}

In [None]:
# Example input data 6 (High Networth Individuals)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 35,
    'Transaction History': 49000,
    'Location': 'Melbourne',
    'Interests': 'Tech',
    'Lifestyle': 'Single'
}

In [None]:
# Example input data 7 (Tech savvy customer)  -- need to fix
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 45,
    'Transaction History': 15000,
    'Location': 'Hobart',
    'Interests': 'Tech',
    'Lifestyle': 'Retired'
}

In [None]:
# Example input data 8 (Single)
## 'Age', 'Transaction History', 'Location', 'Interests', 'Lifestyle']
sample_data = {
    'Age': 50,
    'Transaction History': 5000,
    'Location': 'Hobart',
    'Interests': 'Innovation',
    'Lifestyle': 'Single'
}

In [7]:
# Predict customer category
predicted_category = predict_customer_category(sample_data)
print(f"Predicted category: {predicted_category}")

# Generate campaign email based on predicted category
email_content = generate_campaign_email(predicted_category)
print(f"\nGenerated Campaign Email:\n{email_content}")

# Optionally save the email content to a Word document
#save_to_word(email_content, "campaign_email.docx")

Encoded data: {'Location': np.int64(4), 'Interests': np.int64(4), 'Lifestyle': np.int64(4)}
Features before scaling:    Age  Transaction History  Location  Interests  Lifestyle
0   19                25000         4          4          4
Features after scaling: [[-1.72034468 -0.02518186  0.8082272   0.83608812  1.96021397]]
Decoded Prediction: Students
Predicted category: Students
Target Audience after decoding: Students
Inside get_product_details and target_audience is: Students
Product: Student Banking Account, Offer: Cashback on all purchases




Email content: ```
     
     
#### Solution:

```python

Greetings = "Hello,"
Description = "Balancing studies and finances can be tough. That's why we've created special student accounts and loan options to help you manage your money and focus on your education. Student Banking Account offers Cashback on all purchases. Let's succeed together!"
FromName = "Your Bank"
EmailTemplate = "Create Email Template"
EmailSubject = "Student Banking Available"
DistributionList = ['john.doe@gmail.com', 'jane.spicket@gmail.com']
MailSender(EmailTemplate, Greetings, EmailSubject, Description, FromName, DistributionList )

``` 
Note:

Here, we call  MailSender should be called with a dictionary which will contain the subject, greeting, message, and the name of the company. Along with the dictionary, we pass the list of emails to which the email will be sent.

## Conclusion:

In this notebook, we have created one dummy function named MailSender. 

We have created this function to show how we can defin