In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import json

## Read and aggregate data

In [None]:
# Load the raw tables. Forward slashes are used so the relative paths resolve on
# Windows, macOS and Linux alike (a backslash is a path separator only on Windows).
customerprofile = pd.read_csv('data/customerprofile.csv')
# AccountType is removed from the customer table here.
# NOTE(review): later cells group merged_data by AccountType, so that column
# presumably comes from one of the other tables - confirm.
customerprofile = customerprofile.drop(columns=['AccountType'])
Merchant = pd.read_csv('data/Merchant.csv')
MerchantOffers = pd.read_csv('data/MerchantOffers.csv')
Transaction = pd.read_csv('Transaction_Egypt.csv')

# Scale every amount by a factor drawn uniformly from [0.5, 50). The generator
# is seeded so that Restart & Run All reproduces the same amounts; the draw is
# always positive, so no absolute value is needed.
amount_rng = np.random.default_rng(42)
Transaction['Amount'] = Transaction['Amount'] * amount_rng.uniform(0.5, 50, size=len(Transaction))
# MerchantOffers.drop(['Offer_Start_Date','Offer_End_Date'],axis=1, inplace=True)
Transaction.head()

In [None]:
# Preview the first five rows of the customer table.
customerprofile.head(n=5)

In [None]:
# Inner-join customers -> transactions -> merchants -> merchant offers.
# NOTE(review): if a merchant has more than one row in MerchantOffers, every
# transaction at that merchant is repeated once per offer, which would inflate
# the counts and averages computed from merged_data below - confirm.
merged_data = (
    customerprofile
    .merge(Transaction, on='CustomerID')
    .merge(Merchant, on='MerchantID')
    .merge(MerchantOffers, on='MerchantID')
)
# Persist the joined table, then preview it.
merged_data.to_csv('mergedata.csv', index=False)
merged_data.head()

In [5]:
# Parse the timestamp column once, then derive day-of-month and month from it.
transaction_timestamps = pd.to_datetime(merged_data['TransactionDateTime'])
merged_data['TransactionDateTime'] = transaction_timestamps
merged_data['Day'] = transaction_timestamps.dt.day
merged_data['Month'] = transaction_timestamps.dt.month

## Build the customer profile

In [None]:
# Per-customer mean (avg_spending) and standard deviation (std_spending) of Amount.
customer_avg_amount = (
    merged_data
    .groupby('CustomerID')['Amount']
    .agg(avg_spending='mean', std_spending='std')
    .reset_index()
)
# Left join so every customer in customerprofile is kept, with or without transactions.
customer_profile = pd.merge(customerprofile, customer_avg_amount, on='CustomerID', how='left')
# Show the first profile as pretty-printed JSON.
print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [7]:
# Calculate total number of transactions per customer
# customer_total_transactions = merged_data.groupby('CustomerID')['TransactionID'].count().reset_index().rename(columns={'TransactionID': 'number_of_transactions'})
# customer_profile = customer_profile.merge(customer_total_transactions, on='CustomerID', how='left')
# print(json.dumps(json.loads(customer_profile.head(1).to_json(orient='records')), indent=4))

In [9]:
def nest_codes(row, name, pivot):
    """Collapse a group of count columns of one row into a single dict entry.

    Args:
        row: A row (pandas Series) that contains every label listed in ``pivot``.
        name: Label under which the resulting dict is stored in the returned row.
        pivot: Iterable of labels in ``row`` holding the counts to collapse.

    Returns:
        A copy of ``row`` without the ``pivot`` labels and with ``row[name]``
        set to ``{label: count}`` for every count greater than zero, ordered
        from the largest count to the smallest.
    """
    # Keep strictly positive entries only; zero (and NaN) counts fail the test.
    positive_counts = {}
    for label in pivot:
        count = row[label]
        if count > 0:
            positive_counts[label] = count

    # Stable sort: labels with equal counts keep their order from ``pivot``.
    ranked = sorted(positive_counts.items(), key=lambda pair: pair[1], reverse=True)

    trimmed = row.drop(pivot)
    trimmed[name] = dict(ranked)
    return trimmed

In [None]:
# Count each customer's transactions per MCC, then spread the counts into one
# column per MCC (missing customer/MCC combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'MCC'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='MCC', values='TransactionID')
    .fillna(0)
)
# Convert the MCC column labels to strings.
mcc_pivot.columns = mcc_pivot.columns.astype(str)

# Attach the counts to the profile, then fold the per-MCC columns into a single
# dict column via nest_codes.
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_MCC_Code", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's transactions per merchant name, one column per merchant
# (missing customer/merchant combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'Merchant_Name'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='Merchant_Name', values='TransactionID')
    .fillna(0)
)
# Convert the merchant-name column labels to strings.
mcc_pivot.columns = mcc_pivot.columns.astype(str)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-merchant columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_Merchant_Names", mcc_pivot.columns),
)
print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's transactions per SubCategory, one column per subcategory
# (missing customer/subcategory combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'SubCategory'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='SubCategory', values='TransactionID')
    .fillna(0)
)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-subcategory columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_SubCategory", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's transactions per LocationCity, one column per city
# (missing customer/city combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'LocationCity'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='LocationCity', values='TransactionID')
    .fillna(0)
)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-city columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_LocationCity", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's transactions per Day (the day-of-month value derived
# from TransactionDateTime), one column per day; missing combinations become 0.
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'Day'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='Day', values='TransactionID')
    .fillna(0)
)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-day columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_Days", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's transactions per AccountType, one column per account
# type (missing combinations become 0).
# NOTE(review): AccountType is dropped from customerprofile when it is loaded,
# so this column presumably comes from another merged table - confirm.
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'AccountType'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='AccountType', values='TransactionID')
    .fillna(0)
)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-account-type columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("transactions_per_AccountTypes", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each customer's merged rows per Offer_Description, one column per offer
# (missing customer/offer combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['CustomerID', 'Offer_Description'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='CustomerID', columns='Offer_Description', values='TransactionID')
    .fillna(0)
)
customer_profile = pd.merge(customer_profile, mcc_pivot, on='CustomerID', how='left')

# Fold the per-offer columns into a single dict column via nest_codes.
customer_profile = customer_profile.apply(
    nest_codes,
    axis=1,
    args=("Offer_Description", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(customer_profile.head(1).to_json(orient='records')),
    indent=4,
))

## Merchant profile

In [None]:
# Preview the first five rows of the merchant table.
Merchant.head(n=5)

In [None]:
# Preview the first five rows of the merchant-offers table.
MerchantOffers.head(n=5)

In [None]:
# Count each merchant's transactions per (Sex, MaritalStatus) combination.
mcc_frequency = (
    merged_data
    .groupby(['MerchantID', 'Sex', 'MaritalStatus'])['TransactionID']
    .count()
    .reset_index()
)
# One column per (MaritalStatus, Sex) pair; missing combinations become 0.
mcc_pivot = (
    mcc_frequency
    .pivot(index='MerchantID', columns=['MaritalStatus', 'Sex'], values='TransactionID')
    .fillna(0)
)
# Flatten the two-level column labels into "MaritalStatus_Sex" strings.
mcc_pivot.columns = ['_'.join(level_values).strip() for level_values in mcc_pivot.columns]
# Inner join: only merchants that appear in mcc_pivot are kept.
merchant_profile = pd.merge(Merchant, mcc_pivot, on='MerchantID')
print(json.dumps(
    json.loads(merchant_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each merchant's transactions per customer HouseLocation, one column per
# location (missing merchant/location combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['MerchantID', 'HouseLocation'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='MerchantID', columns=['HouseLocation'], values='TransactionID')
    .fillna(0)
)
merchant_profile = pd.merge(merchant_profile, mcc_pivot, on='MerchantID')
# Fold the per-location columns into a single dict column via nest_codes.
merchant_profile = merchant_profile.apply(
    nest_codes,
    axis=1,
    args=("HouseLocation", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(merchant_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each merchant's transactions per customer Age, one column per age value
# (missing merchant/age combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['MerchantID', 'Age'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='MerchantID', columns=['Age'], values='TransactionID')
    .fillna(0)
)
merchant_profile = pd.merge(merchant_profile, mcc_pivot, on='MerchantID')
# Fold the per-age columns into a single dict column via nest_codes.
merchant_profile = merchant_profile.apply(
    nest_codes,
    axis=1,
    args=("Ages", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(merchant_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each merchant's transactions per AccountType, one column per account
# type (missing merchant/account-type combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['MerchantID', 'AccountType'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='MerchantID', columns=['AccountType'], values='TransactionID')
    .fillna(0)
)
merchant_profile = pd.merge(merchant_profile, mcc_pivot, on='MerchantID')
# Fold the per-account-type columns into a single dict column via nest_codes.
merchant_profile = merchant_profile.apply(
    nest_codes,
    axis=1,
    args=("AccountType", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(merchant_profile.head(1).to_json(orient='records')),
    indent=4,
))

In [None]:
# Count each merchant's merged rows per Offer_Description, one column per offer
# (missing merchant/offer combinations become 0).
mcc_frequency = (
    merged_data
    .groupby(['MerchantID', 'Offer_Description'])['TransactionID']
    .count()
    .reset_index()
)
mcc_pivot = (
    mcc_frequency
    .pivot(index='MerchantID', columns=['Offer_Description'], values='TransactionID')
    .fillna(0)
)
merchant_profile = pd.merge(merchant_profile, mcc_pivot, on='MerchantID')
# Fold the per-offer columns into a single dict column via nest_codes.
merchant_profile = merchant_profile.apply(
    nest_codes,
    axis=1,
    args=("Offer_Description", mcc_pivot.columns),
)

print(json.dumps(
    json.loads(merchant_profile.head(1).to_json(orient='records')),
    indent=4,
))

## LLM

In [None]:
# import google.generativeai as genai
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# import json
# import google.generativeai as genai

# # Configure the API key
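# # SECURITY: a live API key was committed on this line; revoke it and read the key from the environment instead.
# import os
# genai.configure(api_key=os.environ["GOOGLE_API_KEY"])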

# # Initialize the GenerativeModel
# model = genai.GenerativeModel("gemini-1.5-flash")

In [2]:
# # Load the JSON file
# with open('merchant_profile.json', 'r') as file:
#     merchant_data = json.load(file)

# with open('customer_profile.json', 'r') as file:
#     customer_data = json.load(file)

# # # Print the JSON data to verify
# # print(json.dumps(json_data, indent=4))

In [3]:
# def json_to_text(data):
#     summary = "Here is the data about merchants:\n"
#     for merchant in data:
#         summary += f"- Merchant ID: {merchant['MerchantID']}, Name: {merchant['Merchant_Name']}, Category: {merchant['SubCategory']}, Location: {merchant['Location']}\n"
#         summary += f"  - House Locations: {', '.join([f'{k}: {v}' for k, v in merchant['HouseLocation'].items()])}\n"
#         summary += f"  - Ages: {', '.join([f'{k}: {v}' for k, v in merchant['Ages'].items()])}\n"
#         summary += f"  - Account Types: {', '.join([f'{k}: {v}' for k, v in merchant['AccountType'].items()])}\n"
#         summary += f"  - Offers: {', '.join([f'{k}: {v}' for k, v in merchant['Offer_Description'].items()])}\n"
#     return summary

# # def json_to_text_customer(data):
# #     summary = "Here is the data about customers:\n"
# #     for customer in data:
# #         summary += f"- Customer ID: {customer['CustomerID']}, Name: {customer['Name']}, Age: {customer['Age']}, Location: {customer['HouseLocation']}\n"
# #         summary += f"  - Marital Status: {customer['MaritalStatus']}, Account Balance: {customer['AccountBalance']}, Risk: {customer['Risk']}\n"
# #         summary += f"  - Transactions by MCC Code: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_MCC_Code'].items()])}\n"
# #         summary += f"  - Transactions by Merchant Names: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_Merchant_Names'].items()])}\n"
# #         summary += f"  - Transactions by Subcategory: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_SubCategory'].items()])}\n"
# #         summary += f"  - Transactions by Location: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_LocationCity'].items()])}\n"
# #         summary += f"  - Transactions by Days: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_Days'].items()])}\n"
# #         summary += f"  - Transactions by Account Types: {', '.join([f'{k}: {v}' for k, v in customer['transactions_per_AccountTypes'].items()])}\n"
# #         summary += f"  - Offers: {', '.join([f'{k}: {v}' for k, v in customer['Offer_Description'].items()])}\n"
# #     return summary

# # customer_summary = json_to_text_customer(customer_data)
# merchant_data = json_to_text(merchant_data)


In [4]:
# # Start a chat session
# chat = model.start_chat(history=[])

# # Provide the JSON data summary as context
# response1 = chat.send_message(
#     f"Here is some data about merchants. Please remember this information for future questions:\n{data_summary}"
# )

# response2 = chat.send_message(
#     f"Here is some data about customers. Please remember this information for future questions:\n{customer_summary}"
# )
# print(response1.text , response2.text)

In [5]:
# # Ask a question about the data
# response = chat.send_message("What is the most popular offer for McDonald's?")
# print(response.text)


# # Ask another question
# response = chat.send_message("Which location has the most customers?")
# print(response.text)

### Gemini

In [None]:
# # Load customer and merchant data
# with open('customer_profile.json', 'r') as file:
#     customer_data = json.load(file)

# with open('merchant_profile.json', 'r') as file:
#     merchant_data = json.load(file)

# # Function to generate a recommendation
# def generate_recommendation(customer, all_merchants):
#     # Prepare the prompt
#     prompt = f"""
#     **Customer Profile:**
#     - Name: {customer["Name"]}
#     - Sex: {customer["Sex"]}
#     - Age: {customer["Age"]}
#     - House Location: {customer["HouseLocation"]}
#     - Marital Status: {customer["MaritalStatus"]}
#     - Risk Level: {customer["Risk"]}
#     - Average Spending: {customer["avg_spending"]}
#     - Transactions by MCC Code: {json.dumps(customer["transactions_per_MCC_Code"], indent=4)}
#     - Transactions by Merchant Names: {json.dumps(customer["transactions_per_Merchant_Names"], indent=4)}
#     - Transactions by Subcategory: {json.dumps(customer["transactions_per_SubCategory"], indent=4)}
#     - Transactions by Location: {json.dumps(customer["transactions_per_LocationCity"], indent=4)}
#     - Transactions by Days: {json.dumps(customer["transactions_per_Days"], indent=4)}
#     - Transactions by Account Types: {json.dumps(customer["transactions_per_AccountTypes"], indent=4)}
#     - Offers Attracted To: {json.dumps(customer["Offer_Description"], indent=4)}

#     **All Merchants:**
#     {all_merchants}

#     **Task:**
#     Based on the customer's profile and behavior, analyze all the merchants and their offers. Recommend the best-fit merchant and offer for the customer. Provide a detailed explanation of why this recommendation is the best fit, considering factors such as:
#     - The customer's transaction history and preferences.
#     - The merchant's target audience and location.
#     - The customer's demographic and behavioral alignment with the merchant's offers.

#     **Output Format:**
#     - Recommended Merchant: [Merchant Name]
#     - Recommended Offer: [Offer Description]
#     - Reason: [Detailed explanation of why this merchant and offer are the best fit for the customer.]
#     """

#     # Generate the recommendation using the Gemini model
#     response = model.generate_content(prompt)
#     return response.text

# # Example: Generate a recommendation for the first customer and all merchants
# customer = customer_data[3]  # Fourth customer in the list (index 3)
# recommendation = generate_recommendation(customer, merchant_data)
# print("Recommendation:\n", recommendation)

In [9]:
# import json
# from transformers import pipeline
# from langchain.llms import HuggingFacePipeline
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain

# # Load customer and merchant profiles
# with open('customer_profile.json', 'r') as f:
#     customers = json.load(f)
# with open('merchant_profile.json', 'r') as f:
#     merchants = json.load(f)

# # Initialize the local LLM using HuggingFace pipeline
# # Using FLAN-T5-small as it's a smaller, efficient model
# text_generator = pipeline(
#     "text2text-generation",
#     model="google/flan-t5-small",  # Small, efficient model (~300MB)
#     max_length=512,
#     temperature=0.7
# )

# # Convert the pipeline to a LangChain compatible format
# local_llm = HuggingFacePipeline(pipeline=text_generator)

# # Define a prompt template for recommendation
# recommendation_prompt = PromptTemplate(
#     input_variables=["customer", "transaction", "merchants"],
#     template="""
#     Task: Recommend a merchant name  and specific offer based on the following information.
    
#     Customer Profile:
#     {customer}
    
#     New Transaction:
#     {transaction}
    
#     Available Merchants and Offers:
#     {merchants}
    
#     Please recommend a merchant name  and specific offer that best matches the customer's preferences and spending behavior.
#     Keep the recommendation concise and specific.
#     """,
# )

# # Create an LLM chain for recommendations
# recommendation_chain = LLMChain(llm=local_llm, prompt=recommendation_prompt)

# def recommend_offer(customer_id, new_transaction):
#     """
#     Generate personalized merchant recommendations based on customer profile and transaction.
    
#     Args:
#         customer_id: ID of the customer
#         new_transaction: Dictionary containing transaction details
        
#     Returns:
#         str: Recommendation text
#     """
#     # Fetch the customer profile
#     customer = next(c for c in customers if c["CustomerID"] == customer_id)
    
#     # Filter merchants based on MCC code or subcategory
#     relevant_merchants = [
#         m for m in merchants
#         if m["MCC_Code"] == new_transaction["MCC_Code"] or
#         m["SubCategory"] == new_transaction["SubCategory"]
#     ]
    
#     # Prepare inputs for the LLM
#     customer_info = json.dumps(customer, indent=2)
#     transaction_info = json.dumps(new_transaction, indent=2)
#     merchant_info = json.dumps(relevant_merchants, indent=2)
    
#     try:
#         # Generate recommendation
#         recommendation = recommendation_chain.run({
#             "customer": customer_info,
#             "transaction": transaction_info,
#             "merchants": merchant_info
#         })
        
#         return recommendation.strip()
    
#     except Exception as e:
#         return f"Error generating recommendation: {str(e)}"

# # Example usage
# if __name__ == "__main__":
#     # Example transaction
#     new_transaction = {
#         "MCC_Code": 5812,
#         "SubCategory": "Restaurant",
#         "Location": "Giza",
#         "Amount": 200.0
#     }
    
#     # Get a recommendation for customer with ID 0
#     recommendation = recommend_offer(0, new_transaction)
#     print("Recommendation:", recommendation)