<a href="https://colab.research.google.com/github/Augustin-JR/Customer-segmentation-and-personalization-AIML-project/blob/main/customer_segmentation_and_personalization_AI_ML_project_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np

# Size of the synthetic customer base
num_customers = 1000

# Fix the global NumPy seed so every run produces the same synthetic table.
np.random.seed(42)

# Build the columns one at a time; the random draws must stay in this order
# (age, purchase_count, average_order_value) to reproduce the same values.
data = {}
data['customer_id'] = ['CUST' + str(n) for n in range(1, num_customers + 1)]
data['age'] = np.random.randint(18, 60, size=num_customers)
data['purchase_count'] = np.random.randint(1, 50, size=num_customers)
data['average_order_value'] = np.random.uniform(50, 500, size=num_customers)

# Assemble the table and persist it (without the index column) for later reuse
customer_data = pd.DataFrame(data)
customer_data.to_csv('tamil_nadu_company_data.csv', index=False)
print("Sample customer data saved as 'tamil_nadu_company_data.csv'")

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler

# Numeric columns that drive the segmentation
segmentation_features = customer_data.loc[
    :, ['age', 'purchase_count', 'average_order_value']
]

# Standardize the features before clustering
scaler = StandardScaler()
features_scaled = scaler.fit_transform(segmentation_features)

# Fit a 5-cluster KMeans model (fixed random_state) and store each
# customer's cluster label in the 'segment' column
kmeans = KMeans(n_clusters=5, random_state=42)
kmeans.fit(features_scaled)
customer_data['segment'] = kmeans.labels_

# Per-segment mean of every numeric column
segment_summary = customer_data.groupby('segment').mean(numeric_only=True)
print(segment_summary)

# Synthetic purchase log: 5000 rows, each pairing a randomly chosen customer
# with a product id and a rating. The draws below continue the global NumPy
# random stream, so their order (customer, product, rating) must not change.
purchase_data = {}
purchase_data['customer_id'] = np.random.choice(customer_data['customer_id'], size=5000)
purchase_data['product_id'] = np.random.randint(1, 101, size=5000)  # product ids 1..100
purchase_data['rating'] = np.random.randint(1, 6, size=5000)  # integer ratings 1..5

# Persist the log (without the index column) alongside the customer table
purchase_data_df = pd.DataFrame(purchase_data)
purchase_data_df.to_csv('purchase_data.csv', index=False)
print("Sample purchase data saved as 'purchase_data.csv'")

# Customer x product rating matrix. pivot_table averages duplicate
# (customer, product) pairs by default; missing pairs are filled with 0.
customer_product_matrix = purchase_data_df.pivot_table(
    index='customer_id',
    columns='product_id',
    values='rating',
)
customer_product_matrix = customer_product_matrix.fillna(0)

# Item-item cosine similarity via scikit-learn
from sklearn.metrics.pairwise import cosine_similarity

# Products become rows after the transpose, so the result is a
# product x product matrix; wrap it in a DataFrame labelled by product id.
product_similarity = cosine_similarity(customer_product_matrix.T)
product_similarity_df = pd.DataFrame(
    product_similarity,
    index=customer_product_matrix.columns,
    columns=customer_product_matrix.columns,
)

# Function to get top recommendations based on item similarity
def get_item_recommendations(product_id, num_recommendations=5, similarity_df=None):
    """Return the products most similar to ``product_id``.

    Args:
        product_id: Label of the product to look up in the similarity matrix.
        num_recommendations: Maximum number of similar products to return.
        similarity_df: Optional square product-by-product similarity
            DataFrame (same labels on index and columns). When omitted, the
            module-level ``product_similarity_df`` is used.

    Returns:
        A list of product labels ordered from most to least similar. The
        queried product itself is never included. The list is empty when the
        product is unknown or ``num_recommendations`` is not positive.
    """
    if similarity_df is None:
        similarity_df = product_similarity_df
    if product_id not in similarity_df.index:
        return []

    # Remove the queried product by label instead of skipping the first sorted
    # row: with tied scores (or a product whose own column is all zeros) the
    # product is not guaranteed to sort first, so positional skipping could
    # drop a real neighbour and recommend the product to itself.
    scores = similarity_df[product_id].drop(labels=product_id)
    ranked = scores.sort_values(ascending=False)
    return ranked.head(max(num_recommendations, 0)).index.tolist()

# Demo: fetch the default number of neighbours for product 1 and show them
recommended_products = get_item_recommendations(product_id=1)
print(f"Products similar to product 1: {recommended_products}")

def create_campaign_message(customer_id, segment, recommendations, language='English'):
    """Compose a Pongal campaign message for one customer.

    Args:
        customer_id: Identifier interpolated into the greeting.
        segment: Customer segment; 0 and 1 have dedicated greetings, any
            other value gets the generic greeting.
        recommendations: Object appended verbatim (via f-string formatting)
            after "Recommended items:".
        language: 'English' selects the English festival line; any other
            value selects the Tamil one.

    Returns:
        The greeting, the festival line and the recommendations as one string.
    """
    segment_greetings = (
        (0, f"Customer {customer_id}, check out our exclusive recommendations for you!"),
        (1, f"Special offer for you, {customer_id}! Here are your top picks!"),
    )
    generic_greeting = f"Hello {customer_id}, don’t miss these popular products!"
    # First greeting whose segment key equals `segment`, else the generic one
    greeting = next(
        (text for key, text in segment_greetings if segment == key),
        generic_greeting,
    )

    # Festival-specific line, in English or Tamil
    if language == 'English':
        festival_line = " Celebrate this Pongal with our special discounts!"
    else:
        festival_line = " இந்த பொங்கலை சிறப்பிக்க சிறப்பு சலுகைகள்!"

    return f"{greeting}{festival_line} Recommended items: {recommendations}"

# Demo: look up CUST1's assigned segment and print their Tamil campaign message
customer_id = 'CUST1'
segment = customer_data.loc[
    customer_data['customer_id'].eq(customer_id), 'segment'
].to_numpy()[0]
print(
    create_campaign_message(customer_id, segment, recommended_products, language='Tamil')
)

# Define a second campaign message function with a promotional offer
def create_campaign_message_v2(customer_id, segment, recommendations, language='English'):
    """Compose a segment-specific promotional message with a festival offer.

    Args:
        customer_id: Identifier interpolated into the greeting.
        segment: Customer segment; 0-3 have dedicated greetings, any other
            value gets the generic greeting.
        recommendations: Iterable of recommended items. Items may be of any
            type (e.g. integer product ids); each is converted with ``str``.
        language: 'English' selects the English festival line; any other
            value selects the Tamil one.

    Returns:
        The greeting, the festival line and the comma-separated
        recommendations as one string.
    """
    # Greeting depends on the customer's segment
    if segment == 0:
        msg = f"Hello {customer_id}! Our exclusive deals are here for you!"
    elif segment == 1:
        msg = f"Hi {customer_id}, don't miss out on our top picks just for you!"
    elif segment == 2:
        msg = f"{customer_id}, as one of our valued customers, enjoy a 20% discount on your next purchase!"
    elif segment == 3:
        msg = f"Special alert, {customer_id}! Your favorite items are on sale!"
    else:
        msg = f"Dear {customer_id}, discover products recommended just for you!"

    # Regional festival line, in English or Tamil
    festival_offer = " Pongal special discounts!" if language == 'English' else " பொங்கல் சிறப்பு சலுகைகள்!"
    msg += festival_offer

    # Stringify each item before joining: str.join raises TypeError on
    # non-string items such as integer product ids.
    msg += f" Recommended items: {', '.join(map(str, recommendations))}"

    return msg

# Demo inputs for the v2 message builder
customer_id = 'CUST25'
segment = 2  # hard-coded for this demo, not looked up from customer_data
recommended_products = ['product_5', 'product_12', 'product_30']

# Build the Tamil-language v2 message for this customer and show it
campaign_message_v2 = create_campaign_message_v2(
    customer_id,
    segment,
    recommended_products,
    language='Tamil',
)
print(campaign_message_v2)


from sklearn.metrics import silhouette_score

# Score the clustering: mean silhouette coefficient of the scaled features
# against the segment labels assigned above
segment_labels = customer_data['segment']
silhouette_avg = silhouette_score(features_scaled, segment_labels)
print(f'Silhouette Score for Segmentation: {silhouette_avg}')

# For recommendations, accuracy metrics are not directly applicable here due to lack of explicit test data.
# Instead, we can use feedback from users to improve recommendations.


Sample customer data saved as 'tamil_nadu_company_data.csv'
               age  purchase_count  average_order_value
segment                                                
0        27.019324       12.357488           244.525265
1        26.174359       35.892308           326.938969
2        48.974359       11.338462           312.218756
3        44.091787       31.362319           126.496904
4        47.811224       36.775510           370.488738
Sample purchase data saved as 'purchase_data.csv'
Products similar to product 1: [74, 69, 32, 7, 55]
Hello CUST1, don’t miss these popular products! இந்த பொங்கலை சிறப்பிக்க சிறப்பு சலுகைகள்! Recommended items: [74, 69, 32, 7, 55]
CUST25, as one of our valued customers, enjoy a 20% discount on your next purchase! பொங்கல் சிறப்பு சலுகைகள்! Recommended items: product_5, product_12, product_30
Silhouette Score for Segmentation: 0.26133263426776737
