# Prescriptive Analysis

In this analysis we generate product recommendations for each customer based on what they have purchased before. This makes it easier to plan which products to advertise, put on sale, offer coupons for, or otherwise prioritize.



## Imports

In [1]:
import pandas as pd
import numpy as np
from sklearn.neighbors import NearestNeighbors
from sqlalchemy import create_engine
from scipy.sparse import csr_matrix
from collections import defaultdict, Counter

## Database loading

In [None]:
# Load the purchase history (one row per customer/product pair) from the database.
#
# The connection string is taken from the BID_EKSAMEN_DATABASE_URL environment
# variable so that credentials do not have to be edited into the notebook.
# NOTE: the original hard-coded URL is kept as the fallback purely for backward
# compatibility; prefer setting the environment variable and removing the
# password from source control.
import os

engine = create_engine(
    os.environ.get(
        "BID_EKSAMEN_DATABASE_URL",
        "postgresql+psycopg2://postgres:9362@localhost:5432/BID-eksamen",
    )
)

# Join orders to their customers, order items and products.
query = """
SELECT 
    c.customer_id,
    p.product_id,
    p.product_category_name,
    c.customer_state
FROM fact_order o
JOIN dim_customer c ON o.seq_customer_sk = c.seq_customer_sk
JOIN fact_order_item oi ON o.seq_order_sk = oi.seq_order_sk
JOIN dim_product p ON oi.seq_product_sk = p.seq_product_sk;
"""

purchase_hist = pd.read_sql(query, engine)
# Repeated purchases of the same product by the same customer collapse to a
# single row, so the downstream matrix records "bought at least once".
purchase_hist = purchase_hist.drop_duplicates()

## Mapping the data

In [None]:
# Give every distinct customer and product a dense, zero-based integer index
# (numbered in order of first appearance) so they can address matrix rows/columns.
unique_customers = purchase_hist['customer_id'].unique()
unique_products = purchase_hist['product_id'].unique()
customer_map = dict(zip(unique_customers, range(len(unique_customers))))
product_map = dict(zip(unique_products, range(len(unique_products))))

# Store the translated indices next to the raw IDs in the DataFrame.
for id_col, idx_col, id_to_idx in (
    ('customer_id', 'cust_idx', customer_map),
    ('product_id', 'prod_idx', product_map),
):
    purchase_hist[idx_col] = purchase_hist[id_col].map(id_to_idx)

In [None]:
# Build the customer x product interaction matrix in CSR format.
# Each row of purchase_hist contributes a 1.0 at (cust_idx, prod_idx);
# csr_matrix sums the values of any repeated coordinate pairs.
rows, cols = (purchase_hist[col].values for col in ('cust_idx', 'prod_idx'))
data = np.ones(rows.shape[0], dtype=np.float32)

matrix_shape = (len(customer_map), len(product_map))
user_item_sparse = csr_matrix((data, (rows, cols)), shape=matrix_shape)

## Function to compute item-to-item similarities

In [None]:
# Function to compute top-k item neighbors
# k is the number of neighbors to retrieve
# Returns a DataFrame with columns: item_idx, sim_item_idx, score, rank
# score is similarity (1 - distance)
# rank is the rank of the neighbor (1 is most similar)
# Excludes self-neighbors
def topk_item_neighbors(user_item_sparse, k=10):
    """Return the k most cosine-similar items for every item (matrix column).

    Parameters:
        user_item_sparse: user x item matrix; its transpose is used so that
            every item becomes one sample for the nearest-neighbour search.
        k: maximum number of neighbours to keep per item.

    Returns:
        DataFrame with columns ``item_idx``, ``sim_item_idx``, ``score``
        (1 - cosine distance) and ``rank`` (1 = most similar). An item never
        appears as its own neighbour. Items get fewer than ``k`` rows when
        fewer than ``k`` other items exist.
    """
    columns = ["item_idx", "sim_item_idx", "score", "rank"]
    item_user = user_item_sparse.T
    n_items = item_user.shape[0]
    if n_items == 0:
        # Nothing to fit; NearestNeighbors would raise on an empty sample set.
        return pd.DataFrame(columns=columns)

    # Ask for one extra neighbour because the query item itself is normally
    # among the results, but never for more neighbours than there are items
    # (kneighbors raises when n_neighbors exceeds the number of fitted samples).
    n_query = min(k + 1, n_items)
    model = NearestNeighbors(n_neighbors=n_query, metric='cosine', algorithm='brute')
    model.fit(item_user)
    distances, indices = model.kneighbors(item_user)

    topk = []
    for item_idx in range(n_items):
        rank = 0
        for sim_idx, dist in zip(indices[item_idx], distances[item_idx]):
            # Drop the item itself by index, not by position: when several
            # items have identical vectors (distance 0) the query item is not
            # guaranteed to be the first result.
            if sim_idx == item_idx:
                continue
            rank += 1
            if rank > k:
                break
            topk.append((item_idx, sim_idx, 1 - dist, rank))
    return pd.DataFrame(topk, columns=columns)

topk_sim = topk_item_neighbors(user_item_sparse, k=10)

In [None]:
# Group the similarity table by source item:
# item index -> list of (similar item index, similarity score), in table row order.
neighbors = defaultdict(list)
similarity_triples = zip(topk_sim["item_idx"], topk_sim["sim_item_idx"], topk_sim["score"])
for source_idx, similar_idx, similarity in similarity_triples:
    neighbors[source_idx].append((similar_idx, similarity))


## Generating Recommendations

In [None]:
# Generate recommendations for each customer
# For each customer, find products similar to those they have purchased
# Exclude products they have already purchased
# A candidate's score is the sum of its similarities to all purchased products;
# the five highest-scoring candidates with a positive score are kept.
recs = []

# Invert product_map once so each recommendation is an O(1) lookup instead of
# a scan over every product.
idx_to_product = {idx: pid for pid, idx in product_map.items()}

# Read purchased product indices straight from the CSR arrays: row i's column
# indices are indices[indptr[i]:indptr[i + 1]]. This avoids building a new
# one-row sparse matrix for every customer.
row_ptr = user_item_sparse.indptr
col_idx = user_item_sparse.indices

for cust_id, cust_idx in customer_map.items():
    purchased = set(col_idx[row_ptr[cust_idx]:row_ptr[cust_idx + 1]].tolist())

    candidate_scores = Counter()
    for pid in purchased:
        # .get() keeps the lookup read-only (indexing a defaultdict would
        # insert an empty list for every item without neighbours).
        for sim_pid, score in neighbors.get(pid, ()):
            if sim_pid not in purchased:
                candidate_scores[sim_pid] += score

    for rank, (pid_idx, score) in enumerate(candidate_scores.most_common(5), start=1):
        if score > 0:
            recs.append({
                "customer_id": cust_id,
                "recommended_product_id": idx_to_product[pid_idx],
                "rank": rank,
                "score": score
            })

# Name the columns explicitly so the frame has the expected schema even when
# no recommendation was produced.
recommendations_df = pd.DataFrame(
    recs,
    columns=["customer_id", "recommended_product_id", "rank", "score"],
)

## Adding product category and customer state columns

In [None]:
# Enrich the recommendations with the product's category and the customer's state.
# Each lookup is a Series indexed by ID, built from the first purchase_hist row
# seen for that ID.
first_product_rows = purchase_hist[~purchase_hist['product_id'].duplicated()]
product_lookup = first_product_rows.set_index('product_id')['product_category_name']

first_customer_rows = purchase_hist[~purchase_hist['customer_id'].duplicated()]
customer_lookup = first_customer_rows.set_index('customer_id')['customer_state']

# Attach the looked-up values as new columns (IDs missing from a lookup give NaN).
for new_col, key_col, lookup in (
    ('category', 'recommended_product_id', product_lookup),
    ('state', 'customer_id', customer_lookup),
):
    recommendations_df[new_col] = recommendations_df[key_col].map(lookup)

## Saving the data to an Excel file

In [None]:
# Write the recommendation table to an Excel workbook (without the index
# column) and report where it was saved.
output_path = "customer_recommendations.xlsx"
with pd.ExcelWriter(output_path) as writer:
    recommendations_df.to_excel(writer, index=False)
print(f"Saved recommendations to {output_path}")