# Experience Platform Data Science Workspace
Adobe Experience Platform enables data scientists to wrangle data, analyse relationships, and build prediction models in a native Jupyter Notebook environment. You can manage GPU and memory resources to improve data processing.

#### Manage the modules with **`pip`**

In [None]:
!pip list --format=columns

#### Include required libraries

In [None]:
import numpy as np
import pandas as pd
import math
from scipy.sparse.linalg import svds
from scipy.sparse import csr_matrix
from sklearn.model_selection import train_test_split
import matplotlib.pylab as plt

#### Adobe Experience Platform libraries for accessing (reading / writing) datasets

In [None]:
from platform_sdk.models import Dataset
from platform_sdk.dataset_writer import DatasetWriter
from platform_sdk.dataset_reader import DatasetReader


#### Set number of columns to display

In [None]:
# Display up to 40 columns when a DataFrame is rendered in the notebook.
pd.options.display.max_columns = 40


#### Access datasets for read / write operations through an easy menu and APIs

<img src="https://acsdemoaccelerators.s3.amazonaws.com/commons/images/aep-dsw-dataset-access.png" width=100 border=1 />&nbsp;&nbsp;&nbsp;&nbsp;<img src="https://acsdemoaccelerators.s3.amazonaws.com/commons/images/aep-dsw-dataset-menu.png" width=209 border=1 />


In [None]:
# DATASET: NCX Online activities dataset
# Reader for the dataset with the given id. get_platform_sdk_client_context()
# is not defined in this notebook — presumably injected by the Data Science
# Workspace kernel (TODO confirm).
dataset_reader = DatasetReader(get_platform_sdk_client_context(), dataset_id="5ff56bcf808f40194b1caedb")

In [None]:
# Load the events into `cee`; limit(80000) presumably caps the number of rows
# read (TODO confirm against the SDK). Later cells use the result as a pandas
# DataFrame (boolean indexing, .apply, pd.merge).
cee = dataset_reader.limit(80000).read()

### Perform data transformation for evaluation
----

#### We'll focus on records having product related data

In [None]:
# Keep only the events whose product list is non-empty. The copy gives `xd`
# its own data so later column assignments do not write back into `cee`.
has_products = cee['productListItems'].apply(lambda items: len(items) > 0)
xd = cee[has_products].copy()

#### Functions to normalize data

##### In our Customer Experience Event data, the product-related data is an array of JSON objects. Each JSON object contains the product name and other details. These functions normalize the data by repeating each Experience Event once for every product present in the array.

In [None]:
def get_product_name(obj):
    """Return the product name stored under ``obj["name"]``.

    Parameters
    ----------
    obj
        One product entry; expected to be subscriptable by the key ``"name"``.

    Returns
    -------
    The value of ``obj["name"]``, or the string ``"NA"`` when ``obj`` has no
    such entry or cannot be subscripted at all (for example ``None``).
    """
    try:
        return obj["name"]
    except (LookupError, TypeError):
        # LookupError: the "name" entry is missing.
        # TypeError: obj is None or otherwise not subscriptable by a string.
        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
        return "NA"

In [None]:
def split_df(dataframe, col_name, sep):
    """Expand a delimited string column into one row per piece.

    Each value of ``dataframe[col_name]`` is split with ``Series.str.split(sep)``
    and the row is repeated once for every resulting piece; the other columns
    are carried along unchanged. The split column ends up as the last column
    of the result.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Frame containing ``col_name``.
    col_name : str
        Name of the string column to split.
    sep : str
        Separator handed to ``Series.str.split``.

    Returns
    -------
    pandas.DataFrame
        A new frame whose index repeats the original index values and keeps
        the original index name.

    Raises
    ------
    ValueError
        If ``col_name`` is not one of the columns.
    """
    # The position itself is not used; the lookup raises ValueError up front
    # when the column is absent.
    dataframe.columns.tolist().index(col_name)

    saved_index_name = dataframe.index.name
    columns_before = set(dataframe.columns)

    # Move the index into a regular column so it survives the merge below,
    # and work out which column name reset_index gave it.
    flat = dataframe.reset_index()
    index_col_name = (set(flat.columns) - columns_before).pop()

    # One row per source row, one column per piece; stacking turns that into a
    # long Series, and dropping the inner level leaves the source row position
    # as the index.
    pieces = flat[col_name].str.split(sep).tolist()
    stacked = pd.DataFrame(pieces).stack().reset_index(level=1, drop=True)
    exploded = pd.DataFrame(stacked, columns=[col_name])

    # Join the pieces back onto the remaining columns by row position.
    remainder = flat.drop(col_name, axis=1)
    result = pd.merge(remainder, exploded, left_index=True, right_index=True, how='inner')

    # Restore the original index values and name.
    result = result.set_index(index_col_name)
    result.index.name = saved_index_name
    return result


In [None]:
# Collapse each event's product array into a single '|'-joined string of
# product names (entries without a name contribute "NA" via get_product_name).
productListNormalized = pd.DataFrame(
    xd['productListItems'].apply(
        lambda items: '|'.join(get_product_name(item) for item in items)))
productListNormalized.rename(columns={"productListItems": "productListItemsNormalized"}, inplace=True)

# Attach the joined names to the events, then repeat each event once per product.
xd = pd.merge(xd, productListNormalized, left_index=True, right_index=True, how='inner')
# Raw string: "\|" in a normal literal is an invalid escape sequence. The
# two-character pattern is interpreted as a regex by Series.str.split inside
# split_df, so the pipe has to stay escaped.
xd = split_df(xd, 'productListItemsNormalized', r"\|")
# Drop the placeholder rows produced for products without a name.
xd = xd[xd["productListItemsNormalized"] != 'NA']

#### We will assign a weight to each type of user interaction on the web page

In [None]:
# Weight given to an event, keyed by the value found in the event's
# 'web.webPageDetails.name' column.
event_type_strength = {
    'checkout': 4.0,
    'order': 3.0,
    'addToCart': 2.0,
    'other': 0.5,  # fallback for every value that has no entry of its own
}

In [None]:
# Score every event by its page name; names without an entry in
# event_type_strength get the 'other' weight.
default_strength = event_type_strength['other']
xd['eventStrength'] = xd['web.webPageDetails.name'].apply(
    lambda page_name: event_type_strength.get(page_name, default_strength))


### Building a simple recommendation system using Collaborative filtering method
----

#### Since a user may interact with a product multiple times, we will use a function to smooth the accumulated weight

In [None]:
def smooth_user_preference(x):
    """Dampen an accumulated interaction strength as ``log2(1 + x)``.

    Parameters
    ----------
    x : float
        Summed event strength for one (user, product) pair; must be greater
        than -1.

    Returns
    -------
    float
        ``log2(1 + x)``; 0 maps to 0.0 and 1 maps to 1.0.

    Raises
    ------
    ValueError
        If ``1 + x`` is not positive.
    """
    # math.log2 is documented as usually more accurate than math.log(x, 2),
    # which divides two natural logarithms.
    return math.log2(1 + x)

In [None]:
user_col = 'endUserIDs._experience.emailid.id'
product_col = 'productListItemsNormalized'

# Total event strength per (user, product) pair, dampened by
# smooth_user_preference, returned as a flat three-column frame.
summed_strength = xd.groupby([user_col, product_col])['eventStrength'].sum()
productInteractions = summed_strength.apply(smooth_user_preference).reset_index()

# Users as rows, products as columns; pairs with no interaction become 0.
interaction_pivot = productInteractions.pivot(
    index=user_col, columns=product_col, values='eventStrength')
interaction_pivot = interaction_pivot.fillna(0)

interaction_pivot.head()

#### Using a sparse matrix to store only the non-zero values

In [None]:
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# NumPy array and is available in older and newer pandas alike.
interaction_pivot_matrix = interaction_pivot.values
interaction_pivot_sparse_matrix = csr_matrix(interaction_pivot_matrix)

# Row labels of the pivot (one per user) in matrix row order; used later to
# label the rows of the prediction table.
customers = list(interaction_pivot.index)

### Singular Value Decomposition (SVD) 

Latent factor models compress the interaction matrix into a lower-dimensional representation in terms of latent factors; here we use SVD as the latent factor model. A larger number of factors lets the model reproduce the original values more precisely during reconstruction, but the model then generalizes less well. So choose the number of factors (K) wisely.

In [None]:
# Number of latent factors requested. With its default solver, svds needs
# 1 <= k < min(n_rows, n_cols), so cap the value when the interaction matrix
# has 20 or fewer users or products instead of failing.
NUM_LATENT_FACTORS = 20
k_factors = min(NUM_LATENT_FACTORS, min(interaction_pivot_sparse_matrix.shape) - 1)
U, sigma, Vt = svds(interaction_pivot_sparse_matrix, k=k_factors)

# svds returns the singular values as a 1-D array; turn them into a diagonal
# matrix so the product U . sigma . Vt can be formed directly.
sigma = np.diag(sigma)

<br>

Reconstruct the matrix from the factors; this generates predictions for items the user has not interacted with.

In [None]:
# Dense approximation of the interaction matrix rebuilt from the SVD factors.
predicted_interaction = U.dot(sigma).dot(Vt)

# Min-max scale every score into the range [0, 1].
lowest = predicted_interaction.min()
highest = predicted_interaction.max()
predicted_interaction_norm = (predicted_interaction - lowest) / (highest - lowest)

# Label rows with the users and columns with the pivot's product columns.
cf_predictions = pd.DataFrame(
    predicted_interaction_norm, index=customers, columns=interaction_pivot.columns)

In [None]:
# Preview the first rows of the prediction table.
cf_predictions.head()

In [None]:
# With K=20
# Three highest-scoring products for one sample user; the lookup raises
# KeyError when this user is not among the rows of cf_predictions.
sample_user = 'badajena+aldi1@adobetest.com'
cf_predictions.loc[sample_user].sort_values(ascending=False).head(3)

#### Function to generate data, compatible with the Platform dataset, that stores the recommendations for an individual user

In [None]:
def get_cf_recommendation(x):
    """Build the recommendation record for one user.

    Reads row ``x`` of the module-level ``cf_predictions`` frame, sorts its
    scores in descending order and keeps the labels of the first three.

    Parameters
    ----------
    x
        Row label in ``cf_predictions`` identifying the user.

    Returns
    -------
    dict
        ``{"_salesvelocity": {"emailid": x, "recommendedProducts": [...]}}``
        where each recommended product is a ``{"name": <label>}`` dict.
    """
    NUM_OF_RECOMMENDATION = 3
    ranked_scores = cf_predictions.loc[x].sort_values(ascending=False)
    top_scores = ranked_scores[:NUM_OF_RECOMMENDATION].to_dict()
    return {
        "_salesvelocity": {
            "emailid": x,
            # Iterating the dict yields its keys, i.e. the product labels.
            "recommendedProducts": [{"name": product} for product in top_scores],
        }
    }
    

In [None]:
# One recommendation record per user, in the order of `customers`.
recommendation_records = [get_cf_recommendation(customer) for customer in customers]
recommendations = pd.DataFrame(recommendation_records)

recommendations.head()

### Write product recommendation to Platform dataset

In [None]:
# Look up the target dataset by id and write the recommendations frame to it
# as JSON. get_platform_sdk_client_context() is not defined in this notebook —
# presumably injected by the Data Science Workspace kernel (TODO confirm).
dataset = Dataset(get_platform_sdk_client_context()).get_by_id(dataset_id="6007eac9c33d56194a1ce5ec")
dataset_writer = DatasetWriter(get_platform_sdk_client_context(), dataset)
# The value returned by write() is kept but not inspected in this notebook.
write_tracker = dataset_writer.write(recommendations, file_format='json')