In [152]:
import mysql.connector
import pandas as pd

In [153]:
def connect():
  """Open a connection to the `wp-ecommerce` MySQL database.

  Connection settings default to the local development values and can be
  overridden with the WP_DB_HOST, WP_DB_USER, WP_DB_PASSWORD and WP_DB_NAME
  environment variables, so real credentials never have to be written into
  the notebook.

  Returns:
    tuple: (connection, cursor). The cursor returns rows as dictionaries.
    The caller owns the connection and is responsible for closing it.
  """
  import os

  connection = mysql.connector.connect(
    host = os.environ.get("WP_DB_HOST", "localhost"),
    user = os.environ.get("WP_DB_USER", "root"),
    password = os.environ.get("WP_DB_PASSWORD", ""),
    database = os.environ.get("WP_DB_NAME", "wp-ecommerce")
  )

  cursor = connection.cursor(dictionary = True)

  return connection, cursor

In [154]:
def get_category_title(category_id):
  """Return the name of a product category.

  Args:
    category_id: term_id of the category in wp_terms.

  Returns:
    The term name, or None when no 'product_cat' term has that id.
  """
  connection, cursor = connect()

  sql = '''
    SELECT name FROM wp_terms
    LEFT JOIN wp_term_taxonomy ON wp_terms.term_id = wp_term_taxonomy.term_id
    WHERE wp_term_taxonomy.taxonomy = 'product_cat'
    AND wp_terms.term_id = (%s)
  '''

  try:
    cursor.execute(sql, (category_id, ))

    result = cursor.fetchone()
  finally:
    # connect() opens a fresh connection on every call, so release it here
    # instead of leaving it open for the lifetime of the kernel.
    connection.close()

  return result["name"] if result else None

In [155]:
def get_product_categories(product_id):
  """Return the category term ids attached to a product.

  Args:
    product_id: object_id of the product in wp_term_relationships.

  Returns:
    list: term_id of every 'product_cat' term linked to the product
    (empty when the product has no category).
  """
  connection, cursor = connect()

  sql = '''
    SELECT wp_term_relationships.object_id, wp_term_taxonomy.term_id
    FROM wp_term_relationships
    INNER JOIN wp_term_taxonomy ON wp_term_taxonomy.term_taxonomy_id = wp_term_relationships.term_taxonomy_id
    WHERE wp_term_taxonomy.taxonomy = 'product_cat'
    AND wp_term_relationships.object_id = (%s)
  '''

  try:
    cursor.execute(sql, (product_id, ))

    results = cursor.fetchall()
  finally:
    # This helper is called once per purchased product, so an unclosed
    # connection here would pile up quickly.
    connection.close()

  return [row["term_id"] for row in results]

In [156]:
# Smoke test: category term ids linked to product 56355.
get_product_categories(56355)

[79]

In [157]:
# Smoke test: resolve term id 79 to its category name.
get_category_title(79)

'مستحضرات تجميل'

In [158]:
def add_to_category_list(category_list, term_id, customer_id):
  """Record one purchase of a category by a customer.

  category_list is modified in place: the first entry matching both
  term_id and customer_id gets its "count" incremented; when there is no
  such entry a new one with count 1 is appended. Always returns None.
  """
  existing = next(
    (
      entry for entry in category_list
      if entry["term_id"] == term_id and entry["customer_id"] == customer_id
    ),
    None
  )

  if existing is not None:
    existing["count"] += 1
  else:
    category_list.append({
      "term_id": term_id,
      "customer_id": customer_id,
      "count": 1
    })

In [159]:
def _meta_value(usermeta, meta_key, default = "unknown"):
  """Return meta_value of the first usermeta row with meta_key, else default."""
  return next(
    (meta["meta_value"] for meta in usermeta if meta["meta_key"] == meta_key),
    default
  )


def build_category_by_customer_data():
  """Build one row per customer describing their most purchased category.

  For every user in wp_users that has a wp_wc_customer_lookup entry, the
  'country', 'age' and 'gender' user meta are read (falling back to
  "unknown" when a key is missing) and the customer's purchased products
  are counted per product category. Customers without any categorised
  purchase are skipped.

  Returns:
    pandas.DataFrame with columns user_id, customer_id, country, age,
    gender, term_id, term_title and purchase_count. Meta values are
    returned exactly as stored in wp_usermeta.
  """
  connection, cursor = connect()

  try:
    sql = "SELECT ID FROM wp_users ORDER BY ID"

    cursor.execute(sql)

    results = cursor.fetchall()

    entries = []
    for row in results:
      user_id = row["ID"]

      # check if user is customer
      sql = '''
        SELECT * FROM wp_wc_customer_lookup WHERE user_id = (%s)
      '''

      cursor.execute(sql, (user_id, ))
      result = cursor.fetchone()
      customer_id = result["customer_id"] if result else None

      if not customer_id:
        continue

      # get user's meta
      sql = '''
        SELECT * FROM wp_usermeta WHERE user_id = (%s) and meta_key IN ('country', 'age', 'gender')
      '''

      cursor.execute(sql, (user_id, ))
      usermeta = cursor.fetchall()

      # A bare next() raises StopIteration when a key is missing, which made
      # the "unknown" fallback unreachable; the helper supplies the default.
      country = _meta_value(usermeta, "country")
      age = _meta_value(usermeta, "age")
      gender = _meta_value(usermeta, "gender")

      # get purchased products by the customer
      sql = '''
        SELECT * from wp_wc_order_product_lookup WHERE customer_id = (%s)
      '''

      cursor.execute(sql, (customer_id, ))

      order_products = cursor.fetchall()

      category_list = []
      for product in order_products:
        for term_id in get_product_categories(product["product_id"]):
          add_to_category_list(category_list, term_id, customer_id)

      if not category_list:
        continue

      # max() returns the first entry with the highest count, the same entry
      # a stable descending sort would put first.
      top_category = max(category_list, key = lambda c: c["count"])
      term_id = top_category["term_id"]

      entries.append({
        "user_id": user_id,
        "customer_id": customer_id,
        "country": country,
        "age": age,
        "gender": gender,
        "term_id": term_id,
        "term_title": get_category_title(term_id),
        "purchase_count": top_category["count"],
      })
  finally:
    # Release the connection opened above even if a query fails.
    connection.close()

  data = pd.DataFrame(entries)

  return data

In [160]:
# Build the training table: one row per customer that has at least one
# categorised purchase.
data = build_category_by_customer_data()

data

Unnamed: 0,user_id,customer_id,country,age,gender,term_id,term_title,purchase_count
0,1,294,SA,36,انثى,75,ملابس نسائي,4
1,2,736,JO,48,انثى,75,ملابس نسائي,4
2,3,435,KW,40,ذكر,78,إلكترونيات,8
3,5,86,JO,35,انثى,75,ملابس نسائي,4
4,6,245,BH,29,ذكر,74,ملابس رجالي,9
...,...,...,...,...,...,...,...,...
975,995,715,KW,44,انثى,75,ملابس نسائي,3
976,996,467,AE,26,انثى,75,ملابس نسائي,4
977,997,426,BH,54,انثى,75,ملابس نسائي,6
978,998,664,JO,43,ذكر,78,إلكترونيات,7


In [161]:
from sklearn.preprocessing import LabelEncoder

# One encoder per column; both are kept as globals because
# export_country_codes() and export_gender_codes() read them later.
countryEncoder = LabelEncoder()
genderEncoder = LabelEncoder()

# NOTE(review): these assignments overwrite the text columns of `data` in
# place, so re-running this cell refits the encoders on the integer codes
# instead of the original labels. Rebuild `data` before re-running.
data["country"] = countryEncoder.fit_transform(data["country"])
data["gender"] = genderEncoder.fit_transform(data["gender"])

data[["user_id", "country", "gender"]].head()

Unnamed: 0,user_id,country,gender
0,1,4,0
1,2,2,0
2,3,3,1
3,5,2,0
4,6,1,1


In [162]:
# Features: encoded country, age and encoded gender.
x = data[["country", "age", "gender"]]

# Target: term_id of the category each customer purchased most often.
y = data["term_id"]

#### Imbalanced Data

In [163]:
from collections import Counter

# Number of samples per target class.
counter = Counter(y)

for value, valueCount in counter.items():
  # share of this class among all samples, in percent
  pct = valueCount / len(y) * 100

  print(f"Class: {value}, frequent: {valueCount}, pct: {pct}")


Class: 75, frequent: 382, pct: 38.9795918367347
Class: 78, frequent: 186, pct: 18.979591836734695
Class: 74, frequent: 264, pct: 26.93877551020408
Class: 79, frequent: 102, pct: 10.408163265306122
Class: 76, frequent: 43, pct: 4.387755102040816
Class: 82, frequent: 3, pct: 0.30612244897959184


In [164]:
from imblearn.over_sampling import RandomOverSampler

# Fixed seed so the same rows are duplicated on every run; without it the
# balanced data set (and every score computed from it) changes per run.
resample = RandomOverSampler(random_state = 42)

# Replaces x and y with the balanced versions used by all later cells.
x, y = resample.fit_resample(x, y)

In [165]:
# Recount the classes after oversampling to confirm they are balanced.
counter = Counter(y)

for value, valueCount in counter.items():
  # share of this class among all samples, in percent
  pct = valueCount / len(y) * 100

  print(f"Class: {value}, frequent: {valueCount}, pct: {pct}")

Class: 75, frequent: 382, pct: 16.666666666666664
Class: 78, frequent: 382, pct: 16.666666666666664
Class: 74, frequent: 382, pct: 16.666666666666664
Class: 79, frequent: 382, pct: 16.666666666666664
Class: 76, frequent: 382, pct: 16.666666666666664
Class: 82, frequent: 382, pct: 16.666666666666664


#### Evaluate Models

In [166]:
from sklearn.model_selection import cross_val_score

# Candidate estimators to compare; the following cells append to this list.
models = []

def compare_classification(models, x, y, cv):
  """Cross-validate each model and tabulate the mean accuracy.

  Args:
    models: iterable of estimators accepted by cross_val_score.
    x: feature matrix.
    y: target values.
    cv: cross-validation setting forwarded to cross_val_score.

  Returns:
    pandas.DataFrame with one column per model (named str(model)) and a
    single row "Accuracy" holding the mean accuracy rounded to 3 decimals.
    An empty DataFrame is returned when models is empty.
  """
  mean_scores = {}

  for estimator in models:
    fold_scores = cross_val_score(estimator, x, y, cv = cv, scoring = "accuracy")
    mean_scores[str(estimator)] = [round(fold_scores.mean(), 3)]

  if not mean_scores:
    return pd.DataFrame()

  return pd.DataFrame(mean_scores, index = ["Accuracy"])

In [167]:
from sklearn.tree import DecisionTreeClassifier

# Candidate 1: decision tree with default settings.
model = DecisionTreeClassifier()

# NOTE(review): re-running this cell appends a second copy to `models`.
models.append(model)

In [168]:
from sklearn.naive_bayes import CategoricalNB

# Candidate 2: categorical naive Bayes with default settings.
model = CategoricalNB()

# NOTE(review): re-running this cell appends a second copy to `models`.
models.append(model)

In [169]:
from sklearn.neighbors import KNeighborsClassifier

# Candidate 3: k-nearest neighbours with default settings.
model = KNeighborsClassifier()

# NOTE(review): re-running this cell appends a second copy to `models`.
models.append(model)

In [170]:
# 10-fold cross-validated accuracy of every candidate.
# NOTE(review): x and y were oversampled before this call, so copies of the
# same row can land in both the training and the validation fold; the
# accuracies below are therefore likely optimistic.
compare_classification(models, x, y, 10)

Unnamed: 0,DecisionTreeClassifier(),CategoricalNB(),KNeighborsClassifier()
Accuracy,0.826,0.713,0.696


#### Use Decision Tree

In [171]:
# Seed the tree so the fitted model, and therefore the pickled and exported
# artefacts built from it, are identical on every run.
model = DecisionTreeClassifier(random_state = 42)

# Fit on plain arrays (no column names) because predictions are later made
# from bare lists in the order [country, age, gender].
model.fit(x.values, y.values)

In [172]:
# One hand-written sample in feature order [country code, age, gender code].
sample_data = [
  [2, 22, 0]
]

# Class probabilities first, then the predicted class (a category term_id).
print(model.predict_proba(sample_data))
print(model.predict(sample_data))

[[0.  0.  0.5 0.  0.5 0. ]]
[76]


#### Save & Load Fitted Model

In [173]:
import pickle

filename = "classification-model"

# The context manager flushes and closes the file even if pickling fails;
# the bare open() previously left the handle open.
with open(filename, "wb") as model_file:
  pickle.dump(model, model_file)

In [174]:
# NOTE: unpickling can execute arbitrary code, so only load model files that
# this notebook wrote itself.
with open(filename, "rb") as model_file:
  model = pickle.load(model_file)

# Same sample as before the round trip: the prediction should be unchanged.
sample_data = [
  [2, 22, 0]
]

model.predict(sample_data)

array([76], dtype=int64)

In [230]:
def predict_category_by_customer(filename, customer_details):
  """Predict a category for one customer with a pickled model.

  Args:
    filename: path of the pickled model file.
    customer_details: feature values of a single customer, in the order
      the model was trained with.

  Returns:
    The first (and only) element of the model's prediction.
  """
  # NOTE: unpickling can execute arbitrary code; only pass trusted files.
  # The context manager closes the file, which the bare open() never did.
  with open(filename, "rb") as model_file:
    model = pickle.load(model_file)

  prediction = model.predict([customer_details])

  return prediction[0]

In [231]:
# Smoke test with one encoded customer: [country code, age, gender code].
predict_category_by_customer("classification-model", [1, 22, 1])

74

#### Export model as PHP

In [177]:
import m2cgen as m2c

# Translate the fitted model into PHP source code.
php_model = m2c.export_to_php(model)

# The context manager closes the file even if the write fails.
with open("classification-model.php", "w") as file:
  file.write(php_model)

In [234]:
def export_country_codes():
  """Recreate wp_wc_country_codes from the fitted countryEncoder.

  The table is dropped and rebuilt with one row per encoder class, mapping
  the country value to the integer code the encoder assigns to it.
  """
  connection, cursor = connect()

  try:
    sql = "DROP TABLE IF EXISTS wp_wc_country_codes"

    cursor.execute(sql)

    sql = '''
    CREATE TABLE wp_wc_country_codes (
      ID int(11) NOT NULL AUTO_INCREMENT,
      country char(2) NOT NULL,
      code int(11) NOT NULL,
      PRIMARY KEY (ID)
    )
  '''

    cursor.execute(sql)

    connection.commit()

    sql = '''
      INSERT INTO wp_wc_country_codes VALUES (NULL, %s, %s)
    '''

    # Encode all classes in one call instead of one transform() per country.
    codes = countryEncoder.transform(countryEncoder.classes_)

    for country, code in zip(countryEncoder.classes_, codes):
      # transform() yields numpy integers; bind a plain int instead.
      cursor.execute(sql, (country, int(code)))

    # One commit for the whole batch instead of one per row.
    connection.commit()
  finally:
    # Release the connection opened above even if a statement fails.
    connection.close()

In [235]:
# Rebuild the country -> code lookup table from the fitted encoder.
export_country_codes()

In [183]:
def export_gender_codes():
  """Recreate wp_wc_gender_codes from the fitted genderEncoder.

  The table is dropped and rebuilt with one row per encoder class, mapping
  the gender value to the integer code the encoder assigns to it.
  """
  connection, cursor = connect()

  try:
    sql = "DROP TABLE IF EXISTS wp_wc_gender_codes"

    cursor.execute(sql)

    sql = '''
    CREATE TABLE wp_wc_gender_codes (
      ID int(11) NOT NULL AUTO_INCREMENT,
      gender char(10) NOT NULL,
      code int(11) NOT NULL,
      PRIMARY KEY (ID)
    )
  '''

    cursor.execute(sql)

    connection.commit()

    sql = '''
      INSERT INTO wp_wc_gender_codes VALUES (NULL, %s, %s)
    '''

    # Encode all classes in one call instead of one transform() per gender.
    codes = genderEncoder.transform(genderEncoder.classes_)

    for gender, code in zip(genderEncoder.classes_, codes):
      # transform() yields numpy integers; bind a plain int instead.
      cursor.execute(sql, (gender, int(code)))

    # One commit for the whole batch instead of one per row.
    connection.commit()
  finally:
    # Release the connection opened above even if a statement fails.
    connection.close()

In [184]:
# Rebuild the gender -> code lookup table from the fitted encoder.
export_gender_codes()

In [217]:
def get_best_seller_products_by_category(category_id, max_results = 3):
  """Return the best-selling product ids of a category.

  Args:
    category_id: term_id of a 'product_cat' term.
    max_results: maximum number of product ids to return.

  Returns:
    list: product ids ordered by summed product_qty, highest first.
  """
  connection, cursor = connect()

  sql = '''
    SELECT wp_term_taxonomy.term_id, wp_wc_order_product_lookup.product_id, sum(wp_wc_order_product_lookup.product_qty) sales
    FROM wp_wc_order_product_lookup
    INNER JOIN wp_term_relationships ON wp_term_relationships.object_id = wp_wc_order_product_lookup.product_id
    INNER JOIN wp_term_taxonomy ON wp_term_taxonomy.term_taxonomy_id = wp_term_relationships.term_taxonomy_id
    WHERE wp_term_taxonomy.taxonomy = 'product_cat'
    AND wp_term_taxonomy.term_id = (%s)
    GROUP BY wp_term_taxonomy.term_id, wp_wc_order_product_lookup.product_id
    ORDER BY sales DESC
    LIMIT %s
  '''

  try:
    cursor.execute(sql, (category_id, max_results))

    results = cursor.fetchall()
  finally:
    # connect() opens a fresh connection on every call; release it here.
    connection.close()

  product_ids = [row["product_id"] for row in results]

  return product_ids

In [218]:
# Smoke test: top 3 (default max_results) best sellers of category 78.
get_best_seller_products_by_category(78)

[56238, 56217, 56228]

In [243]:
def get_product_title(product_id):
  """Return the post_title of a product.

  Args:
    product_id: ID of the product row in wp_posts.

  Returns:
    The title, or None when no post has that ID.
  """
  connection, cursor = connect()

  sql = '''
    SELECT post_title FROM wp_posts WHERE ID = (%s)
  '''

  try:
    cursor.execute(sql, (product_id, ))

    result = cursor.fetchone()
  finally:
    # connect() opens a fresh connection on every call; release it here.
    connection.close()

  return result["post_title"] if result else None

In [220]:
def get_country_code(country):
  """Return the integer code stored for a country.

  Args:
    country: country value as stored in wp_wc_country_codes.country.

  Returns:
    The code, or None when the country is not in the table.
  """
  connection, cursor = connect()

  sql = '''
    SELECT code FROM wp_wc_country_codes WHERE country = (%s)
  '''

  try:
    cursor.execute(sql, (country, ))

    result = cursor.fetchone()
  finally:
    # connect() opens a fresh connection on every call; release it here.
    connection.close()

  return result["code"] if result else None

In [221]:
def get_gender_code(gender):
  """Return the integer code stored for a gender.

  Args:
    gender: gender value as stored in wp_wc_gender_codes.gender.

  Returns:
    The code, or None when the gender is not in the table.
  """
  connection, cursor = connect()

  sql = '''
    SELECT code FROM wp_wc_gender_codes WHERE gender = (%s)
  '''

  try:
    cursor.execute(sql, (gender, ))

    result = cursor.fetchone()
  finally:
    # connect() opens a fresh connection on every call; release it here.
    connection.close()

  return result["code"] if result else None

In [252]:
def recommend_products_by_customer(customer_id, max_results = 3):
  """Recommend best-selling products from a customer's predicted category.

  The customer's country, age and gender meta are encoded with the stored
  lookup tables, fed to the pickled "classification-model" and the best
  sellers of the predicted category are returned.

  Args:
    customer_id: customer_id in wp_wc_customer_lookup.
    max_results: maximum number of products to return.

  Returns:
    list: product titles, best seller first. Empty when the customer has
    no linked user or when a feature is missing or has no stored code, so
    no prediction can be made.
  """
  connection, cursor = connect()

  try:
    sql = '''
    SELECT user_id FROM wp_wc_customer_lookup WHERE customer_id = (%s)
  '''

    cursor.execute(sql, (customer_id, ))

    result = cursor.fetchone()

    user_id = result["user_id"] if result else None

    if user_id is None:
      # Unknown customer, or a customer without a linked user: no meta to read.
      return []

    # get user's meta
    sql = '''
    SELECT * FROM wp_usermeta WHERE user_id = (%s) and meta_key IN ('country', 'age', 'gender')
  '''

    cursor.execute(sql, (user_id, ))
    usermeta = cursor.fetchall()
  finally:
    # The helpers called below open their own connections; this one is done.
    connection.close()

  def meta_value(meta_key):
    # A bare next() raises StopIteration when the key is missing, which made
    # the "unknown" fallback unreachable; pass it as the default instead.
    return next(
      (meta["meta_value"] for meta in usermeta if meta["meta_key"] == meta_key),
      "unknown"
    )

  country = get_country_code(meta_value("country"))
  age = meta_value("age")
  gender = get_gender_code(meta_value("gender"))

  if country is None or gender is None or age == "unknown":
    # The model cannot score missing or unencoded features.
    return []

  predicted_category_id = predict_category_by_customer("classification-model", [country, age, gender])

  recommended_product_ids = get_best_seller_products_by_category(int(predicted_category_id), max_results)

  recommended_products = [
    get_product_title(product_id) for product_id in recommended_product_ids
  ]

  return recommended_products

In [253]:
# Smoke test: recommendations for customer 1 (default of 3 results).
recommend_products_by_customer(1)

['فرن-أبيض-LG', 'غسالة-أسود-SAMSUNG', 'غسالة-أسود-LG']