In [1]:
import pandas as pd
import numpy as np

from collections import Counter

from sklearn.ensemble import GradientBoostingClassifier

import pickle

In [2]:
anclr_products = ['Keyloss', 'Paint', 'Tires', 'Windshield']

In [3]:
# Train and save final models
def train_final_models():
    data, X, y_values = read_data()
    
    for i, y in enumerate(anclr_products):
        model = GradientBoostingClassifier(max_depth=5, random_state=0)
        model.fit(X, y_values[:, i])

        pickle.dump(model, open('Saved_Files/' + y + '.sav', 'wb'), protocol=2)

In [4]:
# Complete copy of np.mode with some modifications
def get_mode(a, axis=0):
    a = np.array(a)
    scores = np.unique(np.ravel(a)) 
    testshape = list(a.shape)
    testshape[axis] = 1
    oldmostfreq = np.zeros(testshape)
    oldcounts = np.zeros(testshape)

    for score in scores:
        template = (a == score)
        counts = np.expand_dims(np.sum(template, axis),axis)
        mostfrequent = np.where(counts > oldcounts, score, oldmostfreq)
        oldcounts = np.maximum(counts, oldcounts)
        oldmostfreq = mostfrequent

    return mostfrequent, oldcounts

In [5]:
# Data is already encoded
def read_data():
    data = pd.read_csv("formated_data.csv")
    data = data.iloc[:200000]
    data = data[(data['Keyloss'] != 0) | 
                (data['Paint'] != 0) |
                (data['Windshield'] != 0) |
                (data['Tires'] != 0)]
    
    y_values = data[anclr_products]  
    y_values = [data[product] for product in anclr_products]
    y_values = np.array(y_values).T
    # Delete columns with products
    X = np.array(data.drop(anclr_products, axis=1))
    
    return data, X, y_values

In [6]:
def get_recommendations(test_sample):
    # load the model from disk
    predictions = {"Keyloss": 0,
                   "Paint": 0,
                   "Tires": 0,
                   "Windshield": 0,
                   "User": 0}
    
    for i, y in enumerate(anclr_products):
        model = pickle.load(open('Saved_Files/' + y + '.sav', 'rb'))
        predictions[y] += model.predict_proba(np.array(test_sample).reshape(1, -1))[0][1]
    
    
    most_probable = sorted(list(predictions.values()))[-2:]
    for key, value in predictions.items():
        if value in most_probable:
            predictions[key] = 1
        else:
            predictions[key] = 0

#     similar = get_similar_users(test_sample)
#     predictions["User"] = similar # 0 until we find a similar user
      
    return predictions

In [7]:
# Extract label encoders and transform test sample
def encode_test(test_sample):
    features_to_transform = ["Behavior", "Location", "Parking Space", "Purpose"]
    for feature in features_to_transform:
        encoder = pickle.load(open('Saved_Files/' + feature + '.enc', 'rb'))
        if feature == "Parking Space" or feature == "Purpose":
            test_sample[feature] = encoder.transform(["".join(sorted(test_sample[feature].split("|")))])[0]
        else:
            test_sample[feature] = encoder.transform([test_sample[feature]])[0]

### Model

In [9]:
train_final_models()

In [10]:
#test_sample = {"Age": 24, # (16 - 99)
#               "Behavior": "Aggressive", # (Passive - Neutral - Aggressive)
 #              "Location": "W", #(W, MW, NE, S)
  #             "Parking Space": "Garage|Parkinglot/R|Street", # (Garage, Parkinglot, Parkinglot/R, Street) Multiple options possible
   #            "Purpose": "Commuting", # (Racing, Leisure, Working, Commuting, Traveling) Multiple options possible
    #           "Usage": 15} # (1 - 30)


samples = []
for i in range(10):
    samples.append({"Age": np.random.randint(16, 80),
                  "Behavior": np.random.choice(["Passive", "Neutral", "Aggressive"]),
                  "Location": np.random.choice(["W", "MW", "NE", "S"]),
                  "Parking Space": np.random.choice(["Garage", "Parkinglot", "Parkinglot/R", "Street"]),
                  "Purpose": np.random.choice(["Racing", "Leisure", "Working", "Commuting", "Traveling"]),
                  "Usage": np.random.randint(5, 30)})

# Recommend 2 most probable products
# Similar user is 0 until we find a similar user
for sample in samples:
    print("Input: ")
    print(sample)
    encode_test(sample)
    print("Output: ")
    print(get_recommendations(list(sample.values())))
    print()

Input: 
{'Age': 31, 'Behavior': 'Neutral', 'Location': 'NE', 'Parking Space': 'Parkinglot/R', 'Purpose': 'Traveling', 'Usage': 15}
Output: 
{'Keyloss': 0, 'Paint': 0, 'Tires': 1, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 31, 'Behavior': 'Neutral', 'Location': 'NE', 'Parking Space': 'Parkinglot/R', 'Purpose': 'Working', 'Usage': 7}
Output: 
{'Keyloss': 0, 'Paint': 0, 'Tires': 1, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 42, 'Behavior': 'Neutral', 'Location': 'S', 'Parking Space': 'Garage', 'Purpose': 'Leisure', 'Usage': 25}
Output: 
{'Keyloss': 0, 'Paint': 0, 'Tires': 1, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 35, 'Behavior': 'Aggressive', 'Location': 'MW', 'Parking Space': 'Garage', 'Purpose': 'Working', 'Usage': 21}
Output: 
{'Keyloss': 0, 'Paint': 1, 'Tires': 0, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 71, 'Behavior': 'Neutral', 'Location': 'W', 'Parking Space': 'Parkinglot', 'Purpose': 'Working', 'Usage': 16}
Output: 
{'Keyloss': 0, 'Paint': 1, 'Tires': 0, 'Windshield



{'Keyloss': 0, 'Paint': 0, 'Tires': 1, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 31, 'Behavior': 'Neutral', 'Location': 'W', 'Parking Space': 'Parkinglot/R', 'Purpose': 'Working', 'Usage': 17}
Output: 
{'Keyloss': 0, 'Paint': 1, 'Tires': 0, 'Windshield': 1, 'User': 0}

Input: 
{'Age': 38, 'Behavior': 'Neutral', 'Location': 'MW', 'Parking Space': 'Parkinglot/R', 'Purpose': 'Leisure', 'Usage': 19}
Output: 
{'Keyloss': 0, 'Paint': 1, 'Tires': 1, 'Windshield': 0, 'User': 0}

Input: 
{'Age': 46, 'Behavior': 'Aggressive', 'Location': 'NE', 'Parking Space': 'Street', 'Purpose': 'Racing', 'Usage': 6}
Output: 
{'Keyloss': 0, 'Paint': 1, 'Tires': 1, 'Windshield': 0, 'User': 0}

Input: 
{'Age': 68, 'Behavior': 'Neutral', 'Location': 'NE', 'Parking Space': 'Parkinglot/R', 'Purpose': 'Traveling', 'Usage': 7}
Output: 
{'Keyloss': 0, 'Paint': 0, 'Tires': 1, 'Windshield': 1, 'User': 0}

