# Recommendation System

Two approaches to build a recommendation system with some common processes are :
1. Classfication Algorithms
2. Clustering Algorithms

![](../images/DataMiningProject.png)

## Using Clustering algorithm

In [31]:
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
import pandas as pd

# Sample data
data = [
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "architecture", "medium", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["yellow", "nature", "medium", "portrait"],
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "people", "medium", "landscape"],
    ["blue", "nature", "thumbnail", "portrait"],
    ["yellow", "architecture", "thumbnail", "landscape"],
    ["blue", "people", "medium", "portrait"],
    ["yellow", "nature", "medium", "landscape"],
    ["yellow", "people", "thumbnail", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["red", "architecture", "thumbnail", "landscape"],
]

# Encode categorical features
label_encoders = [LabelEncoder() for _ in range(len(data[0]))]
encoded_data = []
for i, column in enumerate(zip(*data)):
    encoded_data.append(label_encoders[i].fit_transform(column))

X = list(zip(*encoded_data))  # Features

# Clustering
k = 2  # Number of clusters
kmeans = KMeans(n_clusters=k, n_init=10)
kmeans.fit(X)
clusters = kmeans.labels_

# Add cluster labels to the original data
data_with_clusters = pd.DataFrame(data, columns=["Color", "Category", "Size", "Type"])
data_with_clusters["Cluster"] = clusters

# Recommendation function
def recommend_items(cluster, data_with_clusters):
    items_in_cluster = data_with_clusters[data_with_clusters["Cluster"] == cluster]
    recommended_items = items_in_cluster.sample(n=3)  # Sample 3 items from the cluster
    return recommended_items

# Example usage
user_interaction = ["green", "nature", "thumbnail", "landscape"]  # Assuming user interacted with this item
encoded_interaction = [label_encoders[i].transform([val])[0] for i, val in enumerate(user_interaction)]
cluster = kmeans.predict([encoded_interaction])[0]
recommendations = recommend_items(cluster, data_with_clusters)
print("Recommended items:")
print(recommendations)

reco_list = recommendations.values.tolist()
reco_list = [sublist[:-1] for sublist in reco_list]


Recommended items:
   Color      Category       Size       Type  Cluster
1   blue  architecture     medium   portrait        0
0  green        nature  thumbnail  landscape        0
2   blue        people     medium  landscape        0


## Using classification algorithm

In [30]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score

# Sample data
data = [
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "architecture", "medium", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["yellow", "nature", "medium", "portrait"],
    ["green", "nature", "thumbnail", "landscape"],
    ["blue", "people", "medium", "landscape"],
    ["blue", "nature", "thumbnail", "portrait"],
    ["yellow", "architecture", "thumbnail", "landscape"],
    ["blue", "people", "medium", "portrait"],
    ["yellow", "nature", "medium", "landscape"],
    ["yellow", "people", "thumbnail", "portrait"],
    ["blue", "people", "medium", "landscape"],
    ["red", "architecture", "thumbnail", "landscape"],
]
result = [
    "Favorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
]

# Encode categorical features and labels
label_encoders = [LabelEncoder() for _ in range(len(data[0]))]
encoded_data = []
for i, column in enumerate(zip(*data)):
    encoded_data.append(label_encoders[i].fit_transform(column))

X = list(zip(*encoded_data))  # Features
y = result  # Labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the logistic regression classifier
classifier = svm.SVC()
classifier.fit(X_train, y_train)

# Predictions
y_pred = classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Sample prediction
print(reco_list)
for prediction in reco_list:
    encoded_item = [label_encoders[i].transform([val])[0] for i, val in enumerate(prediction)]
    prediction = classifier.predict([encoded_item])[0]
    print(f"Prediction for the sample item: {prediction}")
    
# sample_item = ["green", "nature", "thumbnail", "landscape"]  # Sample item attributes
# encoded_item = [label_encoders[i].transform([val])[0] for i, val in enumerate(sample_item)]
# prediction = classifier.predict([encoded_item])[0]
# print(f"Prediction for the sample item: {prediction}")


Accuracy: 0.6666666666666666
[['blue', 'nature', 'thumbnail', 'portrait'], ['green', 'nature', 'thumbnail', 'landscape'], ['blue', 'people', 'medium', 'landscape']]
Prediction for the sample item: Favorite
Prediction for the sample item: Favorite
Prediction for the sample item: Favorite


In [None]:
!pip install pynput==1.7.6

In [21]:
from pynput import keyboard

def on_press(key):
    print('{0} pressed'.format(
        key))
    try:
        if key == keyboard.Key.esc:
            print('Escape key pressed, ending program')
            return False  # Stop listener
        elif key == keyboard.Key.left:
            print('Left arrow key pressed')
        elif key == keyboard.Key.right:
            print('Right arrow key pressed')
    except AttributeError:
        pass

with keyboard.Listener(on_press=on_press) as listener:
    listener.join()

Key.right pressed
Right arrow key pressed
Key.left pressed
Left arrow key pressed
Key.esc pressed
Escape key pressed, ending program


In [38]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import numpy as np

# Sample data
data = [
    [["green", "red", "blue"], ["nature", "building"], "thumbnail", "landscape"],
    [["blue"], ["architecture"], "medium", "portrait"],
    [["blue"], ["people"], "medium", "landscape"],
    [["yellow"], ["nature"], "medium", "portrait"],
    [["green", "red", "blue"], ["nature"], "thumbnail", "landscape"],
    [["blue"], ["people"], "medium", "landscape"],
    [["blue"], ["nature"], "thumbnail", "portrait"],
    [["yellow"], ["architecture"], "thumbnail", "landscape"],
    [["blue"], ["people"], "medium", "portrait"],
    [["yellow"], ["nature"], "medium", "landscape"],
    [["yellow"], ["people"], "thumbnail", "portrait"],
    [["blue"], ["people"], "medium", "landscape"],
    [["red"], ["architecture"], "thumbnail", "landscape"],
]

result = [
    "Favorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
]

i = 0
for properties in data:
    # Encode color and tags subarrays using OneHotEncoder
    onehot_encoder = OneHotEncoder(sparse=False)

    color_data = onehot_encoder.fit_transform(properties[0])
    category_data = onehot_encoder.fit_transform(properties[1])

    # Encode size and orientation feature using LabelEncoder
    label_encoder = LabelEncoder()

    size_data = label_encoder.fit_transform(properties[2]).reshape(-1, 1)
    orientation_data = label_encoder.fit_transform(properties[3]).reshape(-1, 1)
    
    data[i] = [color_data, category_data, size_data, orientation_data]
    print(data[i])
    
    i+=1

# X = list(zip(*encoded_data))  # Features
# y = result  # Labels

# # Split the data into training and testing sets
# X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# # Train the logistic regression classifier
# classifier = svm.SVC()
# classifier.fit(X_train, y_train)

# # Predictions
# y_pred = classifier.predict(X_test)

# # Evaluate the classifier
# accuracy = accuracy_score(y_test, y_pred)
# print(f"Accuracy: {accuracy}")

# # Sample test data
# test_data = [
#     [["green", "blue"], ["nature"], "thumbnail", "landscape"],
#     [["red"], ["architecture"], "medium", "portrait"],
#     [["blue", "yellow"], ["people"], "medium", "landscape"],
# ]

# # Encode test data using the same encoding schemes as the training data
# test_color_data = onehot_encoder.transform([sublist[0] for sublist in test_data])
# test_category_data = onehot_encoder.transform([sublist[1] for sublist in test_data])
# test_size_data = label_encoder.transform([sublist[2] for sublist in test_data]).reshape(-1, 1)
# test_orientation_data = label_encoder.transform([sublist[3] for sublist in test_data]).reshape(-1, 1)

# # Concatenate encoded test data
# X_test = np.concatenate((test_color_data, test_category_data, test_size_data, test_orientation_data), axis=1)

# # Make predictions on test data
# y_pred = classifier.predict(X_test)

# # Print predictions
# for i, prediction in enumerate(y_pred):
#     print(f"Prediction for test data {i+1}: {prediction}")




ValueError: Expected 2D array, got 1D array instead:
array=['green' 'red' 'blue'].
Reshape your data either using array.reshape(-1, 1) if your data has a single feature or array.reshape(1, -1) if it contains a single sample.

In [3]:
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Sample data
data = [
    [["green", "red", "blue", "None"], ["nature", "building", "None"], "thumbnail", "landscape"],
    [["blue", "None", "None", "None"], ["architecture", "None", "None"], "medium", "portrait"],
    [["blue", "None", "None", "None"], ["people", "None", "None"], "medium", "landscape"],
    [["yellow", "None", "None", "None"], ["nature", "None", "None"], "medium", "portrait"],
    [["green", "red", "blue", "None"], ["nature", "None", "None"], "thumbnail", "landscape"],
    [["blue", "None", "None", "None"], ["people", "None", "None"], "medium", "landscape"],
    [["blue", "None", "None", "None"], ["nature", "None", "None"], "thumbnail", "portrait"],
    [["yellow", "None", "None", "None"], ["architecture", "None", "None"], "thumbnail", "landscape"],
    [["blue", "None", "None", "None"], ["people", "None", "None"], "medium", "portrait"],
    [["yellow", "None", "None", "None"], ["nature", "None", "None"], "medium", "landscape"],
    [["yellow", "None", "None", "None"], ["people", "None", "None"], "thumbnail", "portrait"],
    [["blue", "None", "None", "None"], ["people", "None", "None"], "medium", "landscape"],
    [["red", "None", "None", "None"], ["architecture", "None", "None"], "thumbnail", "landscape"],
]

result = [
    "Favorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
    "Favorite",
    "Favorite",
    "NotFavorite",
    "NotFavorite",
]

# Flatten the data subarrays
flat_data = []
for sublist in data:
    flat_array = []
    for item in sublist:
        if isinstance(item, list):
            # If item is a list, flatten it and append its elements to flat_data
            flat_array.extend(item)
        else:
            # If item is not a list, append it as is to flat_data
            flat_array.append(item)
            
    flat_data.append(flat_array)
    
# Encode categorical features and labels
label_encoders = [LabelEncoder() for _ in range(len(flat_data[0]))]
encoded_data = []
for i, column in enumerate(zip(*flat_data)):
    encoded_data.append(label_encoders[i].fit_transform(column))

X = list(zip(*encoded_data))  # Features
y = result  # Labels

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Train the logistic regression classifier
classifier = svm.SVC()
classifier.fit(X_train, y_train)

# Predictions
y_pred = classifier.predict(X_test)

# Evaluate the classifier
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")

# Sample prediction
    
sample_item = ["green", "blue", "None", "None", "nature", "None", "None", "medium", "landscape"]  # Sample item attributes
encoded_item = [label_encoders[i].transform([val])[0] for i, val in enumerate(sample_item)]
prediction = classifier.predict([encoded_item])[0]
print(f"Prediction for the sample item: {prediction}")


Accuracy: 0.6666666666666666


ValueError: y contains previously unseen labels: 'blue'