## Business Recommendation System

Adaptated from: 
- https://www.kaggle.com/code/sagarbapodara/coursera-course-recommendation-system-webapp

## Package Declaration

In [237]:
import os
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity as cs

import nltk
from nltk.stem.porter import PorterStemmer

## ETL

In [329]:
def ETL(csv_file):
    """
    input:
        csv file
    deskripsi:
        mengekstrak, transformasi data, 
        dan load data kedalam dataframe/variabel baru
    output:
        df,new_df,users
    """
    #Variabel Lokal
    users={}

    #Extract
    df = pd.read_csv(csv_file)
    df.bidang_keahlian = df.bidang_keahlian.apply(lambda x: x.replace("[","").replace("]","").replace("'",""))
    df.hobi = df.hobi.apply(lambda x: x.replace("[","").replace("]","").replace("'",""))
    users = extract_user_datas(df)
    # print(df)
    # ##show statistics
    # df.info()
    # df.isnull().sum()
    # df.level
    # df.bidang_keahlian
    # df.hobi
    # df.modal_usaha

    # #Transform
    # df.user_id= df.user_id.astype(str)

    # #Load
    # #load main datas to new dataframe will be used
    # df["features"] = df["bidang_keahlian"] + "," + df["hobi"]  + "," + df["modal_usaha"]
    # new_df = df[["user_id","features"]]
    # new_df.features = new_df.features.apply(lambda x: x.replace(","," "))
    # new_df.features = new_df.features.apply(lambda x: x.lower())

    # new_df

    return df,new_df,users

def extract_user_datas(df):
    """
    output: 
        list users
    """
    row = df.values.tolist()
    # print(row[0])
    for r in row:
        # print(r)
        user_meta_data = {
            "user_id":r[0],
            "status":r[1],
            "bidang_keahlian":r[2],
            "hobi":r[3],
            "modal_usaha":r[4],
            "nama_usaha":r[5]
        }

        users[r[0]] = user_meta_data
    return users

def get_users_who_deserved_list(users):
    """
    input:
        list user
    deskripsi:
        Penentuan userId yang "layak" mendapatkan rekomendasi bisnis
    output: 
        list user id yang belum punya bisnis
    """
    deserved_users = []
    for key,user in users.items():
        if user["status"] == "belum_punya_usaha":
            deserved_users.append(key)

    return deserved_users


In [330]:
users = {}
user_meta_data = {
    "user_id":"",
    "status":"",
    "bidang_keahlian":"",
    "hobi":"",
    "modal_usaha":"",
    "nama_usaha":""
}
df,new_df,users = ETL("user_input.csv")
deserved_users = get_users_who_deserved_list(users)
print((deserved_users))

[40, 41, 42, 43, 44, 45, 46, 47, 48, 49]


In [331]:
# Check users metadata
print(users)

{0: {'user_id': 0, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'kuliner', 'hobi': 'makan, travelling', 'modal_usaha': 'under_50', 'nama_usaha': 'nama_usaha-0'}, 1: {'user_id': 1, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'homecare, kuliner', 'hobi': 'nonton, baca, travelling, membaca, makan', 'modal_usaha': 'under_50', 'nama_usaha': 'nama_usaha-1'}, 2: {'user_id': 2, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'homecare', 'hobi': 'makan, nonton, membaca', 'modal_usaha': 'under_50', 'nama_usaha': 'nama_usaha-2'}, 3: {'user_id': 3, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'healtcare, kuliner', 'hobi': 'membaca, nonton', 'modal_usaha': 'under_50', 'nama_usaha': 'nama_usaha-3'}, 4: {'user_id': 4, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'homecare, healtcare', 'hobi': 'nonton, baca, olahraga', 'modal_usaha': 'under_50', 'nama_usaha': 'nama_usaha-4'}, 5: {'user_id': 5, 'status': 'udah_punya_usaha', 'bidang_keahlian': 'healtcare, homecare, kuliner', 'hobi': 't

In [367]:
        
def stem(txt):
    y = []
    for t in txt.split():
        y.append(porterStemmer.stem(t))
    return " ".join(y)
    
def getSimilarityMatrix():
    cv = CountVectorizer(max_features = 5000)
    vectors = cv.fit_transform(new_df.features).toarray()
    porterStemmer = PorterStemmer()
    new_df.features = new_df.features.apply(stem)
    similarityMatrix = cs(vectors)
    # print("Similarity Matrix:\n",similarityMatrix) 

    return similarityMatrix

def getRecommendation(user_id, deserved_users, num_recommendation):
    """
    input:
        user id yang belum memiliki bisnis
    deskripsi:
        Fungsi untuk mendapatkan rekomendasi bisnis
        output rekomendasi berdasarkan user2 yang sudah punya bisnis
    output:
        nama usaha rekomendasi
    """
    rekomendasi_teratas = []

    if user_id not in deserved_users:
        print("Ganti nomor user yg belum punya usaha!")
    else:
        index = new_df[new_df["user_id"] == str(user_id)].index[0]
        similarityMatrix = getSimilarityMatrix()
        distance = similarityMatrix[index]
        userRank = sorted(list(enumerate(distance)), reverse = True, key = lambda x:x[1])
        # print("Recommendation Ranking:\n",userRank)

        print("List Rekomendasi ", num_recommendation, " teratas:")
        count = 0

        """
        The Recommendation system only for he doesn't has any business
        or who has status "belum_punya usaha". So, they will be recommended 
        the business from who have any business. The status "udah_punya_usaha"
        will be skipped since they are not relevant to be recommended to the users
        """
        for i in userRank:
            # print(i[0])

            ## User ID
            if users[i[0]]["status"] == "udah_punya_usaha" and user_id != str(i[0]) :
                rekomendasi_teratas.append(i)
                count+=1

            if count >= num_recommendation:
                break
            
        for item in rekomendasi_teratas:
            print(users[item[0]]["nama_usaha"])

In [369]:
print("Pilih salah 1 dari user ini")
print(deserved_users)
getRecommendation(49, deserved_users, 5)
# users

Pilih salah 1 dari user ini
[40, 41, 42, 43, 44, 45, 46, 47, 48, 49]
List Rekomendasi  5  teratas:
nama_usaha-32
nama_usaha-20
nama_usaha-39
nama_usaha-9
nama_usaha-26


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  new_df.features = new_df.features.apply(stem)
