In [16]:
import warnings

import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler

In [17]:
# Load the rental records and the vehicle attribute table from the project repository.
rental_df = pd.read_csv("https://raw.githubusercontent.com/Capstone-Rental-Marketplace/SewaIn/main/Dataset/rental_dataset.csv")
vehicle_df = pd.read_csv("https://raw.githubusercontent.com/Capstone-Rental-Marketplace/SewaIn/main/Dataset/vehicle_dataset.csv")
# Attach the vehicle attributes to every rental row via the shared id_vehicle key
# (default inner join, so rentals without a matching vehicle are not kept).
merged_df = pd.merge(rental_df, vehicle_df, on='id_vehicle')
merged_df

Unnamed: 0,id_user,id_rental,id_vehicle,rating,sum_day_of_rental,total_pay,vehicle_name,engine_type,power,torque,colors,fuel_capacity,transmission,vehicle_type,number_of_seats,brand,model
0,8817,94,MTR0031,5,3,240000,Honda Scoopy,"Air-cooled, 4-stroke",8.8 hp,9.4 Nm,White,4.2 liters,CVT,Motor,2,Honda,Scoopy
1,7660,113,MTR0031,4,3,300000,Honda Scoopy,"Air-cooled, 4-stroke",8.8 hp,9.4 Nm,White,4.2 liters,CVT,Motor,2,Honda,Scoopy
2,7503,90,MTR0031,4,3,210000,Honda Scoopy,"Air-cooled, 4-stroke",8.8 hp,9.4 Nm,White,4.2 liters,CVT,Motor,2,Honda,Scoopy
3,10385,10,MTR0031,4,3,210000,Honda Scoopy,"Air-cooled, 4-stroke",8.8 hp,9.4 Nm,White,4.2 liters,CVT,Motor,2,Honda,Scoopy
4,434,9,MTR0031,5,3,270000,Honda Scoopy,"Air-cooled, 4-stroke",8.8 hp,9.4 Nm,White,4.2 liters,CVT,Motor,2,Honda,Scoopy
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14590,4458,49,MTR0036,5,2,180000,Yamaha NMAX,"Liquid-cooled, 4-stroke",15.1 hp,13.9 Nm,Grey,7.1 liters,CVT,Motor,2,Yamaha,NMAX
14591,7412,52,MTR0036,5,2,220000,Yamaha NMAX,"Liquid-cooled, 4-stroke",15.1 hp,13.9 Nm,Grey,7.1 liters,CVT,Motor,2,Yamaha,NMAX
14592,4727,118,MTR0036,5,2,240000,Yamaha NMAX,"Liquid-cooled, 4-stroke",15.1 hp,13.9 Nm,Grey,7.1 liters,CVT,Motor,2,Yamaha,NMAX
14593,6339,53,MTR0036,4,2,180000,Yamaha NMAX,"Liquid-cooled, 4-stroke",15.1 hp,13.9 Nm,Grey,7.1 liters,CVT,Motor,2,Yamaha,NMAX


In [25]:
def encode_features(data):
    """Build the user-side and item-side multi-hot feature frames.

    The categorical columns ``colors``, ``vehicle_type`` and ``brand`` are
    encoded into one shared set of 0/1 indicator columns (one column per
    distinct value, in sorted order). The encoding is computed once and
    attached to both outputs.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain ``id_user``, ``id_vehicle``, ``rating``, ``colors``,
        ``vehicle_type`` and ``brand``. The frame is not modified.

    Returns
    -------
    (df_user, df_item) : tuple of pandas.DataFrame
        ``df_user`` has columns ``id_user``, ``rating`` + indicator columns.
        ``df_item`` has columns ``id_vehicle``, ``rating`` + indicator columns.
        Both keep the row index of ``data``.

    Notes
    -----
    A value that occurs in more than one of the three source columns maps to
    a single indicator column. Rows with a missing value in any of the three
    columns get all-zero indicators.
    """
    # '|' is used instead of '_' to join the fields, so that a category value
    # containing an underscore (e.g. "Dark_Blue") stays one feature instead of
    # being split into several.
    separator = '|'
    combined = data['colors'] + separator + data['vehicle_type'] + separator + data['brand']
    features = combined.str.get_dummies(sep=separator)

    # pd.concat builds new frames, so neither result shares state with `data`.
    df_user = pd.concat([data[['id_user', 'rating']], features], axis=1)
    df_item = pd.concat([data[['id_vehicle', 'rating']], features], axis=1)

    return df_user, df_item

# Prepare data: encode the merged rentals into the user-side and item-side frames
encoded_df_user, encoded_df_item = encode_features(merged_df)
encoded_df_user  # shown as the cell output

Unnamed: 0,id_user,rating,Black,Blue,Daihatsu,Green,Grey,Honda,Kawasaki,Mitsubishi,Mobil,Motor,Nissan,Red,Silver,Suzuki,Toyota,White,Yamaha,Yellow
0,8817,5,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0
1,7660,4,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0
2,7503,4,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0
3,10385,4,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0
4,434,5,0,0,0,0,0,1,0,0,0,1,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14590,4458,5,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0
14591,7412,5,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0
14592,4727,5,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0
14593,6339,4,0,0,0,0,1,0,0,0,0,1,0,0,0,0,0,0,1,0


In [27]:
# Pick the feature columns by name rather than by position. A positional
# `columns[2:]` silently drops the first feature if this cell is re-run after
# `rating` has been removed from encoded_df_user further down the notebook.
# errors='ignore' keeps the selection valid whether or not `rating` is present.
num_item_columns = encoded_df_item.columns.drop(['id_vehicle', 'rating'], errors='ignore')
num_user_columns = encoded_df_user.columns.drop(['id_user', 'rating'], errors='ignore')

In [28]:
# Turn every set indicator (== 1) into the rating given on that row and every
# unset indicator into NaN, so that the per-user mean below only averages over
# rentals that actually had the feature. Vectorised: no per-row Python calls.
feature_flags = encoded_df_user[num_user_columns]
rated_features = feature_flags.where(feature_flags.eq(1)).mul(encoded_df_user['rating'], axis=0)

# Average rating per user and feature; a feature the user never rented has no
# observations (NaN mean) and is recorded as 0.
df_user_avg = (
    rated_features
    .groupby(encoded_df_user['id_user'])
    .mean()
    .fillna(0)
    .reset_index()
)

# Broadcast each user's profile back onto every one of their rental rows.
# The result has `id_user` followed by the feature columns (no `rating`).
encoded_df_user = encoded_df_user[['id_user']].merge(df_user_avg, how='left', on='id_user')
encoded_df_user

Unnamed: 0,id_user,Black,Blue,Daihatsu,Green,Grey,Honda,Kawasaki,Mitsubishi,Mobil,Motor,Nissan,Red,Silver,Suzuki,Toyota,White,Yamaha,Yellow
0,8817,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
1,7660,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
2,7503,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0,5.0,4.0,0.0,5.0,0.0,0.0,0.0,4.0,0.0,0.0
3,10385,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0
4,434,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14590,4458,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
14591,7412,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
14592,4727,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0
14593,6339,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,4.0,0.0,0.0,0.0,0.0,0.0,0.0,4.0,0.0


In [29]:

# One standardiser per side, each fitted on that side's feature columns
# (`fit` returns the fitted scaler itself, so it can be chained).
scaler_user = StandardScaler().fit(encoded_df_user[num_user_columns])
scaler_item = StandardScaler().fit(encoded_df_item[num_item_columns])

# Only the item features are overwritten with their standardised values here;
# scaler_user is kept for transforming user vectors later.
item_features_scaled = scaler_item.transform(encoded_df_item[num_item_columns])
encoded_df_item[num_item_columns] = item_features_scaled

In [31]:
# Display the item frame; its feature columns now hold the standardised values
encoded_df_item

Unnamed: 0,id_vehicle,rating,Black,Blue,Daihatsu,Green,Grey,Honda,Kawasaki,Mitsubishi,Mobil,Motor,Nissan,Red,Silver,Suzuki,Toyota,White,Yamaha,Yellow
0,MTR0031,5,-0.702642,-0.195445,-0.306052,-0.089896,-0.103267,1.487968,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,1.472882,-0.451564,-0.101214
1,MTR0031,4,-0.702642,-0.195445,-0.306052,-0.089896,-0.103267,1.487968,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,1.472882,-0.451564,-0.101214
2,MTR0031,4,-0.702642,-0.195445,-0.306052,-0.089896,-0.103267,1.487968,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,1.472882,-0.451564,-0.101214
3,MTR0031,4,-0.702642,-0.195445,-0.306052,-0.089896,-0.103267,1.487968,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,1.472882,-0.451564,-0.101214
4,MTR0031,5,-0.702642,-0.195445,-0.306052,-0.089896,-0.103267,1.487968,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,1.472882,-0.451564,-0.101214
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
14590,MTR0036,5,-0.702642,-0.195445,-0.306052,-0.089896,9.683632,-0.672057,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,-0.678941,2.214526,-0.101214
14591,MTR0036,5,-0.702642,-0.195445,-0.306052,-0.089896,9.683632,-0.672057,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,-0.678941,2.214526,-0.101214
14592,MTR0036,5,-0.702642,-0.195445,-0.306052,-0.089896,9.683632,-0.672057,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,-0.678941,2.214526,-0.101214
14593,MTR0036,4,-0.702642,-0.195445,-0.306052,-0.089896,9.683632,-0.672057,-0.166132,-0.318866,-1.296318,1.296318,-0.214852,-0.481404,-0.333904,-0.221912,-0.536712,-0.678941,2.214526,-0.101214


In [66]:
# Location of the trained Keras model (a Colab upload path; adjust when running elsewhere).
MODEL_PATH = '/content/rental_recommendation_model (1).h5'
model = tf.keras.models.load_model(MODEL_PATH)

# Define user input
user_id = 5000000000
colors = 'Black'
vehicle_type = 'Mobil'
brand = 'Toyota'
rating = 5

# Prepare new data for prediction
new_data = {
    'id_user': user_id,
    'colors': colors,
    'vehicle_type': vehicle_type,
    'rating': rating,
    'brand': brand
}

# A single all-zero row with the same columns as encoded_df_user, built
# directly instead of enlarging an empty (object-dtype) frame.
new_df_encoded = pd.DataFrame(0, index=[0], columns=encoded_df_user.columns)
new_df_encoded['id_user'] = user_id

# Put the user's rating into the column of each chosen category. A value that
# has no matching column cannot be represented; warn instead of silently
# producing a profile that ignores the user's choice.
for field in ('colors', 'vehicle_type', 'brand'):
    category = new_data[field]
    if category in new_df_encoded.columns:
        new_df_encoded[category] = rating
    else:
        warnings.warn(f"{field}={category!r} has no matching feature column and is ignored")
new_df_encoded

Unnamed: 0,id_user,Black,Blue,Daihatsu,Green,Grey,Honda,Kawasaki,Mitsubishi,Mobil,Motor,Nissan,Red,Silver,Suzuki,Toyota,White,Yamaha,Yellow
0,5000000000,5,0,0,0,0,0,0,0,5,0,0,0,0,0,5,0,0,0


In [67]:

# Standardise the new user's features into a separate array. Writing the
# scaled values back into new_df_encoded would make this cell non-idempotent:
# a second run would standardise already-standardised values.
new_user_scaled = scaler_user.transform(new_df_encoded[num_user_columns])

# Repeat the single user row once per item row so both model inputs line up.
new_user = np.tile(new_user_scaled, (encoded_df_item.shape[0], 1))
new_user

array([[ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442],
       [ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442],
       [ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442],
       ...,
       [ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442],
       [ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442],
       [ 1.49679973, -0.20222926, -0.31850434, ..., -0.70573376,
        -0.46698269, -0.10485442]])

In [68]:
# Score every item row for the new user: the model takes the repeated user
# vector and the item feature columns as its two inputs.
predictions = model.predict(
    x=[new_user, encoded_df_item[num_item_columns]],
)




In [69]:
from sklearn.preprocessing import MinMaxScaler


In [70]:
# Fit a min-max scaler with range [-1, 1] on the observed ratings and use its
# inverse to map the model outputs back onto the rating scale.
# NOTE(review): assumes the model was trained on ratings scaled this way — confirm.
rating = merged_df['rating'].to_numpy().reshape(-1, 1)
scaler = MinMaxScaler(feature_range=(-1, 1)).fit(rating)
rating = scaler.transform(rating)
# Caution: `predictions` is overwritten, so running this cell twice applies
# the inverse transform twice; re-run the prediction cell first.
predictions = scaler.inverse_transform(predictions)
# Row positions ordered from highest to lowest predicted rating ...
descending_order = np.argsort(predictions, axis=0)[::-1]
sorted_predictions = descending_order.flatten()
# ... and the merged_df index labels in that same order.
sorted_item = merged_df.index.to_numpy()[sorted_predictions].flatten()


In [71]:
# Independent copy of just the columns joined onto the predictions afterwards
data_test = merged_df.loc[:, ['id_user', 'id_rental', 'id_vehicle', 'rating', 'vehicle_name','colors', 'number_of_seats']].copy()

In [72]:
# One entry per scored row, already ordered from best to worst prediction.
# `sorted_item` holds merged_df index labels, so it is used directly as the
# join key (feeding labels to `.iloc` is only correct for a default index).
dic_predictions = {
    'userId': np.full(len(sorted_item), user_id),
    'index': sorted_item,
    'predictions': predictions[sorted_predictions].flatten()
}
df_predictions = (
    pd.DataFrame(dic_predictions)
    .set_index('index')
    # Attach the vehicle details of the row each prediction was made for.
    .merge(data_test, how='left', left_index=True, right_index=True)
    # Rows are sorted best-first, so keeping the first row per vehicle keeps
    # its highest prediction (the other columns come from that same row).
    .drop_duplicates(subset=['id_vehicle'])
    # The historical renter and their rating are not part of the recommendation.
    .drop(columns=['id_user', 'rating'])
    .reset_index(drop=True)
)
df_predictions.head(10)

Unnamed: 0,userId,predictions,id_rental,id_vehicle,vehicle_name,colors,number_of_seats
0,5000000000,3.991671,29,MBL001,Suzuki Ertiga,Black,7
1,5000000000,3.934905,25,MBL0024,Toyota Rush,Black,7
2,5000000000,3.934905,73,MBL0027,Toyota Innova,Black,8
3,5000000000,3.934905,29,MBL0019,Toyota Avanza,Black,7
4,5000000000,3.934905,36,MBL0031,Toyota Kijang Innova,Black,8
5,5000000000,3.934905,115,MBL0035,Toyota Fortuner,Black,7
6,5000000000,3.85479,113,MTR0029,Yamaha MT-15,Black,2
7,5000000000,3.85479,48,MTR0038,Yamaha MT-25,Black,2
8,5000000000,3.85479,99,MTR001,Yamaha XMAX,Black,2
9,5000000000,3.85479,110,MTR0017,Yamaha Lexi,Black,2
