In [3]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
# Data yang diberikan
data = [['Alice','Item_1',5],['Alice','Item_2',3],['Alice','Item_3',4],['Alice','Item_4',4],
  ['User1','Item_1',3],['User1','Item_2',1],['User1','Item_3',2],['User1','Item_4',3],['User1','Item_5',3],
  ['User2','Item_1',4],['User2','Item_2',3],['User2','Item_3',4],['User2','Item_4',3],['User2','Item_5',5],
  ['User3','Item_1',3],['User3','Item_2',3],['User3','Item_3',1],['User3','Item_4',5],['User3','Item_5',4],
  ['User4','Item_1',1],['User4','Item_2',5],['User4','Item_3',5],['User4','Item_4',2],['User4','Item_5',1]]

ratings = pd.DataFrame(data, columns=['User', 'Item', 'Rating'])
print("Data Rating Awal:")
print(ratings)
print(f"Dimensi data: {ratings.shape}")
print("\n")

Data Rating Awal:
     User    Item  Rating
0   Alice  Item_1       5
1   Alice  Item_2       3
2   Alice  Item_3       4
3   Alice  Item_4       4
4   User1  Item_1       3
5   User1  Item_2       1
6   User1  Item_3       2
7   User1  Item_4       3
8   User1  Item_5       3
9   User2  Item_1       4
10  User2  Item_2       3
11  User2  Item_3       4
12  User2  Item_4       3
13  User2  Item_5       5
14  User3  Item_1       3
15  User3  Item_2       3
16  User3  Item_3       1
17  User3  Item_4       5
18  User3  Item_5       4
19  User4  Item_1       1
20  User4  Item_2       5
21  User4  Item_3       5
22  User4  Item_4       2
23  User4  Item_5       1
Dimensi data: (24, 3)




In [15]:
# Fungsi untuk membuat user-item matrix dari data mentah
def create_user_item_matrix(ratings_df):
    """Membuat user-item matrix dari dataframe rating"""
    user_item_matrix = ratings_df.pivot_table(
        index='User', 
        columns='Item', 
        values='Rating'
    )
    return user_item_matrix

# Fungsi untuk menghitung cosine similarity
def calculate_cosine_similarity(matrix):
    """Menghitung cosine similarity antara item-item"""
    # Mengisi nilai NaN dengan 0 untuk perhitungan similarity
    matrix_filled = matrix.fillna(0)
    similarity = cosine_similarity(matrix_filled.T)
    similarity_df = pd.DataFrame(
        similarity, 
        index=matrix.columns, 
        columns=matrix.columns
    )
    return similarity_df

# Fungsi untuk menghitung adjusted cosine similarity
def calculate_adjusted_cosine_similarity(matrix):
    """Menghitung adjusted cosine similarity antara item-item"""
    # Menghitung rating rata-rata setiap user
    user_means = matrix.mean(axis=1)
    
    # Mengurangi setiap rating dengan rata-rata user
    adjusted_matrix = matrix.sub(user_means, axis=0)
    
    # Mengisi nilai NaN dengan 0 untuk perhitungan similarity
    adjusted_matrix_filled = adjusted_matrix.fillna(0)
    
    similarity = cosine_similarity(adjusted_matrix_filled.T)
    similarity_df = pd.DataFrame(
        similarity, 
        index=matrix.columns, 
        columns=matrix.columns
    )
    return similarity_df

# Fungsi untuk prediksi rating dengan berbagai opsi
def predict_rating(user, item, similarity_matrix, user_item_matrix, threshold=0.5, k_neighbors=None):
    """Memprediksi rating untuk user pada item tertentu"""
    if item not in user_item_matrix.columns:
        return f"Item {item} tidak ada dalam data"
    
    if user not in user_item_matrix.index:
        return f"User {user} tidak ada dalam data"
    
    # Item yang sudah di-rating oleh user
    user_rated_items = user_item_matrix.loc[user].dropna().index
    
    if len(user_rated_items) == 0:
        return f"User {user} belum memberikan rating apapun"
    
    # Kumpulkan similarity dan rating untuk item yang di-rating user
    similarities_and_ratings = []
    
    for rated_item in user_rated_items:
        if rated_item == item:
            continue
            
        similarity = similarity_matrix.loc[rated_item, item]
        rating = user_item_matrix.loc[user, rated_item]
        similarities_and_ratings.append((similarity, rating, rated_item))
    
    # Filter berdasarkan threshold
    filtered_items = [(sim, rating) for sim, rating, item_name in similarities_and_ratings if sim > threshold]
    
    # Jika menggunakan k-neighbors, ambil k similarity tertinggi
    if k_neighbors and len(filtered_items) > k_neighbors:
        filtered_items.sort(key=lambda x: x[0], reverse=True)
        filtered_items = filtered_items[:k_neighbors]
    
    if len(filtered_items) == 0:
        return f"Tidak ada item serupa dengan similarity di atas threshold {threshold}"
    
    # Hitung prediksi
    numerator = sum(sim * rating for sim, rating in filtered_items)
    denominator = sum(abs(sim) for sim, rating in filtered_items)
    
    predicted_rating = numerator / denominator
    return predicted_rating

# Fungsi untuk evaluasi prediksi
def evaluate_predictions(actual_ratings, predicted_ratings):
    """Mengevaluasi kualitas prediksi"""
    mae = np.mean([abs(actual - pred) for actual, pred in zip(actual_ratings, predicted_ratings)])
    rmse = np.sqrt(np.mean([(actual - pred)**2 for actual, pred in zip(actual_ratings, predicted_ratings)]))
    return mae, rmse

In [16]:
user_item_matrix = create_user_item_matrix(ratings)
print("User-Item Matrix:")
print(user_item_matrix)
print(f"Dimensi matrix: {user_item_matrix.shape}")
print("\n")

User-Item Matrix:
Item   Item_1  Item_2  Item_3  Item_4  Item_5
User                                         
Alice     5.0     3.0     4.0     4.0     NaN
User1     3.0     1.0     2.0     3.0     3.0
User2     4.0     3.0     4.0     3.0     5.0
User3     3.0     3.0     1.0     5.0     4.0
User4     1.0     5.0     5.0     2.0     1.0
Dimensi matrix: (5, 5)




In [17]:
print("=== COSINE SIMILARITY ===")
cosine_sim = calculate_cosine_similarity(user_item_matrix)
print("Cosine Similarity Matrix:")
print(cosine_sim.round(3))
print("\n")

print("=== ADJUSTED COSINE SIMILARITY ===")
adjusted_cosine_sim = calculate_adjusted_cosine_similarity(user_item_matrix)
print("Adjusted Cosine Similarity Matrix:")
print(adjusted_cosine_sim.round(3))
print("\n")

=== COSINE SIMILARITY ===
Cosine Similarity Matrix:
Item    Item_1  Item_2  Item_3  Item_4  Item_5
Item                                          
Item_1   1.000   0.780   0.820   0.943   0.759
Item_2   0.780   1.000   0.942   0.848   0.673
Item_3   0.820   0.942   1.000   0.784   0.622
Item_4   0.943   0.848   0.784   1.000   0.812
Item_5   0.759   0.673   0.622   0.812   1.000


=== ADJUSTED COSINE SIMILARITY ===
Adjusted Cosine Similarity Matrix:
Item    Item_1  Item_2  Item_3  Item_4  Item_5
Item                                          
Item_1   1.000  -0.940  -0.547   0.268   0.714
Item_2  -0.940   1.000   0.621  -0.361  -0.853
Item_3  -0.547   0.621   1.000  -0.881  -0.764
Item_4   0.268  -0.361  -0.881   1.000   0.433
Item_5   0.714  -0.853  -0.764   0.433   1.000




In [18]:
alice_ratings = ratings[ratings['User'] == 'Alice']
all_items = ratings['Item'].unique()
alice_unrated_items = [item for item in all_items if item not in alice_ratings['Item'].values]

print(f"Item yang sudah di-rating Alice: {list(alice_ratings['Item'].values)}")
print(f"Item yang belum di-rating Alice: {alice_unrated_items}")
print("\n")

Item yang sudah di-rating Alice: ['Item_1', 'Item_2', 'Item_3', 'Item_4']
Item yang belum di-rating Alice: ['Item_5']




In [19]:
thresholds = [0.0, 0.3, 0.5, 0.7]

results_comparison = []

for threshold in thresholds:
    print(f"=== PREDIKSI DENGAN THRESHOLD = {threshold} ===")
    
    cosine_predictions = []
    adjusted_predictions = []
    
    print("\n1. MENGGUNAKAN COSINE SIMILARITY:")
    for item in alice_unrated_items:
        prediction = predict_rating('Alice', item, cosine_sim, user_item_matrix, threshold)
        if isinstance(prediction, float):
            print(f"Prediksi rating Alice untuk {item}: {prediction:.3f}")
            cosine_predictions.append(prediction)
        else:
            print(f"Prediksi rating Alice untuk {item}: {prediction}")
            cosine_predictions.append(None)
    
    print("\n2. MENGGUNAKAN ADJUSTED COSINE SIMILARITY:")
    for item in alice_unrated_items:
        prediction = predict_rating('Alice', item, adjusted_cosine_sim, user_item_matrix, threshold)
        if isinstance(prediction, float):
            print(f"Prediksi rating Alice untuk {item}: {prediction:.3f}")
            adjusted_predictions.append(prediction)
        else:
            print(f"Prediksi rating Alice untuk {item}: {prediction}")
            adjusted_predictions.append(None)
    
    # Simpan hasil untuk perbandingan
    results_comparison.append({
        'threshold': threshold,
        'cosine_predictions': cosine_predictions,
        'adjusted_predictions': adjusted_predictions
    })
    
    print("\n" + "="*60 + "\n")

=== PREDIKSI DENGAN THRESHOLD = 0.0 ===

1. MENGGUNAKAN COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.030

2. MENGGUNAKAN ADJUSTED COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.622


=== PREDIKSI DENGAN THRESHOLD = 0.3 ===

1. MENGGUNAKAN COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.030

2. MENGGUNAKAN ADJUSTED COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.622


=== PREDIKSI DENGAN THRESHOLD = 0.5 ===

1. MENGGUNAKAN COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.030

2. MENGGUNAKAN ADJUSTED COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 5.000


=== PREDIKSI DENGAN THRESHOLD = 0.7 ===

1. MENGGUNAKAN COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 4.483

2. MENGGUNAKAN ADJUSTED COSINE SIMILARITY:
Prediksi rating Alice untuk Item_5: 5.000




In [20]:
print("=== ANALISIS KOMPREHENSIF ===")
print("\nPerbandingan Similarity untuk Item_5 (item yang akan diprediksi):")

print("\nCosine Similarity dengan item lain:")
for item in user_item_matrix.columns:
    if item != 'Item_5':
        sim = cosine_sim.loc[item, 'Item_5']
        print(f"Similarity(Item_5, {item}): {sim:.3f}")

print("\nAdjusted Cosine Similarity dengan item lain:")
for item in user_item_matrix.columns:
    if item != 'Item_5':
        sim = adjusted_cosine_sim.loc[item, 'Item_5']
        print(f"Similarity(Item_5, {item}): {sim:.3f}")

=== ANALISIS KOMPREHENSIF ===

Perbandingan Similarity untuk Item_5 (item yang akan diprediksi):

Cosine Similarity dengan item lain:
Similarity(Item_5, Item_1): 0.759
Similarity(Item_5, Item_2): 0.673
Similarity(Item_5, Item_3): 0.622
Similarity(Item_5, Item_4): 0.812

Adjusted Cosine Similarity dengan item lain:
Similarity(Item_5, Item_1): 0.714
Similarity(Item_5, Item_2): -0.853
Similarity(Item_5, Item_3): -0.764
Similarity(Item_5, Item_4): 0.433


In [21]:
print("\n=== TABEL PERBANDINGAN HASIL PREDIKSI ===")
comparison_df = pd.DataFrame({
    'Item': alice_unrated_items,
    'Cosine_Threshold_0.0': [results_comparison[0]['cosine_predictions'][i] for i in range(len(alice_unrated_items))],
    'Cosine_Threshold_0.3': [results_comparison[1]['cosine_predictions'][i] for i in range(len(alice_unrated_items))],
    'Cosine_Threshold_0.5': [results_comparison[2]['cosine_predictions'][i] for i in range(len(alice_unrated_items))],
    'Adjusted_Threshold_0.0': [results_comparison[0]['adjusted_predictions'][i] for i in range(len(alice_unrated_items))],
    'Adjusted_Threshold_0.3': [results_comparison[1]['adjusted_predictions'][i] for i in range(len(alice_unrated_items))],
    'Adjusted_Threshold_0.5': [results_comparison[2]['adjusted_predictions'][i] for i in range(len(alice_unrated_items))]
})

print(comparison_df.round(3))


=== TABEL PERBANDINGAN HASIL PREDIKSI ===
     Item  Cosine_Threshold_0.0  Cosine_Threshold_0.3  Cosine_Threshold_0.5  \
0  Item_5                  4.03                  4.03                  4.03   

   Adjusted_Threshold_0.0  Adjusted_Threshold_0.3  Adjusted_Threshold_0.5  
0                   4.622                   4.622                     5.0  


In [22]:
print("\n=== REKOMENDASI UNTUK ALICE ===")
best_threshold = 0.5
final_cosine_pred = predict_rating('Alice', 'Item_5', cosine_sim, user_item_matrix, best_threshold)
final_adjusted_pred = predict_rating('Alice', 'Item_5', adjusted_cosine_sim, user_item_matrix, best_threshold)

if isinstance(final_cosine_pred, float) and isinstance(final_adjusted_pred, float):
    print(f"Dengan threshold {best_threshold}:")
    print(f"Cosine Similarity memprediksi rating: {final_cosine_pred:.3f}")
    print(f"Adjusted Cosine Similarity memprediksi rating: {final_adjusted_pred:.3f}")
    
    if abs(final_cosine_pred - final_adjusted_pred) < 0.1:
        print("Kedua metode memberikan prediksi yang similar")
    else:
        print("Terdapat perbedaan signifikan antara kedua metode")
        
    # Rekomendasi berdasarkan rata-rata
    final_prediction = (final_cosine_pred + final_adjusted_pred) / 2
    print(f"\nRekomendasi rating untuk Item_5: {final_prediction:.3f}")
    print(f"Kategori: {'Tinggi' if final_prediction >= 4 else 'Sedang' if final_prediction >= 3 else 'Rendah'}")


=== REKOMENDASI UNTUK ALICE ===
Dengan threshold 0.5:
Cosine Similarity memprediksi rating: 4.030
Adjusted Cosine Similarity memprediksi rating: 5.000
Terdapat perbedaan signifikan antara kedua metode

Rekomendasi rating untuk Item_5: 4.515
Kategori: Tinggi
