In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
import pickle
from sklearn.metrics.pairwise import cosine_similarity

In [4]:
# Location of the product metadata CSV under the Kaggle input directory.
filepath = "/kaggle/input/fashion-product-images-small/styles.csv"

# on_bad_lines='skip' silently drops rows the parser cannot tokenize instead of
# raising, so the frame may hold fewer rows than the file has lines.
df = pd.read_csv(filepath, on_bad_lines='skip')

# Preview the first rows to check which columns were loaded.
df.head()

Unnamed: 0,id,gender,masterCategory,subCategory,articleType,baseColour,season,year,usage,productDisplayName
0,15970,Men,Apparel,Topwear,Shirts,Navy Blue,Fall,2011.0,Casual,Turtle Check Men Navy Blue Shirt
1,39386,Men,Apparel,Bottomwear,Jeans,Blue,Summer,2012.0,Casual,Peter England Men Party Blue Jeans
2,59263,Women,Accessories,Watches,Watches,Silver,Winter,2016.0,Casual,Titan Women Silver Watch
3,21379,Men,Apparel,Bottomwear,Track Pants,Black,Fall,2011.0,Casual,Manchester United Men Solid Black Track Pants
4,53759,Men,Apparel,Topwear,Tshirts,Grey,Summer,2012.0,Casual,Puma Men Grey T-shirt


In [5]:
# Keep only the product id and the two categorical columns used further down.
# This rebinds `df`, so the other columns are gone until the CSV is read again.
df = df[['id', 'articleType', 'usage']]

In [None]:
# Whitelists: only rows whose article type AND usage both appear here are kept.
valid_article_types = [
    'Shirts', 'Tshirts', 'Jeans', 'Casual Shoes', 'Flip Flops',
    'Tops', 'Sandals', 'Sweatshirts', 'Formal Shoes', 'Flats',
    'Sports Shoes', 'Heels', 'Dresses', 'Backpacks', 'Caps',
    'Trousers', 'Jackets', 'Sweaters', 'Skirts'
]
valid_usages = ['Casual', 'Formal', 'Sports']

filtered_df = df[df['articleType'].isin(valid_article_types) & df['usage'].isin(valid_usages)]

# Draw at most 20 rows from every (articleType, usage) group, with a fixed seed
# so the sample is reproducible.
# Each group is sampled directly instead of going through groupby(...).apply(...):
# apply operating on the grouping columns is deprecated in pandas and would
# eventually drop 'articleType' / 'usage' from the result. Groups are visited in
# sorted key order and sampled with the same seed, so the selected rows are the
# same as before.
sampled_groups = [
    group.sample(n=min(len(group), 20), random_state=42)
    for _, group in filtered_df.groupby(['articleType', 'usage'])
]
final_df = (
    # pd.concat raises on an empty list, so fall back to an empty frame with
    # the same columns when nothing survived the filter.
    pd.concat(sampled_groups) if sampled_groups else filtered_df.iloc[0:0]
).reset_index(drop=True)

final_df.head()

  .apply(lambda x: x.sample(n=min(len(x), 20), random_state=42))


Unnamed: 0,id,articleType,usage
0,21248,Backpacks,Casual
1,25880,Backpacks,Casual
2,25870,Backpacks,Casual
3,32864,Backpacks,Casual
4,38738,Backpacks,Casual


In [None]:
# Number of sampled rows per (usage, articleType) pair; pairs that had fewer
# than 20 rows available show a smaller count.
combination_counts = final_df.groupby(['usage', 'articleType']).size().reset_index(name='count')

# to_string(index=False) prints the full table without the row index column.
print(combination_counts.to_string(index=False))

 usage  articleType  count
Casual    Backpacks     20
Casual         Caps     20
Casual Casual Shoes     20
Casual      Dresses     20
Casual        Flats     20
Casual   Flip Flops     20
Casual Formal Shoes     20
Casual        Heels     20
Casual      Jackets     20
Casual        Jeans     20
Casual      Sandals     20
Casual       Shirts     20
Casual       Skirts     20
Casual Sports Shoes     20
Casual     Sweaters     20
Casual  Sweatshirts     20
Casual         Tops     20
Casual     Trousers     20
Casual      Tshirts     20
Formal Casual Shoes     14
Formal      Dresses      2
Formal        Flats      1
Formal Formal Shoes     20
Formal        Heels     20
Formal      Sandals      9
Formal       Shirts     20
Formal       Skirts     13
Formal         Tops      3
Formal     Trousers     20
Formal      Tshirts      2
Sports    Backpacks     20
Sports         Caps     20
Sports Casual Shoes     12
Sports        Flats      2
Sports   Flip Flops      7
Sports      Jackets     20
S

In [None]:
# One text field per product: article type and usage joined by a single space.
# This adds a column to `final_df` in place.
final_df['combined_features'] = final_df['articleType'] + ' ' + final_df['usage']

# Fit a TF-IDF vocabulary on those strings (default vectorizer settings) and
# vectorize them in the same call.
tfidf = TfidfVectorizer()
tfidf_matrix = tfidf.fit_transform(final_df['combined_features'])

def compute_similarity_matrix(vectors):
    """Return the pairwise cosine-similarity matrix of the rows of ``vectors``.

    Parameters
    ----------
    vectors : array-like
        Dense numeric array with one row per item. A 1-D input is treated as
        ``n`` one-dimensional vectors.

    Returns
    -------
    numpy.ndarray
        Symmetric ``(n, n)`` float array where entry ``[i, j]`` is
        ``dot(v_i, v_j) / (||v_i|| * ||v_j||)``. Any pair involving a
        zero-norm row gets similarity 0 (including that row with itself).
    """
    vectors = np.asarray(vectors, dtype=float)
    n = vectors.shape[0]
    if n == 0:
        return np.zeros((0, 0))
    vectors = vectors.reshape(n, -1)

    # Each row norm is computed once instead of once per (i, j) pair.
    norms = np.linalg.norm(vectors, axis=1)
    dot_products = vectors @ vectors.T
    norm_products = np.outer(norms, norms)

    # Divide only where both norms are non-zero; the remaining entries keep
    # the 0 they were initialised with, and no divide-by-zero warning fires.
    similarity_matrix = np.zeros((n, n))
    np.divide(dot_products, norm_products, out=similarity_matrix, where=norm_products > 0)

    return similarity_matrix

# Densify the sparse TF-IDF matrix; compute_similarity_matrix is handed a
# plain array.
tfidf_array = tfidf_matrix.toarray()

cosine_sim_manual = compute_similarity_matrix(tfidf_array)

# Bundle the fitted vectorizer with the similarity matrix so both can be
# restored from a single file.
model = {
    'tfidf': tfidf,
    'cosine_sim': cosine_sim_manual
}

# Relative path: the pickle is written to the kernel's current working directory.
with open('trained_model_manual.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

# The message is Indonesian for: "Model successfully trained with manual
# cosine similarity computation and saved."
print("Model berhasil dilatih dengan perhitungan cosine similarity manual dan disimpan.")

Model berhasil dilatih dengan perhitungan cosine similarity manual dan disimpan.


-----------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# Restore the fitted vectorizer and similarity matrix saved earlier.
# NOTE: pickle.load can execute arbitrary code; only load files this notebook
# produced itself.
# NOTE(review): the file was written with a relative path but is read from
# /kaggle/working, so this only works when that is the kernel's working directory.
with open('/kaggle/working/trained_model_manual.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

tfidf = model['tfidf']
cosine_sim = model['cosine_sim']

# Draw a second sample (different seed) of at most 20 rows per
# (articleType, usage) group from the filtered data.
# Each group is sampled directly instead of going through groupby(...).apply(...):
# apply operating on the grouping columns is deprecated in pandas and would
# eventually drop 'articleType' / 'usage' from the result. Groups are visited in
# sorted key order and sampled with the same seed, so the selected rows are the
# same as before.
sampled_groups = [
    group.sample(n=min(len(group), 20), random_state=3)
    for _, group in filtered_df.groupby(['articleType', 'usage'])
]
data_cloud = (
    # pd.concat raises on an empty list, so fall back to an empty frame with
    # the same columns when there is nothing to sample from.
    pd.concat(sampled_groups) if sampled_groups else filtered_df.iloc[0:0]
).reset_index(drop=True)

# Same text feature the vectorizer was fitted on: "<articleType> <usage>".
data_cloud['combined_features'] = data_cloud['articleType'] + ' ' + data_cloud['usage']

  .apply(lambda x: x.sample(n=min(len(x), 20), random_state=3))


In [10]:
data_cloud.head()

Unnamed: 0,id,articleType,usage,combined_features
0,25880,Backpacks,Casual,Backpacks Casual
1,25224,Backpacks,Casual,Backpacks Casual
2,51324,Backpacks,Casual,Backpacks Casual
3,24328,Backpacks,Casual,Backpacks Casual
4,53365,Backpacks,Casual,Backpacks Casual


In [None]:
# Vectorize the new sample with the already-fitted vocabulary
# (transform, not fit_transform, so the vocabulary is not re-learned).
tfidf_matrix_cloud = tfidf.transform(data_cloud['combined_features'])

# Pairwise cosine similarity between every pair of rows in the new sample.
cosine_sim_cloud = cosine_similarity(tfidf_matrix_cloud, tfidf_matrix_cloud)

def get_recommendations(product_id, cosine_sim, data_cloud, top_n=3):
    """Return the products most similar to ``product_id``.

    Parameters
    ----------
    product_id
        Value looked up in ``data_cloud['id']``; the first matching row is used.
    cosine_sim
        Square similarity matrix whose row/column ``k`` corresponds to the
        ``k``-th row (by position) of ``data_cloud``.
    data_cloud : pandas.DataFrame
        Frame with at least the columns ``'id'``, ``'articleType'``, ``'usage'``.
    top_n : int, default 3
        Maximum number of recommendations to return.

    Returns
    -------
    pandas.DataFrame
        Up to ``top_n`` rows of ``data_cloud`` (columns ``id``, ``articleType``,
        ``usage``), ordered by decreasing similarity, never containing the
        queried product's own row. Ties keep their original row order.

    Raises
    ------
    IndexError
        If ``product_id`` does not occur in ``data_cloud['id']``.
    """
    # Work with row positions, not index labels: cosine_sim and .iloc are both
    # positional, so this stays correct when data_cloud has a non-default index.
    matches = np.flatnonzero((data_cloud['id'] == product_id).to_numpy())
    if matches.size == 0:
        raise IndexError(f"product_id {product_id!r} not found in data_cloud['id']")
    idx = int(matches[0])

    scores = np.asarray(cosine_sim[idx], dtype=float).ravel()

    # Stable descending sort, so tied scores keep their original row order.
    ranked = np.argsort(-scores, kind='stable')

    # Drop the query row explicitly. Slicing off the first sorted entry is not
    # enough: other rows can tie with the self-similarity and sort ahead of it.
    product_indices = ranked[ranked != idx][:top_n]

    recommended_products = data_cloud.iloc[product_indices]

    return recommended_products[['id', 'articleType', 'usage']]

product_id_to_recommend = 25880  # Example: request recommendations for the product with ID 25880
recommended_products = get_recommendations(product_id_to_recommend, cosine_sim_cloud, data_cloud)

# The printed label is Indonesian for "Product recommendations for ID:".
print("Rekomendasi produk untuk ID:", product_id_to_recommend)
print(recommended_products)

Rekomendasi produk untuk ID: 25880
      id articleType   usage
1  25224   Backpacks  Casual
2  51324   Backpacks  Casual
3  24328   Backpacks  Casual
