In [57]:
#pip install -qU langchain-gemini
#%pip install sklearn
#%pip install -U langchain-community
#!pip install langchain-google-genai
!pip install --upgrade torch streamlit

Defaulting to user installation because normal site-packages is not writeable


In [58]:
import pandas as pd
import numpy as np
from datetime import datetime
from sklearn.preprocessing import LabelEncoder
from sentence_transformers import SentenceTransformer
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI
import streamlit as st

## MERGE TABLES

In [44]:
customers = pd.read_csv(r"D:\GAMIC\PORTFOLIO\CLOTHING RETAIL - PRS POWERED BY LLM\01_Clothing_Retail_Synthetic_Data_Creation\customers.csv")
transactions = pd.read_csv(r"D:\GAMIC\PORTFOLIO\CLOTHING RETAIL - PRS POWERED BY LLM\01_Clothing_Retail_Synthetic_Data_Creation\transactions.csv")
products = pd.read_csv(r"D:\GAMIC\PORTFOLIO\CLOTHING RETAIL - PRS POWERED BY LLM\01_Clothing_Retail_Synthetic_Data_Creation\products.csv")
interactions = pd.read_csv(r"D:\GAMIC\PORTFOLIO\CLOTHING RETAIL - PRS POWERED BY LLM\01_Clothing_Retail_Synthetic_Data_Creation\interactions.csv")


merged_data = pd.merge(transactions, products, on="product_id")
merged_data = pd.merge(merged_data, customers, on="customer_id")


merged_data = pd.merge(
    merged_data,
    interactions,
    on=["customer_id", "product_id"],
    how="left",  
    suffixes=("_txn", "_int")
)

## CLEAN DATASETS

In [45]:
merged_data = merged_data.drop(columns=['email', 'stock'])

In [46]:
merged_data = merged_data.dropna(subset=[
    "customer_id", 
    "product_id", 
    "category",
    "preferred_style",
    "event_type"  
])

merged_data = merged_data.drop_duplicates(
    subset=["customer_id", "product_id", "event_timestamp"]
)

In [47]:
# Normalize dataframe columns
merged_data["gender"] = merged_data["gender"].str.lower()
merged_data["formality"] = merged_data["formality"].str.capitalize()

## COMBINE FEATURES FRO PRODUCTS 

In [48]:
# Count interactions by client-product
interaction_counts = interactions.groupby(
    ["customer_id", "product_id"]
)["event_type"].count().reset_index(name="total_interactions")

# Last event by client-producto
last_interaction = interactions.sort_values("event_timestamp").groupby(
    ["customer_id", "product_id"]
).last().reset_index()[["customer_id", "product_id", "event_type"]]
last_interaction.rename(columns={"event_type": "ultimo_evento"}, inplace=True)

merged_data = pd.merge(merged_data, interaction_counts, on=["customer_id", "product_id"], how="left")
merged_data = pd.merge(merged_data, last_interaction, on=["customer_id", "product_id"], how="left")

# Semantic descipttion combining features
products["text_description"] = (
    "Category: " + products["category"] + ". " +
    "Subcategory: " + products["subcategory"] + ". " +
    "Style: " + products["formality"] + ". " +
    "Color: " + products["color"] + ". " +
    "Materials: " + products["materials"] + ". " +
    "Season: " + products["season"] + ". " +
    "Brand type: " + products["brand_tier"]
)

# Calculate ratio interaction-transaction
merged_data["ratio_interaccion_compra"] = (
    merged_data["total_interactions"] / merged_data.groupby("customer_id")["total_interactions"].transform("sum")
)

## CATEGORIC DATA NORMALIZATION

In [49]:
encoder = LabelEncoder()
merged_data["event_type_encoded"] = encoder.fit_transform(merged_data["event_type"])

merged_data["hora_interaccion"] = pd.to_datetime(merged_data["event_timestamp"]).dt.hour
merged_data["dia_semana_interaccion"] = pd.to_datetime(merged_data["event_timestamp"]).dt.dayofweek

## SEMANTIC EMBEDDINGS CREATION

In [50]:
from sentence_transformers import SentenceTransformer

# Embeddings for products
model = SentenceTransformer("paraphrase-multilingual-MiniLM-L12-v2")
product_embeddings = model.encode(products["text_description"])
products["embedding_producto"] = product_embeddings.tolist()

# Embeddings for customers behaviors
client_interactions = merged_data.groupby("customer_id")["event_type"].agg(list).reset_index()
client_interactions["comportamiento_texto"] = client_interactions["event_type"].apply(lambda x: " ".join(x))
client_embeddings = model.encode(client_interactions["comportamiento_texto"])
client_interactions["embedding_cliente"] = client_embeddings.tolist()

merged_data = pd.merge(merged_data, client_interactions[["customer_id", "embedding_cliente"]], on="customer_id")

## TEMPORAL DATA PREPARATION

In [51]:
merged_data.to_parquet("dataset_preprocesado.parquet", index=False)
products.to_parquet("products_embeddings.parquet", index=False)

## VECTORIAL DATABASE CONFIGURATION

In [52]:
# Load prodicts embeddings
products = pd.read_parquet("products_embeddings.parquet")
embeddings_list = np.array(products["embedding_producto"].tolist(), dtype=np.float32)

# Create FAISS index
vector_db = FAISS.from_embeddings(
    text_embeddings=list(zip(products["text_description"], embeddings_list)),
    embedding=HuggingFaceEmbeddings(model_name="paraphrase-multilingual-MiniLM-L12-v2")
)


vector_db.save_local("faiss_index")

## GEMINI INTEGRATION

In [53]:
from langchain.chains import RetrievalQA
from langchain_google_genai import ChatGoogleGenerativeAI

# Configure Gemini
llm = ChatGoogleGenerativeAI(
    model="gemini-pro",
    google_api_key="AIzaSyBO0JURYEXi-up4dvUbjwjonSRpZcB92TU",  # Reemplazar con tu clave
    temperature=0.3  # Controla la creatividad (0 = preciso, 1 = creativo)
)

# Build rcomendation chain
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=vector_db.as_retriever(search_kwargs={"k": 5}),  # Top 5 resultados
    chain_type="stuff",
    input_key="query"
)

## RECOMENDATION LOGIC

In [54]:
def get_recommendations(customer_id: int):
    # Get customer history
    customer_data = merged_data[merged_data["customer_id"] == customer_id]
    
    # Extract last relevant interaction
    last_interaction = customer_data.sort_values("event_timestamp").iloc[-1]
    query_text = f"""
        Cliente de {last_interaction['age']} aÃ±os, gÃ©nero {last_interaction['gender']},
        ubicado en {last_interaction['location']}. Ãšltima interacciÃ³n: {last_interaction['ultimo_evento']}
        con producto: {last_interaction['texto_producto']}.
    """
    
    # Search for similar products and generate explanation 
    response = qa_chain.run(
        f"Recomienda 3 productos personalizados para este perfil. Contexto: {query_text}"
    )
    
    return response

## STREAMLIT INTERFACE

In [55]:
import streamlit as st

st.set_page_config(page_title="Asistente de Moda", layout="wide")
st.title("ðŸŽ¨ Recomendador Personalizado de Moda")

# User input
customer_id = st.number_input("Ingrese su ID de cliente:", min_value=1)

if customer_id:
    try:
        # Get recommendations
        with st.spinner("Buscando las mejores opciones para ti..."):
            recommendations = get_recommendations(customer_id)
        
        # Show results
        st.subheader("ðŸ’¡ Recomendaciones basadas en tu estilo:")
        st.markdown(recommendations)
        
        # Show recent history
        st.subheader("ðŸ“š Tu Historial Reciente")
        st.dataframe(
            merged_data[merged_data["customer_id"] == customer_id][
                ["product_name", "category", "purchase_date"]
            ].tail(3)
        )
    
    except Exception as e:
        st.error(f"Error: {str(e)}")



## Run and Test the System

In [56]:
#streamlit run app.py

In [2]:
!pip install google.generativeai

Collecting google.generativeai
  Using cached google_generativeai-0.8.4-py3-none-any.whl.metadata (4.2 kB)
Collecting google-ai-generativelanguage==0.6.15 (from google.generativeai)
  Using cached google_ai_generativelanguage-0.6.15-py3-none-any.whl.metadata (5.7 kB)
Collecting google-api-core (from google.generativeai)
  Using cached google_api_core-2.24.2-py3-none-any.whl.metadata (3.0 kB)
Collecting google-api-python-client (from google.generativeai)
  Using cached google_api_python_client-2.166.0-py2.py3-none-any.whl.metadata (6.6 kB)
Collecting google-auth>=2.15.0 (from google.generativeai)
  Using cached google_auth-2.38.0-py2.py3-none-any.whl.metadata (4.8 kB)
Collecting pydantic (from google.generativeai)
  Using cached pydantic-2.11.1-py3-none-any.whl.metadata (63 kB)
Collecting proto-plus<2.0.0dev,>=1.22.3 (from google-ai-generativelanguage==0.6.15->google.generativeai)
  Using cached proto_plus-1.26.1-py3-none-any.whl.metadata (2.2 kB)
Collecting googleapis-common-protos<2.0

In [4]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyBO0JURYEXi-up4dvUbjwjonSRpZcB92TU")
print(genai.list_models())  # Debe mostrar "gemini-1.5-pro-latest"

<generator object list_models at 0x000001AC4030EB90>
