In [18]:
import os
import csv
from google.colab import drive
import pandas as pd
!pip install rouge
!pip install scikit-learn
!pip install sentence-transformers scikit-learn rouge nltk

# 주소 세팅하기
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision.models import resnet18
from transformers import BertTokenizer, BertModel
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from torchvision import transforms
from sentence_transformers import SentenceTransformer, util
from rouge import Rouge
from sklearn.feature_extraction.text import CountVectorizer
from nltk.translate.bleu_score import sentence_bleu
import numpy as np

# Google Drive 만용트
drive.mount('/content/drive')

# CSV 파일 경로
csv_file_path = "/content/drive/MyDrive/paper modeling/11.recomm/data/LLM추천결과.csv"

# CSV 파일 로드
df = pd.read_csv(csv_file_path)

# NaN 값 제거
df = df.dropna()
# Error 값 제거
df = df[~df.apply(lambda row: row.astype(str).str.contains("Error").any(), axis=1)]

print(df.head())

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
  사용자ID  나이   성별                                              구매상품명  \
0   ID6  23   남성                      Clinique For Men Skincare Set   
1   ID7  29   여성   Estée Lauder Double Wear Stay-in-Place Founda...   
2   ID8  32   남성                       NIVEA Men After Shave Lotion   
5  ID11  26   여성                                 MAC Matte Lipstick   
6  ID12  31   남성              Neutrogena Deep Clean Facial Cleanser   

                         상품설명  \
0   피부를 편안하게 관리할 수 있는 스킨케어 세트   
1      피부 결점을 커버해주는 고커버 파운데이션   
2      피부 진정과 보습을 도와주는 애프터쉐이브   
5          매트한 피니시의 고급스러운 립스틱   
6           피부를 깨끗하게 세정하는 클렌저   

                                                 이미지    매출액  \
0  /content/drive/MyDrive/paper modeling/11.recom...  15000   
1  /content/drive/MyDrive/paper modeling/11.recom...  40000   
2  /content/drive/MyDrive/paper modeling/11.recom...  70

In [29]:

# 탑크 다른 프로세스 만들기 (ResNet + BERT)
class ProductRecommendationModel(nn.Module):
    def __init__(self):
        super(ProductRecommendationModel, self).__init__()

        # ResNet for Image Features
        self.resnet = resnet18(pretrained=True)
        self.resnet.fc = nn.Identity()

        # BERT for Text Features
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
        self.bert = BertModel.from_pretrained('bert-base-uncased')

        # Fully Connected Layers
        self.fc1 = nn.Linear(512 + 768 + 2, 256)  # Image (512) + Text (768) + User Info (3)
        self.fc_sales = nn.Linear(256, 1)

    def forward(self, image, text, user_info):
        # Image Features
        image_features = self.resnet(image)

        # Text Features
        tokens = self.tokenizer(text, return_tensors="pt", padding=True, truncation=True, max_length=128)
        bert_output = self.bert(**tokens)
        text_features = bert_output.pooler_output

        # Combine Features
        combined_features = torch.cat((image_features, text_features, user_info), dim=1)
        x = F.relu(self.fc1(combined_features))
        predicted_sales = self.fc_sales(x)

        return predicted_sales

# Image preprocessing
image_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])


# 사용자-상품 정보를 활용할 Dataset 객체 정의
csv_path = "/content/drive/MyDrive/paper modeling/11.keyword/data/LLM추천결과.csv"
class InteractionDataset(Dataset):
    def __init__(self, csv_path, image_transform=None):
        self.data = pd.read_csv(csv_path)
        self.transform = image_transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]

        # 사용자 정보
        user_id = row['사용자ID']
        user_age = row['나이']
        user_gender = row['성별']
        # 상품 명 및 설명
        product_name = row['구매상품명']
        product_description = row['상품설명']
        # 상품 이미지 채워넣기
        image_path = row['이미지']
        image = Image.open(image_path).convert('RGB')
        if self.transform:
            image = self.transform(image)
        # 매출액
        salescount = row['매출액']
        review = row['리뷰']
        reviewscore = row['평점']
        search_history = row['검색기록']
        recomm = row['상품추천결과']

        other_info = torch.tensor([
            user_age,
            reviewscore
        ], dtype=torch.float32 , requires_grad=True)
        # nan 값 처리
        other_info = torch.nan_to_num(other_info, nan=0.0, posinf=1e6, neginf=-1e6)


        return user_id, user_age, user_gender, product_name, product_description, image_path, image, salescount, review, reviewscore, search_history,recomm, other_info




In [60]:

###########################################################################################
# 예측을 수행할CSV 파일 경로 -> dataset , dataloader에서 불러옴
###########################################################################################
csv_path = "/content/drive/MyDrive/paper modeling/11.recomm/data/LLM추천결과.csv"

# Create dataset and dataloader
dataset = InteractionDataset(csv_path, image_transform=image_transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=False)
for user_id, user_age, user_gender, product_name, product_description, image_path, image, salescount, review, reviewscore, search_history, recomm, other_info in dataloader:
    #print("Images batch:", images.size())
    print("product_name  :", product_name)

# Initialize model
model = ProductRecommendationModel()

# Training loop (simplified)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.MSELoss()  # For continuous value prediction (sales)
#criterion_sex = nn.CrossEntropyLoss()
#criterion_age = nn.CrossEntropyLoss()

###########################################################################################
# 예측을 수행할CSV 파일 dataset , dataloader에서 불러옴
###########################################################################################
# 결과 저장용 리스트
results = []

target_mae = 30
epochs = 100  #lr=1e-3 / 5000에폭만에 원본매출액에맞는 예측에도달, [9000,9000,3000,3000] 9000까지도달할때까지, 3000이 먼저도달후 다시감소함
for epoch in range(epochs):
    total_samples = 0
    total_loss = 0
    total_absolute_error = 0  # Mean Absolute Error (MAE)

    # 엑셀값 한줄씩 배열로 데이터제공
    for user_id, user_age, user_gender, product_name, product_description, image_path, image, salescount, review, reviewscore, search_history, recomm, other_info in dataloader:       ###예측하고싶은 csv데이터를 CustomDataset로 로드한다 (dataset을상속한 CustomDataset의 __get__item호출됨)
        optimizer.zero_grad()

        ##########################################################################################################################################
        # LLM 추천상품의 예상매출액
        #############################################################################################################################################
        pred_sales = model(image, recomm, other_info).squeeze()

        salescount = salescount / 1000
        print("===sales===" + str(salescount) + "===pred_sales===" + str(pred_sales))
        total_samples += salescount.size(0)

        # Compute loss
        total_loss += F.mse_loss(pred_sales, salescount, reduction="sum").item()
        total_absolute_error += torch.sum(torch.abs(pred_sales - salescount)).item()

        ###loss = criterion(pred_sales.squeeze(), sales)
        ###total_loss += loss.item()

        # Backward pass
        loss = criterion(pred_sales.squeeze(), salescount.type(torch.float32))
        loss.backward()
        optimizer.step()

        # Predictions and accuracy
        # pred_sex_classes = pred_sex.argmax(dim=1)
        # pred_age_classes = pred_age.argmax(dim=1)
        # 연속값 예측은 argmax로불가, 예측-실제의 오차줄이는방법
        # MSE :평균제곱오차로 전체적손실 계산 (낮을수록좋음)
        # MAE : 예측값,실제값 차이의 절대값의 평균
    mse = total_loss / total_samples
    mae = total_absolute_error / total_samples
    print(f'Epoch {epoch + 1}/{epochs}, "Evaluation - MSE: {mse:.4f}, MAE: {mae:.4f}"')

    ################################################################################
    # 학습종료시 매출액 예측수행
    ################################################################################
    # 학습 종료 조건
    if mae < target_mae:
        print(f"학습 종료: MAE가 {target_mae:.2f} 이하로 감소했습니다.")

        # 모델을 평가 모드로 전환
        model.eval()

        # 동일한 상태에서 결과를 비교하기 위해 그래디언트 계산 비활성화
        with torch.no_grad():
            for batch in dataloader:  # 전체 데이터셋 반복
                user_id, user_age, user_gender, product_name, product_description, image_path, image, salescount, review, reviewscore, search_history, recomm, other_info = batch

                for i in range(len(salescount)):  #배열안의갯수
                    print(len(salescount))
                    print(product_name[i])

                    # 추천데이터의 매출액예측
                    pred_sales = model(image, recomm, other_info).squeeze()

                    # 결과 저장
                    results.append({
                        "사용자ID": product_name[i].item() if isinstance(product_name[i], torch.Tensor) else product_name[i],
                        "나이": user_age[i].item() if isinstance(user_age[i], torch.Tensor) else user_age[i],
                        "성별": user_gender[i].item() if isinstance(user_gender[i], torch.Tensor) else user_gender[i],
                        "구매상품명": product_name[i].item() if isinstance(product_name[i], torch.Tensor) else product_name[i],
                        "상품설명": product_description[i].item() if isinstance(product_description[i], torch.Tensor) else product_description[i],
                        "이미지": image_path[i] if isinstance(image_path, list) else image_path,
                        "매출액": salescount[i].item() if isinstance(salescount[i], torch.Tensor) else salescount[i],
                        "##예측 매출액": pred_sales[i].item() if pred_sales.dim() > 0 else pred_sales.item(),
                        "리뷰": review[i].item() if isinstance(review[i], torch.Tensor) else review[i],
                        "평점": reviewscore[i].item() if isinstance(reviewscore[i], torch.Tensor) else reviewscore[i],
                        "검색기록": search_history[i].item() if isinstance(search_history[i], torch.Tensor) else search_history[i]
                    })

            # 학습 종료
            break

# 결과를 DataFrame으로 변환
results_df = pd.DataFrame(results)
# 결과 저장 경로
output_path = "/content/drive/MyDrive/paper modeling/11.recomm/data/매출예측결과.xlsx"
# Google Drive에 저장
results_df.to_excel(output_path, index=False)
print(f"학습 결과가 저장되었습니다: {output_path}")


product_name  : (' Clinique For Men Skincare Set', ' Estée Lauder Double Wear Stay-in-Place Foundation', ' NIVEA Men After Shave Lotion', ' Urban Decay Naked Eyeshadow Palette', " Kiehl's Ultra Facial Moisturizer", ' MAC Matte Lipstick', ' Neutrogena Deep Clean Facial Cleanser', ' La Roche-Posay Anthelios Sunscreen SPF 50')
product_name  : (' American Crew Fiber Hair Gel',)




===sales===tensor([15., 40., 70., 30., 25., 22., 55., 95.])===pred_sales===tensor([ 0.1145, -0.0253,  0.0685, -0.0559, -0.0576, -0.0261,  0.2741, -0.0503],
       grad_fn=<SqueezeBackward0>)
===sales===tensor([18.])===pred_sales===tensor(0.6188, grad_fn=<SqueezeBackward0>)


  total_loss += F.mse_loss(pred_sales, salescount, reduction="sum").item()
  return F.mse_loss(input, target, reduction=self.reduction)


Epoch 1/100, "Evaluation - MSE: 2340.2202, MAE: 41.0155"
===sales===tensor([15., 40., 70., 30., 25., 22., 55., 95.])===pred_sales===tensor([2.8852, 4.0132, 6.3812, 4.4999, 4.1122, 3.5595, 6.4948, 6.8548],
       grad_fn=<SqueezeBackward0>)
===sales===tensor([18.])===pred_sales===tensor(9.6681, grad_fn=<SqueezeBackward0>)
Epoch 2/100, "Evaluation - MSE: 1900.8360, MAE: 35.7257"
===sales===tensor([15., 40., 70., 30., 25., 22., 55., 95.])===pred_sales===tensor([ 7.3222,  9.5220, 20.8860,  8.6452,  8.4006,  7.8038, 10.7826, 25.7186],
       grad_fn=<SqueezeBackward0>)
===sales===tensor([18.])===pred_sales===tensor(17.1192, grad_fn=<SqueezeBackward0>)
Epoch 3/100, "Evaluation - MSE: 1232.1126, MAE: 28.2000"
학습 종료: MAE가 30.00 이하로 감소했습니다.
8
 Clinique For Men Skincare Set
8
 Estée Lauder Double Wear Stay-in-Place Foundation
8
 NIVEA Men After Shave Lotion
8
 Urban Decay Naked Eyeshadow Palette
8
 Kiehl's Ultra Facial Moisturizer
8
 MAC Matte Lipstick
8
 Neutrogena Deep Clean Facial Cleanser
8
