In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import gradio as gr
from PIL import Image, ImageTk
from faker import Faker
from transformers import pipeline
from keybert import KeyBERT

In [None]:
kw_model = KeyBERT(model='all-mpnet-base-v2')

# Basic Dataset erstellen

3 Kategorien: Hotels, Restaurants & Aktivitäten

In [None]:
df_rest = pd.read_csv('01_Data\dataset_Google-Maps-Reviews-Restaurants_2023-03-26_09-43-08-320.csv')
df_act = pd.read_csv('01_Data\dataset_Google-Maps-Reviews-Activities_2023-03-26_11-08-15-435.csv')
df_hotel = pd.read_csv('01_Data\dataset_Google-Maps-Reviews-Hotels_2023-03-26_11-34-16-492.csv')

print('Restaurants (Shape): ', df_rest.shape)
print('Activities (Shape): ', df_act.shape)
print('Hotels (Shape): ', df_hotel.shape)

In [None]:
columns_to_keep = [
    "stars",
    "publishedAtDate",
    "name",
    "text",
    "title",
    "subTitle",
    "description",
    "price",
    "totalScore",
    "likesCount",
    "isLocalGuide",
    "reviewId",
    "categoryName",
    "reviewImageUrls/0",
    "reviewImageUrls/1",
    "reviewImageUrls/2",
    "reviewImageUrls/3",
    "reviewImageUrls/4",
    "reviewImageUrls/5",
    "reviewImageUrls/6",
    "reviewImageUrls/7",
    "reviewImageUrls/8",
    "reviewImageUrls/9",
    "reviewUrl",
    "reviewerId",
    "reviewerNumberOfReviews",
    "reviewerPhotoUrl",
    "reviewerUrl",
    "reviewsCount",
    "scrapedAt",
    "state",
    "street",
    "temporarilyClosed",
    "url",
    "website",

    # at least for restaurants
    "categories/0",
    "categories/1",
    "categories/2",
    "categoryName"
]

In [None]:
df_rest = df_rest.dropna(subset=["reviewImageUrls/0"])
df_rest = df_rest[columns_to_keep]
df_rest['genre'] = 'restaurant'
print(df_rest.shape)

In [None]:
df_act = df_act.dropna(subset=["reviewImageUrls/0"])
df_act = df_act[columns_to_keep]
df_act['genre'] = 'activity'
print(df_act.shape)

In [None]:
df_hotel = df_hotel.dropna(subset=["reviewImageUrls/0"])
df_hotel = df_hotel[columns_to_keep]
df_hotel['genre'] = 'hotel'
print(df_hotel.shape)

In [None]:
df = pd.concat([df_rest, df_act, df_hotel], ignore_index=True)
print(df.shape)
df.to_csv('all_rest_act_hotel.csv', index=False)

Resizing the images to 256-256 pixels via gettin the better url

In [None]:
image_urls_cols = [
    "reviewImageUrls/0",
    "reviewImageUrls/1",
    "reviewImageUrls/2",
    "reviewImageUrls/3",
    "reviewImageUrls/4",
    "reviewImageUrls/5",
    "reviewImageUrls/6",
    "reviewImageUrls/7",
    "reviewImageUrls/8",
    "reviewImageUrls/9"
]

for col in image_urls_cols:
    df[col] = df[col].str.replace("=w150-h150-k-no-p", "=w256-h256-p-k-no")

print(df.shape)

### Extract Keywords With KeyBert

In [None]:
df['text'] = df['text'].astype(str)
print("Convertion done.")

df['keywords'] = df['text'].apply(lambda x: kw_model.extract_keywords(x,keyphrase_ngram_range=(1,1) ,
                                     stop_words='english', 
                                     highlight=False,
                                     top_n=5))
print("Keywords extraction done.")
df.to_csv('base_keywords_sentiment.csv', index=False)


### Sentiment analyse

In [None]:
model_path = "cardiffnlp/twitter-xlm-roberta-base-sentiment"
sentiment_task = pipeline("sentiment-analysis", model=model_path, tokenizer=model_path)

In [34]:
df = pd.read_csv('base_keywords_sentiment.csv')
df['sentiment'] = np.nan
df['sentiment'] = df['sentiment'].astype(str)
df['text'] = df['text'].astype(str)
df['sent_score_0'] = np.nan

for idx, row in df.iterrows():
    try: 
        print("Processing row: ", idx)
        sentiment = sentiment_task(row['text'])
        df.at[idx, 'sentiment'] = sentiment
    except RuntimeError:
        print("Skipped due to messy data. Row: ", idx)
        pass
df.to_csv('base_keywords_sentiment.csv', index=False)

Processing row:  0
Processing row:  1
Processing row:  2
Processing row:  3
Processing row:  4
Processing row:  5
Processing row:  6
Processing row:  7
Processing row:  8
Processing row:  9
Processing row:  10
Processing row:  11
Processing row:  12
Processing row:  13
Processing row:  14
Processing row:  15
Processing row:  16
Processing row:  17
Processing row:  18
Processing row:  19
Processing row:  20
Processing row:  21
Processing row:  22
Processing row:  23
Processing row:  24
Processing row:  25
Processing row:  26
Processing row:  27
Processing row:  28
Processing row:  29
Processing row:  30
Processing row:  31
Processing row:  32
Processing row:  33
Processing row:  34
Processing row:  35
Processing row:  36
Processing row:  37
Processing row:  38
Processing row:  39
Processing row:  40
Processing row:  41
Processing row:  42
Processing row:  43
Processing row:  44
Processing row:  45
Processing row:  46
Processing row:  47
Processing row:  48
Processing row:  49
Processing

In [None]:
#only store the keywords in a list not the scores

# df['keywords_only'] = df['keywords'].apply(lambda x: [i[0] for i in x])
# print(df['keywords_only'])
# df.to_csv('base_keywords_sentiment.csv', index=False)

## Gradio Anwendung Versuch

### HTML Bewertung im Stil von Google Maps erstellen

In [None]:
def generate_review_html(stars, date, user, is_local_guide, text, user_image, review_images):
    full_stars = int(stars)
    half_star = (stars % 1) > 0
    empty_stars = 5 - full_stars - half_star
    full_star_html = '<span class="star full">&#9733;</span>'
    half_star_html = '<span class="star half">&#9733;</span>'
    empty_star_html = '<span class="star empty">&#9733;</span>'
    stars_html = (full_star_html * full_stars) + (half_star_html * half_star) + (empty_star_html * empty_stars)
    user_name = user if not is_local_guide else f"{user} (local guide)"
    
    html = f"""
    <style>
      .star.full {{
        color: #f1c40f;
      }}
      .star.half {{
        color: #f1c40f;
        position: relative;
      }}
      .star.half::before {{
        content: '\\2605';
        position: absolute;
        left: 0;
        color: #ddd;
      }}
      .star.empty {{
        color: #ddd;
      }}
    </style>
    <div class="review">
      <div class="user">
        <img src={user_image} alt="User profile picture">
        <div class="user-info">
          <h4>{user_name}</h4>
          <p>Submitted on {date}</p>
        </div>
      </div>
      <div class="rating">
        {stars_html}
      </div>
      <div class="review-text">
        <p>{text}</p>
      </div>
      <div class="review-image">
        <img src={review_images} alt="Restaurant image">
      </div>
    </div>
    """
    return html

In [None]:
def generate_deepfake(review_about, stars):
    text = fake.text()
    stars = fake.random_int(min=1, max=5, step=1)
    date = fake.date_between(start_date='-1y', end_date='today')
    user = fake.name()
    local_guide = fake.boolean(chance_of_getting_true=7)
    html_review = generate_review_html(stars, date, user, local_guide, text, user_image="https://via.placeholder.com/50/%22%20alt=/%22Restaurant%20image/", review_images = "https://lh5.googleusercontent.com/p/AF1QipPsYMpfUpwmH5ltkPNIeDbgPivvBstyJCea32vJ=w150-h150-k-no-p")
    return html_review

def verify_review(stars, date, user, local_guide, text, image):
    return "FAKE", "0.9"


In [None]:
with gr.Blocks() as demo:
    with gr.Tab ("Create Fake Review"):
        review_about = gr.Textbox(label="Review about")
        review_stars = gr.Slider(label="Stars", minimum=1, maximum=5, step=1)
        generate_btn = gr.Button("Generate Multi-Modal Review")
        html_output = gr.HTML(label="Review") 
        generate_btn.click(fn=generate_deepfake, inputs=[review_about, review_stars], outputs=[html_output])
    
    with gr.Tab ("Verify Review"):
        with gr.Row(): 
            input_stars = gr.Slider(label="Stars", minimum=1, maximum=5, step=1)
            input_date = gr.Textbox(label="Date", default="2023-01-01")
            input_user = gr.Textbox(label="User")
            input_local_guide = gr.Checkbox(label="Local Guide")
       
        input_text = gr.Textbox(label="Review Text")
        input_image = gr.Image(label="Review Image")
        verify_btn = gr.Button("Verify Review")
        output_text = gr.Textbox(label="Generated Text")
        output_score = gr.Textbox(label="Score")
        verify_btn.click(fn=verify_review, inputs=[input_stars, input_date, input_user, input_local_guide, input_text, input_image], outputs=[output_text, output_score])

demo.launch(debug=True)