# Translation

In [2]:
import os
import re
from deep_translator import GoogleTranslator
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
from time import sleep

# Temizleme fonksiyonu - Noktalama eksikliklerini gidermek için
def clean_text(text):
    # Çift boşlukları tek boşluğa indirir, hatalı noktalama işaretlerini düzeltir
    text = re.sub(r'\s+', ' ', text)
    text = re.sub(r'([.!?])([A-Za-z])', r'\1 \2', text)  # Noktadan sonra boşluk ekler
    return text.strip()

# Cümleleri ayırma fonksiyonu
def split_into_sentences(text):
    sentences = re.split(r'(?<=[.!?])\s+', text)  # Noktalama işaretlerine göre cümleleri ayırır
    return [sentence for sentence in sentences if sentence]  # Boş cümleleri filtreler

# Çeviri fonksiyonu - Hata yönetimi ile
def translate_text(text_chunks):
    translator = GoogleTranslator(source='auto', target='en')
    translated_text = ''
    
    for i, chunk in enumerate(text_chunks):
        try:
            # Çeviri işlemi yap
            translated_chunk = translator.translate(chunk)
            translated_text += translated_chunk + ' '
            print(f"Chunk {i + 1}/{len(text_chunks)} successfully translated.")
        except Exception as e:
            print(f"Error: {e} - Retrying chunk {i + 1}")
            sleep(1)  # Küçük bir gecikme koyarak tekrar deneme
            try:
                translated_chunk = translator.translate(chunk)
                translated_text += translated_chunk + ' '
            except Exception as e:
                print(f"Failed again for chunk {i + 1}. Skipping this chunk.")
                continue  # Hata varsa chunk atlanır

    return translated_text.strip()


# Dosya işleme ve çeviri fonksiyonu
def process_and_translate(input_dir, translated_dir):
    comments = []
    for filename in os.listdir(input_dir):
        if filename.endswith('.txt'):
            input_file = os.path.join(input_dir, filename)

            with open(input_file, 'r', encoding='utf-8') as file:
                turkish_txt = file.read()
            
            # Temizleme işlemi
            cleaned_turkish_txt = clean_text(turkish_txt)

            # Metni cümlelere ayır
            sentences = split_into_sentences(cleaned_turkish_txt)

            # Tüm metni tek bir yorum olarak listede sakla
            comments.append(' '.join(sentences))

            # Çevirilmiş metni bir dosyaya yaz
            translated_text = translate_text(sentences)
            translated_file = os.path.join(translated_dir, f"translated_{filename}")
            with open(translated_file, 'w', encoding='utf-8') as output_file:
                output_file.write(translated_text)
                
            print(f"Çevirilmiş metin '{filename}' olarak '{translated_file}' klasörüne kaydedildi.")
    
    return comments


# Word Correction

In [3]:
# import os
# from spellchecker import SpellChecker

# # Initialize SpellChecker for English
# spell = SpellChecker()

# # Specify the file paths
# input_file = translated_filename  # Input file
# print(input_file)

# output_dir = "../final_comments/sentiment_analyzed/corrected_comments"

# # Çıktı dizinini, eğer yoksa, oluştur
# os.makedirs(output_dir, exist_ok=True)

# # Çıktı dosyasını oluştur
# output_file = os.path.join(output_dir, f"corrected_{os.path.basename(translated_filename)}")
# corrected_translated_filename = output_file
# print(output_file)


# # Function to correct misspelled words
# def correct_file(input_path, output_path):
#     with open(input_path, 'r', encoding='utf-8') as f:
#         content = f.read()

#     total_words = len(content.split())
#     corrected_content = []
#     for i, word in enumerate(content.split()):
#         # Correct each word
#         corrected_word = spell.correction(word)
#         corrected_content.append(corrected_word)
#         # Show the progress
#         print(f"Process completed: {i+1}/{total_words}")

#     # Save the corrected content to a new file
#     with open(output_path, 'w', encoding='utf-8') as f:
#         f.write(' '.join(filter(None, corrected_content)))  # Filter out None values

#     print(f"Corrected file saved as: {output_path}")


# # Run the program
# correct_file(input_file, output_file)


# Preprocess and Sentiment Analysis


In [4]:
import re
import os
from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer

# Sentiment analiz fonksiyonu
def analyze_sentiment(text):
    analyzer = SentimentIntensityAnalyzer()
    sentiment_scores = analyzer.polarity_scores(text)
    neg_score = sentiment_scores['neg']
    pos_score = sentiment_scores['pos']
    return neg_score, pos_score

# Anahtar kelimeyi içeren cümleleri bulma fonksiyonu
def extract_sentences_with_keyword(text, keyword):
    # Metni cümlelere ayır
    sentences = re.split(r'(?<=[.!?])\s+', text)
    
    # Anahtar kelimeyi içeren cümleleri filtrele
    relevant_sentences = [sentence for sentence in sentences if keyword.lower() in sentence.lower()]
    
    return relevant_sentences

# Kategorilere göre anahtar kelimeler
categories = {
    "Su Kaydırağı": ["water slide", "aqua park", "slide", "splash", "pool games", "fun for kids", "waterpark", "pool", "water", "swimming", "poolside", "pool area", "pool facilities", "pool activities", "pool entertainment", "pool fun", "pool games", "pool activities", "pool entertainment", "pool fun", "pool games", "pool activities", "pool entertainment", "pool fun"],
    "Hizmet Kalitesi": ["service quality", "staff", "customer service", "friendly staff", "helpful", "efficient service","staff","service"],
    "Hijyenik Tatil": ["hygienic holiday", "cleanliness", "sanitation", "health safety", "hygiene standards", "clean facilities","hygiene","clean"],
    "Bebekli Tatil (0-2 Yaş)": ["baby-friendly", "infant", "0-2 years", "baby amenities", "stroller", "baby pool", "family","baby","children"],
    "Çocuklu Tatil (2-10 yaş)": ["child-friendly", "family", "kids", "2-10 years", "children's activities", "kids club", "family pool","children",],
    "Uzaktan Çalışma": ["remote work", "work-friendly", "coworking", "office space", "internet access", "quiet areas", "productivity"],
    "Evcil Hayvanla Tatil": ["pet-friendly", "dogs", "cats", "animals allowed", "pet amenities", "pet care"],
    "Dalış": ["diving", "scuba", "snorkeling", "underwater", "marine life", "sea adventure"],
    "Parti Deneyimi": ["party", "nightlife", "music", "DJ", "dancing", "celebration", "fun activities"],
    "Kumsal": ["beach", "sand", "shore", "sunbathing", "sea", "waves", "beach activities"],
    "Rahat Uyku Deneyimi": ["comfortable sleep", "quiet rooms", "bedding", "mattress", "restful night", "relaxation", "calm"],
    "Spor Alanları": ["sports facilities", "fitness", "gym", "basketball", "tennis", "sports activities", "wellness"],
    "Gece Eğlence Programı": ["night entertainment", "evening shows", "night events", "live music", "dancing", "performances"],
    "Sadece Yetişkin Tatili": ["adults-only", "no kids", "couples", "private areas", "adult entertainment", "relaxation"],
    "Çocuk Havuz Alanı": ["kids pool", "shallow pool", "children's pool", "family-friendly", "safe water play"],
    "Ulaşım İmkanları": ["transportation", "transport", "public transport", "taxi", "airport", "bus", "train", "car rental", "parking", "location", "distance", "nearby", "central", "walking distance", "area", "place", "situated"],
    "Spa and Wellness": [" spa ", "wellness", "massage", "sauna", "relaxation", "treatment", "health", "thermal", "hammam", "jacuzzi", "steam room", "therapy"],
    "Balayı": ["honeymoon", "romantic", "couple's retreat", "private", "romantic dinner", "special suite"],
    "Özel Menü": ["special menu", "dietary needs", "vegan", "vegetarian", "gluten-free", "customized meals", "meals"],
}

# translated_comments klasöründeki tüm dosyaları işle
translated_comments_dir = "../translated_comments"
for filename in os.listdir(translated_comments_dir):
    if filename.endswith(".txt"):
        input_file = os.path.join(translated_comments_dir, filename)
        
        # Dosyayı oku
        with open(input_file, 'r', encoding='utf-8') as file:
            input_text = file.read()

        # Tüm kategoriler için sonuçları birleştir ve sentiment analizi yap
        all_results = {}
        for category, keywords in categories.items():
            category_results = []
            for keyword in keywords:
                result = extract_sentences_with_keyword(input_text, keyword)
                category_results.extend(result)
            all_results[category] = category_results

        # Yorum bulunan konu başlıklarını bir liste olarak yazdır
        yorum_bulunan_konular = [category for category, results in all_results.items() if results]
        
        # Otel ismini dosya adından çıkar
        hotel_name = filename.replace("translated_", "").replace(".txt", "").replace("_", " ").title()
        print("Otel İsmi:", hotel_name)
        print()

        print("Yorum bulunan konu başlıkları:", yorum_bulunan_konular)
        print()

        # final_results klasörünü oluştur (eğer yoksa)
        os.makedirs("../final_results", exist_ok=True)

        # Sonuçları yazacağımız dosyayı aç
        with open(f"../final_results/{hotel_name}.txt", "w", encoding="utf-8") as f:
            f.write(f"Otel İsmi: {hotel_name}\n\n")
            f.write(f"Yorum bulunan konu başlıkları: {yorum_bulunan_konular}\n\n")
            
            # Tüm kategoriler için sentiment analizi yap ve rating'e göre sırala
            sorted_categories = sorted(all_results.items(), key=lambda x: len(x[1]), reverse=True)
            for category, results in sorted_categories:
                if len(results) > 0:  # Yorum sayısı 0 ise yazdırma
                    results_text = ' '.join(results)
                    neg_score, pos_score = analyze_sentiment(results_text)
                    print(neg_score, pos_score)
                    if neg_score + pos_score > 0:  # Payda sıfır olmasın diye kontrol et
                        if neg_score == 0:  # Payda sıfır olasılığını kontrol et
                            rating = 10  # Payda sıfır ise rating'i 10 olarak ayarla
                        else:
                            rating = ((pos_score)*10)/(pos_score+neg_score)
                    else:
                        continue  # Payda sıfır ise yazdırma
                        
                    f.write(f"{category} için rating: {rating:.1f}\n")
                    f.write(f"{category} için toplam yorum sayısı: {len(results)}\n")
                    f.write(f"{category} için tüm sonuçlar metni: {results_text}\n\n")
                    
                    # Ekrana da yazdır
                    print(f"{category} için rating: {rating:.1f}")
                    print(f"{category} için toplam yorum sayısı: {len(results)}")
                    print(f"{category} için tüm sonuçlar metni: {results_text}")
                    print()


Otel İsmi: Aska Bayview Resort Hotel

Yorum bulunan konu başlıkları: ['Su Kaydırağı', 'Hizmet Kalitesi', 'Hijyenik Tatil', 'Bebekli Tatil (0-2 Yaş)', 'Çocuklu Tatil (2-10 yaş)', 'Dalış', 'Parti Deneyimi', 'Kumsal', 'Rahat Uyku Deneyimi', 'Spor Alanları', 'Gece Eğlence Programı', 'Sadece Yetişkin Tatili', 'Çocuk Havuz Alanı', 'Ulaşım İmkanları', 'Spa and Wellness', 'Balayı', 'Özel Menü']

0.027 0.328
Hizmet Kalitesi için rating: 9.2
Hizmet Kalitesi için toplam yorum sayısı: 246
Hizmet Kalitesi için tüm sonuçlar metni: The hotel's location and service quality were very good, and the sea was perfect. We ate both the service quality and delicious food with the money we paid. Especially, there is a service quality that does not suit a hotel belonging to a chain group such as Aska hotels. Yorum: A super hotel|The quality of service, the friendliness of the staff, the food and beverage service, the attention at the hotel is very nice, it is a perfect family hotel, I would definitely recommend

# Summarization but won't use it for now

In [5]:
# from transformers import pipeline
# import re
# import os
# import csv

# # Hugging Face özetleme pipeline'ını başlatın
# summarizer = pipeline("summarization", model="sshleifer/distilbart-cnn-12-6")

# # Kategorilere göre anahtar kelimeler
# categories = {
#     "Su Kaydırağı": ["water slide", "aqua park", "slide", "splash", "pool games", "fun for kids"],
#     "Kadın-Kadına Tatil": ["women-only", "female-friendly", "ladies holiday", "women’s retreat", "privacy", "women’s events"],
#     "Erkek-Erkeğe Tatil": ["men-only", "male-friendly", "boys trip", "men's retreat", "male activities", "sports"],
#     "Bebekli Tatil (0-2 Yaş)": ["baby-friendly", "infant", "0-2 years", "baby amenities", "stroller", "baby pool", "family"],
#     "Çocuklu Tatil (2-10 yaş)": ["child-friendly", "family", "kids", "2-10 years", "children’s activities", "kids club", "family pool"],
#     "Uzaktan Çalışma": ["remote work", "work-friendly", "coworking", "office space", "internet access", "quiet areas", "productivity"],
#     "Evcil Hayvanla Tatil": ["pet-friendly", "dogs", "cats", "animals allowed", "pet amenities", "pet care"],
#     "Dalış": ["diving", "scuba", "snorkeling", "underwater", "marine life", "sea adventure"],
#     "Parti Deneyimi": ["party", "nightlife", "music", "DJ", "dancing", "celebration", "fun activities"],
#     "Kumsal": ["beach", "sand", "shore", "sunbathing", "sea", "waves", "beach activities"],
#     "Rahat Uyku Deneyimi": ["comfortable sleep", "quiet rooms", "bedding", "mattress", "restful night", "relaxation", "calm"],
#     "Spor Alanları": ["sports facilities", "fitness", "gym", "basketball", "tennis", "sports activities", "wellness"],
#     "Gece Eğlence Programı": ["night entertainment", "evening shows", "night events", "live music", "dancing", "performances"],
#     "Sadece Yetişkin Tatili": ["adults-only", "no kids", "couples", "private areas", "adult entertainment", "relaxation"],
#     "Çocuk Havuz Alanı": ["kids pool", "shallow pool", "children’s pool", "family-friendly", "safe water play"],
#     "Özel Sahil Kullanımı": ["private beach", "exclusive access", "beach amenities", "private area", "sunbeds"],
#     "Özel Menü": ["special menu", "dietary needs", "vegan", "vegetarian", "gluten-free", "customized meals"],
#     "Balayı": ["honeymoon", "romantic", "couple’s retreat", "private", "romantic dinner", "special suite"],
#     "Sessiz Restoran": ["quiet restaurant", "peaceful dining", "relaxed atmosphere", "noise-free", "intimate dining"],
#     "Spa ve Wellness": ["spa", "wellness", "massage", "sauna", "relaxation", "treatment", "health"]
# }


# # Metni parçalar halinde özetleme fonksiyonu
# def summarize_chunked_text(text, max_chunk_size=1000):
#     summaries = []
#     for i in range(0, len(text), max_chunk_size):
#         chunk = text[i:i+max_chunk_size]
#         # Özetleme sırasında max_length'i otomatik olarak ayarla
#         summary = summarizer(chunk, max_length=min(150, len(chunk)//2), min_length=30, do_sample=False)
#         summaries.append(summary[0]['summary_text'])
#     return " ".join(summaries)

# # Cümleleri kategorilere göre sınıflandırma
# def categorize_and_summarize(text, categories, chunk_size=500):
#     final_summary = {}
#     categorized_sentences = {category: [] for category in categories}
#     sentences = re.split(r'(?<=[.!?])\s+', text)

#     # Cümleleri kategorilere ayırma
#     for sentence in sentences:
#         for category, keywords in categories.items():
#             if any(keyword in sentence.lower() for keyword in keywords):
#                 categorized_sentences[category].append(sentence)
#                 break

#     # Her kategori için özet oluşturma
#     for category, sentences in categorized_sentences.items():
#         if sentences:
#             text = " ".join(sentences)
#             final_summary[category] = summarize_chunked_text(text, max_chunk_size=chunk_size)
    
#     return final_summary

# # translated_comments klasöründeki tüm dosyaları okuma ve kategori bazında özet oluşturma
# with open('sonuclar.csv', mode='w', newline='', encoding='utf-8') as file:
#     writer = csv.writer(file)
#     writer.writerow(['Dosya Adı', 'Kategori', 'Özet'])
#     for filename in os.listdir('../translated_comments'):
#         if filename.endswith('.txt'):
#             with open(f'../translated_comments/{filename}', 'r', encoding='utf-8') as file:
#                 text = file.read()
#                 final_summary = categorize_and_summarize(text, categories, chunk_size=500)
#                 for category, summary in final_summary.items():
#                     writer.writerow([filename, category, summary])


# Extract and assign all experiences of hotels

In [6]:
import os
import ast

def extract_categories_and_ratings(file_path):
    categories = {}
    with open(file_path, 'r', encoding='utf-8') as f:
        lines = f.readlines()
        for line in lines:
            if "Yorum bulunan konu başlıkları:" in line:
                # Convert string list to Python list
                categories["başlıklar"] = ast.literal_eval(line.split("Yorum bulunan konu başlıkları: ")[1].strip())
            if "için rating:" in line:
                category = line.split(" için rating:")[0].strip()
                rating = float(line.split("rating:")[1].strip())
                categories[category] = rating
        return categories
    
# Dictionary mapping experience IDs to names
experience_mapping = {
    1:  "Su Kaydırağı",
    2:  "Hizmet Kalitesi", 
    3:  "Hijyenik Tatil",
    4:  "Bebekli Tatil (0-2 Yaş)",
    5:  "Çocuklu Tatil (2-10 yaş)",
    6:  "Uzaktan Çalışma", 
    7:  "Evcil Hayvanla Tatil",
    8:  "Dalış",
    9:  "Parti deneyimi",
    10: "Kumsal",
    11: "Rahat Uyku Deneyimi",
    12: "Spor Alanları",
    13: "Gece Eğlence Programı", 
    14: "Sadece Yetişkin Tatili",
    15: "Çocuk Havuz Alanı",
    17: "Özel Menü",
    18: "Balayı",
    19: "Ulaşım İmkanları",
    20: "Spa ve Wellness"
}

# Create reverse dictionary mapping experience names to IDs
experience_name_to_id = {v: k for k, v in experience_mapping.items()}

# Process all files in translated_comments directory
final_results_dir = "../final_results"
for filename in os.listdir(final_results_dir):
    if filename.endswith(".txt"):
        file_path = os.path.join(final_results_dir, filename)
        categories_and_ratings = extract_categories_and_ratings(file_path)
        # Remove .txt extension from filename
        hotel_name = filename[:-4]
        print(f"\n{hotel_name}:")
        if "başlıklar" in categories_and_ratings:
            formatted_categories = []
            for category, rating in categories_and_ratings.items():
                if category != "başlıklar":
                    if category in experience_name_to_id:
                        experience_id = experience_name_to_id[category]
                        formatted_categories.append(f"{experience_id}, {rating}")
            print(f"Kategoriler (hotel_id, Rating): {formatted_categories}")



Aska Bayview Resort Hotel:
Kategoriler (hotel_id, Rating): ['2, 9.2', '10, 9.1', '1, 8.6', '3, 9.1', '19, 8.3', '4, 9.1', '5, 9.2', '12, 9.3', '13, 7.1', '17, 7.1', '11, 8.0', '18, 10.0', '8, 10.0', '14, 6.1', '15, 10.0']

Aska Lara:
Kategoriler (hotel_id, Rating): ['2, 9.8', '3, 9.6', '19, 8.6', '5, 9.6', '1, 8.8', '4, 9.6', '10, 8.7', '13, 9.4', '18, 8.3', '17, 9.4', '11, 9.8', '15, 10.0', '12, 10.0', '14, 10.0']

Atlas Beach Hotel:
Kategoriler (hotel_id, Rating): ['2, 9.0', '1, 8.1', '10, 7.5', '3, 8.1', '5, 8.2', '4, 8.3', '19, 6.4', '15, 9.3', '17, 8.6', '11, 8.7', '13, 9.2', '18, 7.9', '14, 10.0']

Azura Deluxe Resort Spa Hotel:
Kategoriler (hotel_id, Rating): ['2, 8.8', '3, 8.7', '10, 8.7', '1, 8.3', '19, 8.3', '5, 7.8', '4, 7.8', '14, 8.9', '17, 9.3', '18, 7.5', '7, 4.9', '11, 8.9', '12, 8.5', '13, 10.0', '15, 10.0', '8, 10.0']

Baia Lara Hotel:
Kategoriler (hotel_id, Rating): ['2, 7.9', '1, 7.8', '3, 7.3', '10, 7.6', '19, 8.2', '5, 8.4', '4, 7.5', '12, 8.6', '17, 7.8', '15, 4

# Create Cypher Script


In [7]:
# Create a reverse dictionary mapping experience names to IDs
experience_name_to_id = {v: k for k, v in experience_mapping.items()}

# Process all files in translated_comments directory
final_results_dir = "../final_results"
for filename in os.listdir(final_results_dir):
    if filename.endswith(".txt"):
        file_path = os.path.join(final_results_dir, filename)
        categories_and_ratings = extract_categories_and_ratings(file_path)
        hotel_name = filename[:-4]
        
        if "başlıklar" in categories_and_ratings:
            experience_matches = []
            experience_creates = []
            
            for category, rating in categories_and_ratings.items():
                if category != "başlıklar" and category in experience_name_to_id:
                    experience_id = experience_name_to_id[category]
                    experience_matches.append(f"(e{experience_id}:Experience {{experience_id: {experience_id}}})")
                    experience_creates.append(f"(h)-[:HAS_EXPERIENCE {{rating: {rating}}}]->(e{experience_id})")
            
            if experience_matches and experience_creates:
                cypher_query = """SELECT * FROM cypher('metapersona-graph-db', $$
    MATCH (h:Hotel {name: '%s'})
    WITH h
    MATCH %s
    CREATE %s
$$) as (n agtype);""" % (hotel_name, ', '.join(experience_matches), ',\n           '.join(experience_creates))
                print(cypher_query)
                print("\n")


SELECT * FROM cypher('metapersona-graph-db', $$
    MATCH (h:Hotel {name: 'Aska Bayview Resort Hotel'})
    WITH h
    MATCH (e2:Experience {experience_id: 2}), (e10:Experience {experience_id: 10}), (e1:Experience {experience_id: 1}), (e3:Experience {experience_id: 3}), (e19:Experience {experience_id: 19}), (e4:Experience {experience_id: 4}), (e5:Experience {experience_id: 5}), (e12:Experience {experience_id: 12}), (e13:Experience {experience_id: 13}), (e17:Experience {experience_id: 17}), (e11:Experience {experience_id: 11}), (e18:Experience {experience_id: 18}), (e8:Experience {experience_id: 8}), (e14:Experience {experience_id: 14}), (e15:Experience {experience_id: 15})
    CREATE (h)-[:HAS_EXPERIENCE {rating: 9.2}]->(e2),
           (h)-[:HAS_EXPERIENCE {rating: 9.1}]->(e10),
           (h)-[:HAS_EXPERIENCE {rating: 8.6}]->(e1),
           (h)-[:HAS_EXPERIENCE {rating: 9.1}]->(e3),
           (h)-[:HAS_EXPERIENCE {rating: 8.3}]->(e19),
           (h)-[:HAS_EXPERIENCE {rating: 9.1}]

# Create graph

Add nodes

In [8]:
import networkx as nx
import pandas as pd

# Create an empty graph
G = nx.Graph()

# 1. Load Hotel Nodes and Add to Graph
hotel_nodes_df = pd.read_csv('..\\graph_nodes_edges\\hotel_nodes_2.csv', encoding='iso-8859-9')
for _, row in hotel_nodes_df.iterrows():
    G.add_node(row['id'], type='Hotel', name=row.get('name'), rating=row.get('rating'), hotel_id=row.get('hotel_id'))

# 2. Load Experience Nodes and Add to Graph  
experience_nodes_df = pd.read_csv('..\\graph_nodes_edges\\experience_nodes.csv', encoding='iso-8859-9')
for _, row in experience_nodes_df.iterrows():
    G.add_node(row['id'], type='Experience', name=row.get('name'), description=row.get('description'), experience_id= row.get('experience_id'))

# 3. Load Location Nodes and Add to Graph
location_nodes_df = pd.read_csv('..\\graph_nodes_edges\\location_nodes.csv', encoding='iso-8859-9')
for _, row in location_nodes_df.iterrows():
    G.add_node(row['id'], type='Location', name=row.get('name'), location_id = row.get('location_id'), encoding='iso-8859-9')

#4. Load User Nodes and Add to Graph
user_nodes_df = pd.read_csv('..\\graph_nodes_edges\\user_nodes.csv', encoding='iso-8859-9')
for _, row in user_nodes_df.iterrows():
    G.add_node(row['id'], type='User', name=row.get('name'), email=row.get('email'))

print(hotel_nodes_df)


                   id                                   name  rating  hotel_id
0    1970324837383878                       Melas Lara Hotel     9.2       241
1    1970324837383879                 IC Hotels Green Palace     9.2      1525
2    1970324837383880                  Barut Lara Collection     9.6       164
3    1970324837383881                    IC Hotels Residence     9.4      2648
4    1970324837383882                      IC Hotels Airport     8.8      2108
..                ...                                    ...     ...       ...
587  1970324837384447                     Lalila Blue Suites     NaN        23
588  1970324837384448                   Marti Hemithea Hotel     NaN       319
589  1970324837384449                      Costa Mare Suites     NaN       309
590  1970324837384450  Hilton Dalaman Sarigerme Resort & Spa     NaN      2642
591  1970324837384451                     XL Hotel Sarigerme     NaN      1990

[592 rows x 4 columns]


Add edges

In [9]:
# Edge verilerini yükle
has_experience_edges_df = pd.read_csv('..\\graph_nodes_edges\\has_experience_edges.csv')
for _, row in has_experience_edges_df.iterrows():
    G.add_edge(row['source'], row['target'], relationship_type=row.get('HAS_EXPERIENCE'), rating=row.get('rating'))

located_in_edges_df = pd.read_csv('..\\graph_nodes_edges\\located_in_edges.csv')
for _, row in located_in_edges_df.iterrows():
    G.add_edge(row['source'], row['target'], relationship_type=row.get('LOCATED_IN'))

stayed_at_edges_df = pd.read_csv('..\\graph_nodes_edges\\stayed_at_edges.csv')
for _, row in stayed_at_edges_df.iterrows():
    G.add_edge(row['source'], row['target'], relationship_type=row.get('STAYED_AT'))

likes_edges_df = pd.read_csv('..\\graph_nodes_edges\\likes_edges.csv')
for _, row in likes_edges_df.iterrows():
    G.add_edge(row['source'], row['target'], relationship_type=row.get('LIKES'))

print(has_experience_edges_df)


               source            target  rating
0    1970324837383890  3377699720527893     6.5
1    1970324837383890  3377699720527909     5.7
2    1970324837383890  3377699720527910     4.9
3    1970324837383890  3377699720527900     4.2
4    1970324837383890  3377699720527911     4.9
..                ...               ...     ...
516  1970324837383861  3377699720527894     7.8
517  1970324837383861  3377699720527903     9.5
518  1970324837383861  3377699720527906     3.6
519  1970324837383861  3377699720527901    10.0
520  1970324837383861  3377699720527905    10.0

[521 rows x 3 columns]


# Visualize the Graph

In [10]:
import plotly.graph_objects as go

def create_interactive_graph(G):
    # Get only connected nodes
    connected_nodes = [node for node in G.nodes() if G.degree(node) > 0]
    subG = G.subgraph(connected_nodes)
    
    # Optimize graph layout
    pos = nx.spring_layout(subG, k=2, iterations=100)
    
    # Define colors for node types
    colors = {
        'Hotel': '#4287f5',
        'Experience': '#42f554', 
        'Location': '#f54242',
        'User': '#FFA500' 
    }
    
    # Create edges
    edge_x, edge_y = [], []
    for edge in subG.edges():
        x0, y0 = pos[edge[0]]
        x1, y1 = pos[edge[1]]
        edge_x.extend([x0, x1, None])
        edge_y.extend([y0, y1, None])
    
    # Create final figure with layout settings
    fig = go.Figure(layout=go.Layout(
        showlegend=True,
        hovermode='closest',
        margin=dict(b=20, l=5, r=5, t=40),
        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
        plot_bgcolor='white',
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="right",
            x=0.99,
            bgcolor='rgba(255,255,255,0.8)'
        ),
        width=1728,
        height=972
    ))

    # Check and add new edge types
    for edge in subG.edges(data=True):
        relationship_type = edge[2].get('relationship_type')
        if relationship_type in ['LIKES', 'STAYED_AT']:
            x0, y0 = pos[edge[0]]
            x1, y1 = pos[edge[1]]
            edge_trace = go.Scatter(
                x=[x0, x1, None],
                y=[y0, y1, None],
                line=dict(width=0.5, color='rgba(255,0,0,0.5)' if relationship_type == 'LIKES' else 'rgba(0,255,0,0.5)'),
                hoverinfo='none',
                mode='lines',
                showlegend=False
            )
            fig.add_trace(edge_trace)

    edge_trace = go.Scatter(
        x=edge_x, y=edge_y,
        line=dict(width=0.3, color='rgba(150,150,150,0.3)'),
        hoverinfo='none',
        mode='lines',
        showlegend=False)
    fig.add_trace(edge_trace)
    
    # Create nodes for each type (including User)
    for node_type in colors:
        node_x, node_y = [], []
        node_text, node_info = [], []
        
        nodes = [node for node in subG.nodes() if subG.nodes[node].get('type') == node_type]
        
        for node in nodes:
            x, y = pos[node]
            node_x.append(x)
            node_y.append(y)
            
            # Prepare hover information
            node_attrs = subG.nodes[node]
            info = f"<b>{node_type}</b><br>"
            info += f"<b>Name:</b> {node_attrs.get('name', 'N/A')}<br>"
            
            if node_type == 'Hotel':
                info += f"<b>Rating:</b> {node_attrs.get('rating', 'N/A')}<br>"
                info += f"<b>ID:</b> {node_attrs.get('hotel_id', 'N/A')}"
            elif node_type == 'Experience':
                info += f"<b>Description:</b> {node_attrs.get('description', 'N/A')}<br>"
                info += f"<b>ID:</b> {node_attrs.get('experience_id', 'N/A')}"
            elif node_type == 'Location':
                info += f"<b>ID:</b> {node_attrs.get('location_id', 'N/A')}"
            elif node_type == 'User':
                info += f"<b>Email:</b> {node_attrs.get('email', 'N/A')}"
            
            node_text.append(node_attrs.get('name', str(node)))
            node_info.append(info)
        
        if node_x:  # Only add trace if there are nodes
            node_trace = go.Scatter(
                x=node_x, y=node_y,
                mode='markers+text',
                hoverinfo='text',
                text=node_text,
                hovertext=node_info,
                textposition="bottom center",
                name=node_type,
                marker=dict(
                    color=colors[node_type],
                    size=25,
                    line_width=1.5,
                    line=dict(color='white'),
                    symbol='circle'
                ),
                textfont=dict(size=8)
            )
            fig.add_trace(node_trace)
    
    # Calculate statistics
    hotel_count = len([n for n in subG.nodes() if subG.nodes[n].get('type') == 'Hotel'])
    exp_count = len([n for n in subG.nodes() if subG.nodes[n].get('type') == 'Experience'])
    loc_count = len([n for n in subG.nodes() if subG.nodes[n].get('type') == 'Location'])
    user_count = len([n for n in subG.nodes() if subG.nodes[n].get('type') == 'User'])

    stats = (f"<b>Total Hotels:</b> {hotel_count} | "
            f"<b>Total Experiences:</b> {exp_count} | "
            f"<b>Total Locations:</b> {loc_count} | "
            f"<b>Total Users:</b> {user_count}")
    
    # Add statistics annotation
    fig.add_annotation(
        text=stats,
        xref="paper", yref="paper",
        x=0.5, y=1.02,
        showarrow=False,
        font=dict(size=12),
        bgcolor='rgba(255,255,255,0.8)',
        bordercolor='rgba(0,0,0,0.3)',
        borderwidth=1,
        borderpad=4
    )
    
    return fig

# Create and display interactive graph
fig = create_interactive_graph(G)
fig.show()

# Save as HTML file
fig.write_html("interactive_graph_filtered.html")

In [16]:
def recommend_hotels_for_experiences(G, experience_preferences):
    """
    experience_preferences: [(experience_id, importance_score), ...]
    importance_score: importance score given by customer to this experience (1-5)
    """
    experience_nodes = []
    for exp_id, _ in experience_preferences:
        for node in G.nodes():
            if (G.nodes[node].get('type') == 'Experience' and 
                G.nodes[node].get('experience_id') == exp_id):
                experience_nodes.append(node)
                break
    
    if not experience_nodes:
        return "Experiences not found"

    def calculate_weighted_pagerank(G):
        for u, v, data in G.edges(data=True):
            rating = data.get('rating', 0)
            G[u][v]['weight'] = rating ** 2
        
        pagerank_scores = nx.pagerank(
            G,
            alpha=0.9,
            weight='weight',
            max_iter=100
        )
        
        return pagerank_scores

    def calculate_collaborative_score(G, hotel_node, experience_nodes):
        """
        Benzer kullanıcıların otel değerlendirmelerini kullanarak collaborative score hesaplar
        """
        similar_users = []
        for user in G.nodes():
            if G.nodes[user].get('type') == 'User':
                user_ratings = []
                for exp_node in experience_nodes:
                    if G.has_edge(user, exp_node):
                        if G[user][exp_node].get('type') in ['likes', 'stayed_at']:
                            user_ratings.append(G[user][exp_node].get('rating', 0))
                
                if user_ratings:
                    similar_users.append(user)
        
        if not similar_users:
            return 0
        
        collaborative_score = 0
        count = 0
        for user in similar_users:
            if G.has_edge(user, hotel_node):
                if G[user][hotel_node].get('type') in ['likes', 'stayed_at']:
                    collaborative_score += G[user][hotel_node].get('rating', 0)
                    count += 1
        
        return collaborative_score / count if count > 0 else 0

    pagerank_scores = calculate_weighted_pagerank(G)
    
    hotels_data = []
    for node in G.nodes():
        if G.nodes[node].get('type') == 'Hotel':
            if any(G.has_edge(node, exp_node) for exp_node in experience_nodes):
                collaborative_score = calculate_collaborative_score(G, node, experience_nodes)
                
                hotel_data = {
                    'node_id': node,
                    'name': G.nodes[node].get('name'),
                    'hotel_rating': G.nodes[node].get('rating', 0),
                    'pagerank_score': pagerank_scores[node],
                    'collaborative_score': collaborative_score,
                    'experiences': [],
                    'experience_count': 0,
                    'avg_experience_rating': 0.0,
                    'selected_experiences_ratings': []
                }
                
                total_rating = 0
                num_experiences = 0
                
                for exp_node in experience_nodes:
                    if G.has_edge(node, exp_node):
                        exp_rating = G[node][exp_node].get('rating', 0)
                        exp_importance = next(score for id, score in experience_preferences 
                                           if G.nodes[exp_node].get('experience_id') == id)
                        hotel_data['selected_experiences_ratings'].append(
                            (exp_rating, exp_importance)
                        )
                
                for neighbor in G.neighbors(node):
                    if G.nodes[neighbor].get('type') == 'Experience':
                        exp_rating = G[node][neighbor].get('rating', 0)
                        exp_id = G.nodes[neighbor].get('experience_id')
                        if exp_id in experience_mapping:
                            hotel_data['experiences'].append({
                                'name': experience_mapping[exp_id],
                                'rating': exp_rating,
                                'pagerank': pagerank_scores[neighbor]
                            })
                            total_rating += exp_rating
                            num_experiences += 1
                
                if num_experiences > 0:
                    hotel_data['experience_count'] = num_experiences
                    hotel_data['avg_experience_rating'] = total_rating / num_experiences
                    hotels_data.append(hotel_data)
    
    def sort_hotels(hotels_data):
        def calculate_score(hotel):
            selected_exp_score = 0
            if hotel['selected_experiences_ratings']:
                weighted_ratings = [rating * (importance/5) 
                                  for rating, importance in hotel['selected_experiences_ratings']]
                selected_exp_score = sum(weighted_ratings) / len(weighted_ratings)
            
            return (
                hotel['avg_experience_rating'] * 0.25 +
                selected_exp_score * 0.35 +
                hotel['pagerank_score'] * 0.15 +
                hotel['hotel_rating'] * 0.1 +
                hotel['collaborative_score'] * 0.15
            )
        
        return sorted(hotels_data, key=calculate_score, reverse=True)
    
    sorted_hotels = sort_hotels(hotels_data)
    return sorted_hotels[:5]

experience_preferences = [
    (3, 5),  # first - very important
    (1, 3),  # second - medium importance  
    (2, 4)   # third - important
]

recommendations = recommend_hotels_for_experiences(G, experience_preferences)

print("\nBest hotel recommendations:")
for i, hotel in enumerate(recommendations, 1):
    print(f"\n{i}. {hotel['name']}")
    print(f"Hotel Rating: {hotel['hotel_rating']:.1f}")
    print(f"PageRank Score: {hotel['pagerank_score']:.4f}")
    print(f"Collaborative Score: {hotel['collaborative_score']:.4f}")
    print("Selected Experience Ratings:")
    for rating, importance in hotel['selected_experiences_ratings']:
        print(f"- Rating: {rating:.1f}, Importance: {importance}")
    print(f"Total Experience Count: {hotel['experience_count']}")
    print(f"Average Experience Rating: {hotel['avg_experience_rating']:.1f}")
    print("Other Experiences:")
    sorted_experiences = sorted(hotel['experiences'], 
                              key=lambda x: x['rating'], 
                              reverse=True)
    for exp in sorted_experiences:
        print(f"- {exp['name']} (Rating: {exp['rating']:.1f}, PageRank: {exp['pagerank']:.4f})")


Best hotel recommendations:

1. Concorde De Luxe Resort
Hotel Rating: 9.6
PageRank Score: 0.0078
Collaborative Score: 0.0000
Selected Experience Ratings:
- Rating: 9.4, Importance: 5
- Rating: 9.6, Importance: 3
- Rating: 9.7, Importance: 4
Total Experience Count: 14
Average Experience Rating: 9.5
Other Experiences:
- Spor Alanları (Rating: 10.0, PageRank: 0.0155)
- Özel Menü (Rating: 10.0, PageRank: 0.0149)
- Balayı (Rating: 10.0, PageRank: 0.0166)
- Çocuk Havuz Alanı (Rating: 10.0, PageRank: 0.0138)
- Hizmet Kalitesi (Rating: 9.7, PageRank: 0.0175)
- Çocuklu Tatil (2-10 yaş) (Rating: 9.6, PageRank: 0.0167)
- Su Kaydırağı (Rating: 9.6, PageRank: 0.0143)
- Gece Eğlence Programı (Rating: 9.6, PageRank: 0.0151)
- Bebekli Tatil (0-2 Yaş) (Rating: 9.5, PageRank: 0.0158)
- Ulaşım İmkanları (Rating: 9.4, PageRank: 0.0157)
- Hijyenik Tatil (Rating: 9.4, PageRank: 0.0173)
- Sadece Yetişkin Tatili (Rating: 8.9, PageRank: 0.0130)
- Kumsal (Rating: 8.8, PageRank: 0.0138)
- Rahat Uyku Deneyimi (R