In [None]:
import torch
import pandas as pd
import numpy as np
import re
import sys
from datetime import datetime
from tqdm import tqdm
import warnings
warnings.filterwarnings('ignore')
print("‚úÖ Packages imported!")


In [None]:
# Report the compute device; when a GPU is present, release cached CUDA memory first.
if not torch.cuda.is_available():
    print("üîß Using CPU")
else:
    torch.cuda.empty_cache()
    print("üîß GPU memory cleared")
    device_name = torch.cuda.get_device_name(0)
    print("üîß Using GPU: " + device_name)

In [None]:

# 2. SENSORY ENHANCED GENERATOR
class WorkingTunisiaGenerator:
    """Natural comment generator with rich sensory details"""

    def __init__(self):
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        print(f"üîß Using device: {self.device}")

        self.model_name = "gpt2"
        self._load_model()

        # Tunisian places with sensory details
        self.places = [
            {
                "name": "Sidi Bou Said",
                "type": "coastal village",
                "features": ["blue and white architecture", "cliffside views",
                            "Caf√© des Nattes", "art galleries", "sea breeze"],
                "sensory_details": {
                    "sight": ["white-washed walls gleaming in the Mediterranean sun", 
                             "deep blue shutters contrasting against whitewash",
                             "panoramic sea views stretching to the horizon"],
                    "smell": ["salt-laden ocean air", "jasmine flowers from hidden courtyards",
                             "fresh linen drying on rooftops"],
                    "sound": ["gentle waves crashing against the cliffs below",
                             "soft murmur of tourists whispering in narrow streets",
                             "tinkling of wind chimes from gallery doorways"],
                    "touch": ["cool sea breeze on your face", "smooth worn stones underfoot",
                             "rough whitewashed walls under fingertips"],
                    "taste": ["salty Mediterranean air on lips", "fresh mint tea from caf√©s"]
                },
                "activities": ["sipping mint tea", "watching sunsets",
                              "taking photographs", "walking narrow streets"]
            },
            {
                "name": "Tunis Medina",
                "type": "historic city",
                "features": ["ancient souks", "Zitouna Mosque", "traditional crafts",
                            "narrow alleyways", "historic architecture"],
                "sensory_details": {
                    "sight": ["intricate geometric tilework on walls and floors",
                             "light filtering through carved wooden screens",
                             "vibrant fabrics and spices in rainbow colors"],
                    "smell": ["aromatic blend of cumin, cinnamon, and coriander",
                             "rich scent of freshly brewed coffee in tiny caf√©s",
                             "leather tanning workshops releasing pungent odors",
                             "incense and rose water from perfume stalls"],
                    "sound": ["rhythmic hammering of metalworkers in workshops",
                             "merchants calling out their wares",
                             "echoing footsteps bouncing off stone walls",
                             "distant call to prayer from the mosque"],
                    "touch": ["smooth polished brass and copper items",
                             "soft silk scarves sliding through fingers",
                             "sticky hands after handling fresh dates",
                             "cool marble floors in the mosque"],
                    "taste": ["sharp bite of preserved lemons", 
                             "sweet honey-soaked pastries", "bitter coffee",
                             "salty olives from market stalls"]
                },
                "activities": ["shopping for souvenirs", "exploring mosques",
                              "trying street food", "people watching"]
            },
            {
                "name": "Hammamet",
                "type": "beach resort",
                "features": ["sandy beaches", "historic fortress", "orange groves",
                            "luxury resorts", "medina walls"],
                "sensory_details": {
                    "sight": ["golden sand stretching endlessly",
                             "turquoise water glistening in sunlight",
                             "ancient fortress walls rising from the beach"],
                    "smell": ["coconut sunscreen mixing with sea salt",
                             "fragrant orange blossoms from nearby groves",
                             "fresh grilled fish from beachside restaurants"],
                    "sound": ["waves gently lapping the shore",
                             "seagulls calling overhead",
                             "distant music from beach clubs",
                             "children's laughter on the sand"],
                    "touch": ["warm sand between toes",
                             "cool refreshing seawater",
                             "gentle Mediterranean breeze on sunburned skin"],
                    "taste": ["salt from the sea on lips",
                             "fresh grilled seafood", "cold fruit juices",
                             "sweet pastries from local bakeries"]
                },
                "activities": ["sunbathing", "water sports", "spa treatments",
                              "exploring the old town"]
            },
            {
                "name": "Djerba",
                "type": "island",
                "features": ["white sandy beaches", "traditional architecture",
                            "El Ghriba synagogue", "palm trees", "clear waters"],
                "sensory_details": {
                    "sight": ["pristine white sand like powder",
                             "crystal-clear turquoise waters",
                             "whitewashed houses with blue doors",
                             "tall palm trees swaying gently"],
                    "smell": ["clean, fresh sea air",
                             "tropical flowers blooming year-round",
                             "salt spray from breaking waves",
                             "smoke from traditional fish grills"],
                    "sound": ["rhythmic sound of waves on white sand",
                             "rustling of palm fronds in the breeze",
                             "traditional Djerbian music from local homes",
                             "fishermen's calls at dawn"],
                    "touch": ["impossibly soft white sand",
                             "warm water caressing your skin",
                             "gentle island breeze carrying salt spray"],
                    "taste": ["grilled octopus with lemon juice",
                             "fresh local fish", "sweet island dates",
                             "refreshing coconut water"]
                },
                "activities": ["beach relaxation", "cultural visits", "seafood dining",
                              "shopping for handicrafts"]
            },
            {
                "name": "Carthage",
                "type": "archaeological site",
                "features": ["Roman ruins", "ancient amphitheater", "museum",
                            "coastal views", "historic artifacts"],
                "sensory_details": {
                    "sight": ["crumbling ancient columns against blue sky",
                             "intricate Roman mosaics still vibrant after centuries",
                             "sunset casting long shadows over ruins",
                             "museum displays of preserved artifacts"],
                    "smell": ["dusty, earthy scent of ancient stone",
                             "warm Mediterranean air carrying hints of pine",
                             "aged museum air with faint restoration chemicals"],
                    "sound": ["whispers echoing off amphitheater walls",
                             "wind through crumbling structures",
                             "distant waves from the nearby coast",
                             "occasional tour guide explaining history"],
                    "touch": ["worn, smooth ancient stones from centuries of touch",
                             "rough edges of fractured columns",
                             "warm sun-heated stone beneath palm of hand"],
                    "taste": ["dust on lips from walking through ruins",
                             "local mint tea at nearby caf√©s",
                             "fresh fruit from vendors near museum entrance"]
                },
                "activities": ["exploring history", "museum visits", "guided tours",
                              "learning about ancient civilizations"]
            },
            {
                "name": "Sahara Desert - Douz",
                "type": "desert oasis",
                "features": ["endless golden dunes", "bedouin camps", "camel markets",
                            "palm oases", "desert sunsets"],
                "sensory_details": {
                    "sight": ["endless waves of golden sand dunes",
                             "heat shimmering rising from the desert floor",
                             "brilliant star-filled night sky without light pollution",
                             "colorful bedouin tents and traditional dress"],
                    "smell": ["dry, warm desert air",
                             "smoke from bedouin cooking fires",
                             "camel leather and saddles",
                             "dust swirling in hot wind"],
                    "sound": ["wind howling across vast dunes",
                             "camel bells and footsteps on sand",
                             "bedouin voices singing traditional songs",
                             "silence so profound it feels alive"],
                    "touch": ["scorching sun on exposed skin",
                             "fine, warm desert sand between fingers",
                             "hot wind across your face",
                             "cool touch of night air after sunset"],
                    "taste": ["strong mint tea served in glasses",
                             "dry, gritty sensation of sand in your mouth",
                             "salty sweat on your lips",
                             "sweet dates and goat cheese"]
                },
                "activities": ["camel trekking", "watching desert sunrise", "visiting bedouin camps",
                              "attending camel markets", "sleeping under stars"]
            },
            {
                "name": "Ksar Ouled Soltane",
                "type": "ancient fortress settlement",
                "features": ["traditional underground homes", "historic ksars", "ochre-colored clay",
                            "narrow passageways", "mud-brick architecture"],
                "sensory_details": {
                    "sight": ["towering walls of compressed earth and clay",
                             "golden-brown ochre tones everywhere",
                             "intricate carved wooden door frames",
                             "shadows creating maze-like patterns on walls"],
                    "smell": ["earthy scent of ancient mud and clay",
                             "musty air from underground chambers",
                             "incense burning in homes",
                             "animal odors from livestock areas"],
                    "sound": ["footsteps echoing through narrow corridors",
                             "children playing in covered passages",
                             "roosters crowing in the morning",
                             "wind whistling through openings"],
                    "touch": ["rough clay walls under your palms",
                             "cool air underground",
                             "smooth worn stones from centuries of use",
                             "loose dust covering everything"],
                    "taste": ["thick, sweet tea made over open fires",
                             "fresh bread baked in traditional ovens",
                             "gritty taste of dust in the air",
                             "simple vegetables from local gardens"]
                },
                "activities": ["exploring underground homes", "photographing architecture",
                              "meeting local families", "learning about traditional living"]
            },
            {
                "name": "Gafsa Phosphate Region",
                "type": "industrial heritage site",
                "features": ["mining operations", "industrial museums", "crater lakes",
                            "worker villages", "geological formations"],
                "sensory_details": {
                    "sight": ["enormous open-pit mines with geometric patterns",
                             "turquoise and green crater lakes",
                             "industrial structures against desert landscape",
                             "colorful mineral deposits in rock layers"],
                    "smell": ["sulfurous chemical odors from mines",
                             "metallic, mineral-rich air",
                             "dust particles suspended in sunlight",
                             "industrial processing aromas"],
                    "sound": ["machinery echoing across excavation sites",
                             "trucks rumbling through mining areas",
                             "worker conversations and tools clanging",
                             "wind carrying industrial hums"],
                    "touch": ["fine mineral dust on skin",
                             "hot, dry wind from mine areas",
                             "rough mineral samples",
                             "warm sun-baked rocks"],
                    "taste": ["mineral-laden dust on lips",
                             "strong coffee from worker caf√©s",
                             "salty perspiration",
                             "metallic aftertaste in mouth"]
                },
                "activities": ["visiting mining museums", "viewing crater lakes",
                              "learning about phosphate industry", "photographing landscapes"]
            },
            {
                "name": "Kairouan",
                "type": "holy Islamic city",
                "features": ["Great Mosque", "medina bazaars", "carpet workshops",
                            "ancient walls", "religious sites"],
                "sensory_details": {
                    "sight": ["magnificent minaret rising above the city",
                             "intricately woven carpets in every color",
                             "Islamic calligraphy and geometric patterns",
                             "ancient stone fortifications surrounding medina"],
                    "smell": ["powerful aroma of traditional carpet dyes",
                             "intense frankincense from prayer areas",
                             "sweet perfumes mixing in bazaars",
                             "wool and natural fibers being worked"],
                    "sound": ["melodious call to prayer from the mosque",
                             "rhythmic clacking of carpet looms",
                             "merchant voices negotiating prices",
                             "prayer chants echoing through streets"],
                    "touch": ["soft, luxurious carpet fabrics",
                             "cool marble and stone in the mosque",
                             "smooth prayer beads",
                             "worn wooden doorframes"],
                    "taste": ["sweet traditional pastries",
                             "thick coffee with cardamom",
                             "salty preserved meats",
                             "honey and almond confections"]
                },
                "activities": ["visiting the Great Mosque", "exploring carpet workshops",
                              "shopping in bazaars", "learning about Islamic history"]
            },
            {
                "name": "Tozeur",
                "type": "palm oasis town",
                "features": ["200,000 palm trees", "traditional brick houses", "Star Wars filming location",
                            "mineral springs", "desert gateway"],
                "sensory_details": {
                    "sight": ["dense canopy of thousands of palm trees",
                             "traditional blue and red brick buildings",
                             "dusty orange streets",
                             "otherworldly desert landscape"],
                    "smell": ["sweet fragrance of ripe dates",
                             "earthy petrichor after desert rain",
                             "smoke from date palm fires",
                             "fresh date juice being pressed"],
                    "sound": ["rustling of millions of palm fronds",
                             "palm branches creaking in wind",
                             "water flowing through irrigation channels",
                             "birds nesting in date palms"],
                    "touch": ["cool shade under dense palm canopy",
                             "soft date flesh in your mouth",
                             "rough palm bark",
                             "refreshing desert breeze"],
                    "taste": ["incredibly sweet Deglet Noor dates",
                             "fresh date juice",
                             "date paste in traditional sweets",
                             "rich date and almond pastries"]
                },
                "activities": ["wandering through palm groves", "tasting fresh dates",
                              "visiting film locations", "exploring traditional town"]
            },
            {
                "name": "Gab√®s",
                "type": "coastal agricultural port",
                "features": ["date palm gardens", "fishing harbor", "seafood markets",
                            "oasis meeting sea", "traditional boats"],
                "sensory_details": {
                    "sight": ["sea of palm trees meeting Mediterranean coastline",
                             "colorful fishing boats in harbor",
                             "bustling fish market with vibrant displays",
                             "sunset over water reflecting on wet fish"],
                    "smell": ["overwhelming aroma of fresh fish",
                             "salt spray mixing with date blossoms",
                             "seafood cooking aromas from restaurants",
                             "sweet date fragrance contrasting with salty sea"],
                    "sound": ["seagulls crying above fishing boats",
                             "fishermen's calls and market haggling",
                             "waves lapping against harbor walls",
                             "boat ropes creaking with the tide"],
                    "touch": ["rough scales of fresh fish",
                             "cool sea breeze mixed with warm date garden air",
                             "wet wooden fishing boats",
                             "slippery harbor stones"],
                    "taste": ["incredibly fresh grilled seafood",
                             "whole fish with lemon and herbs",
                             "sweet dates with salty fish contrast",
                             "fresh seafood couscous"]
                },
                "activities": ["exploring fish markets", "boat tours", "seafood dining",
                              "wandering date palm gardens", "photographing harbor"]
            },
            {
                "name": "Matmata Underground Dwellings",
                "type": "troglodyte village",
                "features": ["underground homes", "sunken courtyards", "Star Wars set",
                            "traditional berber culture", "mud architecture"],
                "sensory_details": {
                    "sight": ["circular sunken courtyards with openings to sky",
                             "cavernous underground rooms and passages",
                             "white-washed interior walls",
                             "natural light shafts illuminating darkness"],
                    "smell": ["cool, earthy smell of underground chambers",
                             "smoke from cooking fires rising up",
                             "musty, ancient air",
                             "herbs drying in rooms"],
                    "sound": ["voices echoing in underground spaces",
                             "children playing in sunken areas",
                             "hollow sounds amplified underground",
                             "silence broken by movement"],
                    "touch": ["cool underground air year-round",
                             "smooth polished rock from use",
                             "cool mud floors under feet",
                             "warmth of sunlight in courtyards"],
                    "taste": ["strong traditional berber tea",
                             "bread baked in underground ovens",
                             "tagine stews",
                             "local dried fruits and nuts"]
                },
                "activities": ["exploring underground homes", "staying in guesthouses",
                              "learning berber culture", "tasting traditional food"]
            },
            {
                "name": "Chott el Djerid Salt Lake",
                "type": "salt flat desert",
                "features": ["vast white salt expanse", "mirages", "mineral deposits",
                            "desert exploration", "unique landscape"],
                "sensory_details": {
                    "sight": ["blinding white salt stretching to horizon",
                             "shimmering heat mirages",
                             "stark contrast of white against blue sky",
                             "geometric salt formations"],
                    "smell": ["pungent, acrid salt smell",
                             "dry desert air",
                             "mineral-laden wind",
                             "absence of organic scents"],
                    "sound": ["crunching of salt crystals underfoot",
                             "wind whistling across flat terrain",
                             "engine sounds seeming louder on salt",
                             "eerie quiet and emptiness"],
                    "touch": ["sharp, crystalline salt crystals",
                             "intense sun heat on exposed skin",
                             "dry wind sucking moisture",
                             "salty residue on skin"],
                    "taste": ["salt coating your lips and mouth",
                             "mineral taste in the air",
                             "intense thirst from the environment",
                             "bitter mineral sensation"]
                },
                "activities": ["crossing salt flats", "photography", "watching mirages",
                              "exploring mineral formations", "4x4 desert tours"]
            },
            {
                "name": "El Djem Roman Amphitheater",
                "type": "archaeological monument",
                "features": ["massive stone amphitheater", "Roman engineering", "underground chambers",
                            "pristine preservation", "gladiator history"],
                "sensory_details": {
                    "sight": ["monumental stone structure", 
                             "three tiers of soaring arches",
                             "precise geometric Roman architecture",
                             "dramatic play of light and shadow"],
                    "smell": ["ancient stone dust",
                             "mineral scent from aged limestone",
                             "warm dry Mediterranean air",
                             "faint museum preservation chemicals"],
                    "sound": ["voices amplified mysteriously by acoustics",
                             "echoing footsteps in underground passages",
                             "wind through empty arches",
                             "occasional tour guide explanations"],
                    "touch": ["massive smooth stone blocks",
                             "worn steps polished by centuries",
                             "cool interior stone",
                             "hot exterior sun-baked surface"],
                    "taste": ["dust particles on lips",
                             "faint mineral taste from ancient stone",
                             "fresh water from nearby vendors",
                             "local pastries from site caf√©s"]
                },
                "activities": ["exploring amphitheater", "visiting underground chambers",
                              "guided historical tours", "photographing architecture"]
            }
        ]

        self.stats = {
            'generated': 0,
            'failed': 0,
            'en': 0,
            'fr': 0
        }

    def _load_model(self):
        """Load GPT-2 model with proper settings"""
        from transformers import AutoTokenizer, AutoModelForCausalLM

        print("üî• Loading GPT-2 model...")

        try:
            self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
            self.model = AutoModelForCausalLM.from_pretrained(self.model_name)
            self.tokenizer.pad_token = self.tokenizer.eos_token

            if torch.cuda.is_available():
                self.model = self.model.to(self.device)
                print(f"‚úÖ Model loaded on GPU")
            else:
                print(f"‚úÖ Model loaded on CPU")

        except Exception as e:
            print(f"‚ùå Error loading model: {e}")
            print("üîÑ Falling back to distilgpt2")
            self.tokenizer = AutoTokenizer.from_pretrained("distilgpt2")
            self.model = AutoModelForCausalLM.from_pretrained("distilgpt2")
            self.tokenizer.pad_token = self.tokenizer.eos_token
            if torch.cuda.is_available():
                self.model = self.model.to(self.device)

    def create_simple_natural_prompt(self, place, sentiment, language):
        """Create prompts with rich sensory details"""
        
        # Get random sensory details
        sight = np.random.choice(place['sensory_details']['sight'])
        smell = np.random.choice(place['sensory_details']['smell'])
        sound = np.random.choice(place['sensory_details']['sound'])
        touch = np.random.choice(place['sensory_details']['touch'])
        taste = np.random.choice(place['sensory_details']['taste'])
        
        personas_en = [
            "a solo traveler from the UK",
            "a couple on their honeymoon",
            "a family with two children",
            "a group of friends on vacation",
            "a retired couple traveling"
        ]
        
        personas_fr = [
            "une voyageuse seule de France",
            "un couple en voyage de noces",
            "une famille avec deux enfants",
            "un groupe d'amis en vacances",
            "un couple retrait√© qui voyage"
        ]
        
        if language == 'fr':
            persona = np.random.choice(personas_fr)
            
            if sentiment == 'positive':
                starters = [
                    f"Je reviens d'un s√©jour √† {place['name']} et l'exp√©rience a engag√© tous mes sens.",
                    f"Mon voyage √† {place['name']} a √©t√© un v√©ritable festin sensoriel.",
                    f"Je viens de passer quelques jours √† {place['name']}, une explosion de sensations."
                ]
                transition = "Ce qui m'a le plus marqu√©, c'est"
            elif sentiment == 'negative':
                starters = [
                    f"Je dois √™tre honn√™te sur mon exp√©rience sensorielle √† {place['name']}.",
                    f"Mon s√©jour √† {place['name']} n'a pas correspondu √† mes attentes sensorielles.",
                    f"Je reviens de {place['name']} avec des sentiments contrast√©s sur l'atmosph√®re."
                ]
                transition = "Malheureusement"
            else:  # neutral
                starters = [
                    f"Je viens de visiter {place['name']} et voici mes impressions.",
                    f"Mon s√©jour √† {place['name']} m'a laiss√© des souvenirs m√©lang√©s.",
                    f"Je vais partager mes impressions sensorielles de {place['name']}."
                ]
                transition = "Globalement"
            
            starter = np.random.choice(starters)
            
            prompt = f"{starter} En tant que {persona}, "
            prompt += f"j'ai √©t√© captiv√© par la vue des {sight}. "
            prompt += f"L'odeur de {smell} flottait dans l'air. "
            prompt += f"J'ai √©cout√© {sound}. "
            prompt += f"En touchant {touch}, j'ai ressenti l'essence du lieu. "
            prompt += f"J'ai go√ªt√© {taste}. "
            prompt += f"{transition}, "
            
        else:  # English
            persona = np.random.choice(personas_en)
            
            if sentiment == 'positive':
                starters = [
                    f"Just got back from {place['name']} and it engaged all my senses.",
                    f"My trip to {place['name']} was a true sensory feast.",
                    f"I spent a few days at {place['name']}, what an explosion of sensations."
                ]
                transition = "What really struck me was"
            elif sentiment == 'negative':
                starters = [
                    f"I need to be honest about my sensory experience at {place['name']}.",
                    f"My stay at {place['name']} didn't meet my sensory expectations.",
                    f"I'm back from {place['name']} with mixed feelings about the atmosphere."
                ]
                transition = "Unfortunately"
            else:  # neutral
                starters = [
                    f"Just visited {place['name']} and here's what I experienced.",
                    f"My stay at {place['name']} left me with mixed impressions.",
                    f"I want to share my sensory impressions of {place['name']}."
                ]
                transition = "Overall"
            
            starter = np.random.choice(starters)
            
            prompt = f"{starter} As {persona}, "
            prompt += f"I was captivated by {sight}. "
            prompt += f"The smell of {smell} filled the air. "
            prompt += f"I could hear {sound}. "
            prompt += f"Touching {touch}, I felt the essence of the place. "
            prompt += f"I tasted {taste}. "
            prompt += f"{transition}, "
        
        return prompt

    def generate_natural_text(self, prompt, language='en', max_length=100):
        """Generate natural text with proper error handling"""

        try:
            inputs = self.tokenizer(
                prompt,
                return_tensors="pt",
                truncation=True,
                max_length=50,
                padding=True
            )

            attention_mask = inputs['attention_mask']

            if torch.cuda.is_available():
                inputs['input_ids'] = inputs['input_ids'].to(self.device)
                attention_mask = attention_mask.to(self.device)

            with torch.no_grad():
                outputs = self.model.generate(
                    input_ids=inputs['input_ids'],
                    attention_mask=attention_mask,
                    max_new_tokens=max_length,
                    min_new_tokens=30,
                    temperature=0.8,
                    top_p=0.9,
                    top_k=50,
                    repetition_penalty=1.1,
                    do_sample=True,
                    pad_token_id=self.tokenizer.pad_token_id,
                    eos_token_id=self.tokenizer.eos_token_id,
                    no_repeat_ngram_size=2,
                    num_return_sequences=1,
                )

            generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)

            if prompt in generated_text:
                generated_text = generated_text[len(prompt):].strip()

            generated_text = self._clean_natural_text(generated_text)

            if generated_text and len(generated_text.split()) >= 10:
                return generated_text
            else:
                return None

        except Exception as e:
            print(f"‚ö†Ô∏è Generation error: {e}")
            return None

    def _clean_natural_text(self, text):
        """Clean text to be natural"""

        unwanted = [
            "In my review, I would say", "My TripAdvisor review would be",
            "As a tourist, I would write", "Here is my honest review:",
            "Dans mon avis, je dirais", "Mon avis sur TripAdvisor serait",
            "En tant que touriste, j'√©crirais", "Voici mon avis honn√™te:"
        ]

        for phrase in unwanted:
            if phrase in text:
                text = text.replace(phrase, "")

        sentences = re.split(r'(?<=[.!?])\s+', text)
        if len(sentences) > 1:
            if not sentences[-1].endswith(('.', '!', '?')):
                text = ' '.join(sentences[:-1])
            else:
                text = ' '.join(sentences)

        text = re.sub(r'\s+', ' ', text).strip()

        if text and len(text) > 1:
            text = text[0].upper() + text[1:]

        if text and text[-1] not in ['.', '!', '?']:
            text = text.rstrip(',;:') + '.'

        return text

    def analyze_sentiment(self, text):
        """Label ``text`` as positive, negative or neutral from TextBlob polarity.

        Args:
            text: Text to score.

        Returns:
            A ``(label, confidence)`` tuple. Polarity above 0.2 yields
            ``('positive', abs(polarity))`` and polarity below -0.2 yields
            ``('negative', abs(polarity))``. Anything in between, or any
            ordinary error raised while scoring, yields ``('neutral', 0.5)``.
        """
        # Imported lazily so the dependency is only needed when scoring.
        from textblob import TextBlob

        try:
            blob = TextBlob(text)
            polarity = blob.sentiment.polarity

            if polarity > 0.2:
                return 'positive', abs(polarity)
            elif polarity < -0.2:
                return 'negative', abs(polarity)
            else:
                return 'neutral', 0.5
        except Exception:
            # Best-effort fallback for scoring errors. Catching Exception
            # (not a bare except) lets KeyboardInterrupt and SystemExit
            # propagate so a long run can still be interrupted.
            return 'neutral', 0.5

    def generate_one_comment(self):
        """Generate one natural comment"""

        place = np.random.choice(self.places)
        language = np.random.choice(['fr', 'en'], p=[0.6, 0.4])
        sentiments = ['positive', 'negative', 'neutral']
        sentiment_probs = [0.65, 0.20, 0.15]
        sentiment = np.random.choice(sentiments, p=sentiment_probs)

        prompt = self.create_simple_natural_prompt(place, sentiment, language)
        text = self.generate_natural_text(prompt, language)

        if not text:
            self.stats['failed'] += 1
            return None

        predicted_sentiment, confidence = self.analyze_sentiment(text)

        comment_data = {
            'id': f"TUN_{self.stats['generated']:06d}",
            'text': text,
            'language': language,
            'place': place['name'],
            'place_type': place['type'],
            'target_sentiment': sentiment,
            'predicted_sentiment': predicted_sentiment,
            'sentiment_confidence': confidence,
            'sentiment_match': sentiment == predicted_sentiment,
            'rating': self._generate_rating(predicted_sentiment),
            'timestamp': datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            'word_count': len(text.split())
        }

        self.stats['generated'] += 1
        self.stats[language] += 1

        return comment_data

    def _generate_rating(self, sentiment):
        """Generate realistic rating"""
        if sentiment == 'positive':
            return np.random.choice([4, 5])
        elif sentiment == 'negative':
            return np.random.choice([1, 2])
        else:
            return np.random.choice([3, 4])

    def generate_dataset(self, num_comments=100):
        """Generate dataset of natural comments"""

        print(f"\nüöÄ Generating {num_comments:,} sensory-rich comments...")
        print("=" * 60)

        all_comments = []
        pbar = tqdm(total=num_comments, desc="Generating comments")

        while len(all_comments) < num_comments:
            comment = self.generate_one_comment()

            if comment:
                all_comments.append(comment)
                pbar.update(1)

                if len(all_comments) % 10 == 0:
                    print(f"üìä Generated: {len(all_comments):,}/{num_comments:,}")
                    print(f"   Success rate: {len(all_comments)/(len(all_comments)+self.stats['failed']):.1%}")

            if self.stats['failed'] > 100 and len(all_comments) < 10:
                print("‚ö†Ô∏è Too many failures, trying alternative approach...")
                break

        pbar.close()

        if all_comments:
            df = pd.DataFrame(all_comments)
            print(f"\n‚úÖ Generation complete!")
            print(f"   Generated: {len(df):,} comments")
            print(f"   Failed: {self.stats['failed']:,}")
            print(f"   Success rate: {len(df)/(len(df)+self.stats['failed']):.1%}")
            return df
        else:
            print("‚ùå No comments generated")
            return pd.DataFrame()





In [None]:

# 4. MAIN FUNCTION
def main_working(num_comments=100):
    """Smoke-test the generator, then build, save and summarise a dataset.

    A quick `test_generation()` run gates the real work. If it passes, a new
    generator produces ``num_comments`` comments, the result is written to a
    timestamped CSV in the current working directory, and summary statistics
    plus up to three sample comments are printed.

    Returns:
        The generated DataFrame, or ``None`` when the smoke test failed or no
        comments were produced.
    """

    rule = "=" * 60
    print(rule)
    print("üèõÔ∏è  SENSORY-ENHANCED TUNISIA COMMENTS GENERATOR")
    print(rule)

    print("\nüß™ Running quick test...")
    smoke_ok, _ = test_generation()
    if not smoke_ok:
        print("\n‚ùå Test failed. Cannot proceed with generation.")
        return None

    print("\n‚úÖ Test passed! Starting main generation...")

    df = WorkingTunisiaGenerator().generate_dataset(num_comments=num_comments)
    total = len(df)
    if total == 0:
        print("‚ùå No comments generated")
        return None

    stamp = datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"tunisia_sensory_comments_{total}_{stamp}.csv"
    df.to_csv(filename, index=False)

    french_total = int((df['language'] == 'fr').sum())
    english_total = int((df['language'] == 'en').sum())
    sentiment_counts = df['predicted_sentiment'].value_counts().to_dict()
    mean_words = df['word_count'].mean()

    print("\nüìä DATASET STATISTICS:")
    print(f"   Total comments: {total:,}")
    print(f"   French comments: {french_total:,}")
    print(f"   English comments: {english_total:,}")
    print(f"   Sentiment distribution: {sentiment_counts}")
    print(f"   Average word count: {mean_words:.1f}")

    print("\nüìñ SAMPLE COMMENTS:")
    print(rule)

    # Anything that is not French gets the English flag; anything that is
    # neither positive nor negative gets the neutral face.
    mood_icons = {'positive': "üòä", 'negative': "üòû"}
    for _, row in df.sample(min(3, total)).iterrows():
        lang_flag = "üá´üá∑" if row['language'] == 'fr' else "üá¨üáß"
        sentiment_icon = mood_icons.get(row['predicted_sentiment'], "üòê")

        print(f"\n{lang_flag} {sentiment_icon} {row['place']}")
        print(f"Rating: {row['rating']}/5 | Words: {row['word_count']}")
        print(f'"{row["text"]}"')
        print("-" * 40)

    print(f"\nüíæ Saved to: {filename}")

    return df

In [None]:
# 3. TEST FUNCTION
def test_generation():
    """Smoke-test the generator by attempting five comments.

    Returns:
        ``(True, DataFrame)`` holding the successful comments when at least
        one attempt succeeded, otherwise ``(False, None)``.
    """

    print("üß™ Testing sensory comment generation...")
    print("=" * 60)

    generator = WorkingTunisiaGenerator()
    collected = []

    for attempt in range(1, 6):
        print(f"\nGenerating comment {attempt}/5...")
        comment = generator.generate_one_comment()

        if not comment:
            print(f"‚ùå Failed")
            continue

        collected.append(comment)
        print(f"‚úÖ Success! Language: {comment['language']}")
        # Only the first 100 characters are previewed.
        print(f"   Text: {comment['text'][:100]}...")

    if not collected:
        print("\n‚ùå Test failed")
        return False, None

    df_test = pd.DataFrame(collected)
    print(f"\nüéâ Test successful! Generated {len(collected)} comments.")
    return True, df_test


In [None]:
# 5. EXECUTION
if __name__ == "__main__":
    import os
    from pathlib import Path

    def _confirm(prompt, accept_blank=False):
        """Ask a yes/no question; return True for 'yes'/'y' (or blank if allowed)."""
        answer = input(prompt).strip().lower()
        return answer in ('yes', 'y') or (accept_blank and answer == '')

    def _generate_and_save(num_comments, output_dir):
        """Run main_working and write the result to a timestamped CSV in output_dir.

        Returns ``(df, path)``, or ``(None, None)`` when nothing was generated.
        """
        df = main_working(num_comments)
        if df is None:
            return None, None
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        path = output_dir / f"tunisia_sensory_{num_comments}_{timestamp}.csv"
        df.to_csv(path, index=False)
        print(f"\n‚úÖ Complete! Saved to {path}")
        return df, path

    print("\nüéõÔ∏è  SENSORY TUNISIA GENERATOR - LOCAL PC")
    print("=" * 60)

    # Create Data/raw directory if it doesn't exist
    data_dir = Path("Data/raw")
    data_dir.mkdir(parents=True, exist_ok=True)
    print(f"üìÅ Output directory: {data_dir.absolute()}")

    # Show system info
    print(f"üîß Device: {torch.device('cuda' if torch.cuda.is_available() else 'cpu')}")
    if torch.cuda.is_available():
        print(f"üîß GPU: {torch.cuda.get_device_name(0)}")
    else:
        print("üîß Using CPU (GPU not available)")

    print("\nOptions:")
    print("1. Quick test (5 comments)")
    print("2. Small dataset (100 comments)")
    print("3. Medium dataset (500 comments)")
    print("4. Large dataset (2,000 comments)")
    print("5. Full dataset (10,000 comments)")
    print("6. Custom number of comments")
    print("0. Exit")

    # NOTE: main_working also writes its own CSV to the current working
    # directory, so each run below leaves a second copy under Data/raw.
    while True:
        choice = input("\nEnter choice (0-6): ").strip()

        if choice == "0":
            print("üëã Exiting...")
            break

        elif choice == "1":
            print("\nüß™ Running quick test...")
            success, df_test = test_generation()
            if success and df_test is not None:
                filename = data_dir / "tunisia_test_sensory_5.csv"
                df_test.to_csv(filename, index=False)
                print(f"\n‚úÖ Test complete! Saved to {filename}")
                # Show sample
                print(f"\nüìä Generated {len(df_test)} comments")
                print(f"üìÅ Saved to: {filename.absolute()}")

        elif choice == "2":
            print("\nüöÄ Generating 100 sensory comments...")
            print("Estimated time: 1-2 minutes")
            df, filename = _generate_and_save(100, data_dir)

        elif choice == "3":
            print("\nüöÄ Generating 500 sensory comments...")
            print("Estimated time: 5-10 minutes")
            # Pressing Enter counts as "yes" for this option only.
            if _confirm("Continue? (yes/no): ", accept_blank=True):
                df, filename = _generate_and_save(500, data_dir)

        elif choice == "4":
            print("\nüöÄ Generating 2,000 sensory comments...")
            print("Estimated time: 20-30 minutes")
            print("‚ö†Ô∏è  This will use significant memory and time")
            if _confirm("Continue? (yes/no): "):
                df, filename = _generate_and_save(2000, data_dir)

        elif choice == "5":
            print("\n‚ö†Ô∏è  GENERATING 10,000 COMMENTS")
            print("=" * 40)
            print("‚ö†Ô∏è  WARNING: This will take 1-2 hours")
            print("‚ö†Ô∏è  Requires stable internet connection for model")
            print("‚ö†Ô∏è  May use significant disk space (~10MB)")
            print("=" * 40)
            if _confirm("Are you SURE you want to continue? (yes/no): "):
                print("\nüîÑ Starting generation...")
                df, filename = _generate_and_save(10000, data_dir)
                if df is not None:
                    print(f"üìä File size: {os.path.getsize(filename) / 1024 / 1024:.2f} MB")

        elif choice == "6":
            # Only the integer parse is guarded, so a ValueError raised during
            # generation is not misreported as invalid user input.
            raw_count = input("Enter number of comments to generate: ").strip()
            try:
                num = int(raw_count)
            except ValueError:
                num = None
                print("‚ùå Please enter a valid number")

            if num is not None:
                if num <= 0:
                    print("‚ùå Please enter a positive number")
                    continue

                if num > 5000:
                    print(f"\n‚ö†Ô∏è  Warning: Generating {num} comments")
                    print(f"Estimated time: {num/500*5:.1f} - {num/500*10:.1f} minutes")
                    if not _confirm(f"Generate {num} comments? (yes/no): "):
                        continue

                print(f"\nüöÄ Generating {num} sensory comments...")
                df, filename = _generate_and_save(num, data_dir)
                if df is not None:
                    print(f"üìä Generated {len(df)} comments")

        else:
            print("‚ùå Invalid choice. Please enter 0-6.")
            continue

        # Ask if user wants to continue or exit
        print("\n" + "=" * 40)
        if not _confirm("Generate another dataset? (yes/no): "):
            print("üëã Exiting...")
            break

    print("\n‚ú® Script execution completed!")

    # List generated files
    csv_files = list(data_dir.glob("*.csv"))
    if csv_files:
        print("\nüìÅ Generated files in Data/raw:")
        for i, file in enumerate(csv_files, 1):
            size_mb = file.stat().st_size / 1024 / 1024
            print(f"  {i}. {file.name} ({size_mb:.2f} MB)")
    else:
        print("\nüìÅ No CSV files generated in Data/raw")

    input("\nPress Enter to exit...")