In [8]:
import os
import subprocess

print("📥 Cloning GitHub repository...")

if not os.path.exists('Real-time-competitor-strategy-tracker'):
    subprocess.run(
        ['git', 'clone', 'https://github.com/Techierookies/Real-time-competitor-strategy-tracker.git'],
        capture_output=True,
        timeout=60
    )
    print("✅ Repository cloned")
else:
    print("⚠️ Repository already exists locally.")

📥 Cloning GitHub repository...
✅ Repository cloned


In [9]:
# ============================================================================
# VIEW ALL REAL iPhone RECORDS FROM DATABASE + EXPORT TO CSV
# ============================================================================

import sqlite3
import pandas as pd
import os
from google.colab import files

db_path = 'Real-time-competitor-strategy-tracker/competitor_tracker.db'

print("=" * 100)
print("📱 VIEWING ALL REAL iPHONE RECORDS FROM DATABASE")
print("=" * 100)

if os.path.exists(db_path):
    conn = sqlite3.connect(db_path)

    # Query ALL records (not just samples)
    query = "SELECT id, model, site, price, rating, reviews, scraped_at FROM raw_scrapes"
    df_github = pd.read_sql_query(query, conn)

    print(f"\n📊 Total Records: {len(df_github)}\n")

    # Display settings for full visibility
    pd.set_option('display.max_rows', None)
    pd.set_option('display.max_columns', None)
    pd.set_option('display.width', None)
    pd.set_option('display.max_colwidth', None)

    # Show all records
    print("=" * 100)
    print(df_github.to_string(index=False))
    print("=" * 100)

    # Detailed breakdown by model
    print("\n\n📱 BREAKDOWN BY MODEL:\n")
    for model in df_github['model'].unique():
        model_data = df_github[df_github['model'] == model]
        print(f"{'='*50}")
        print(f"🔹 {model} ({len(model_data)} records)")
        print(f"{'='*50}")
        print(model_data.to_string(index=False))
        print()

    # Statistics
    print("\n📈 STATISTICS:")
    print(f"\nPrice Statistics:")
    print(f"   Overall: ₹{df_github['price'].min()} - ₹{df_github['price'].max()}")
    print(f"   iPhone 15: ₹{df_github[df_github['model']=='iPhone 15']['price'].min()} - ₹{df_github[df_github['model']=='iPhone 15']['price'].max()}")
    print(f"   iPhone 16: ₹{df_github[df_github['model']=='iPhone 16']['price'].min()} - ₹{df_github[df_github['model']=='iPhone 16']['price'].max()}")
    print(f"   iPhone 17: ₹{df_github[df_github['model']=='iPhone 17']['price'].min()} - ₹{df_github[df_github['model']=='iPhone 17']['price'].max()}")

    print(f"\nRating Statistics:")
    print(f"   iPhone 15: {df_github[df_github['model']=='iPhone 15']['rating'].min()} - {df_github[df_github['model']=='iPhone 15']['rating'].max()}")
    print(f"   iPhone 16: {df_github[df_github['model']=='iPhone 16']['rating'].min()} - {df_github[df_github['model']=='iPhone 16']['rating'].max()}")
    print(f"   iPhone 17: {df_github[df_github['model']=='iPhone 17']['rating'].min()} - {df_github[df_github['model']=='iPhone 17']['rating'].max()}")

    # Export to CSV
    print("\n\n" + "=" * 100)
    print("💾 EXPORTING TO CSV...")
    print("=" * 100)

    csv_filename = 'github_database_iphone_real_data.csv'
    df_github.to_csv(csv_filename, index=False)

    print(f"\n✅ Exported to: {csv_filename}")
    print(f"   Records: {len(df_github)}")
    print(f"   Columns: {df_github.columns.tolist()}")

    conn.close()

else:
    print(f"❌ Database not found at: {db_path}")

📱 VIEWING ALL REAL iPHONE RECORDS FROM DATABASE

📊 Total Records: 18

 id     model     site    price  rating                               reviews          scraped_at
  1 iPhone 15   Amazon  ₹71,197     4.5         Battery life could be better. 2025-09-30 10:28:09
  2 iPhone 16   Amazon ₹129,933     4.1         Battery life could be better. 2025-09-30 10:28:14
  3 iPhone 17   Amazon  ₹85,408     3.7     Perfect for daily use and gaming. 2025-09-30 10:28:18
  4 iPhone 15 Flipkart  ₹93,640     4.9     Perfect for daily use and gaming. 2025-09-30 10:28:22
  5 iPhone 16 Flipkart ₹124,767     4.4  Excellent phone with amazing camera. 2025-09-30 10:28:27
  6 iPhone 17 Flipkart ₹144,713     4.0    Value for money, highly recommend. 2025-09-30 10:28:32
  7 iPhone 15   Amazon ₹148,357     4.3    Value for money, highly recommend. 2025-09-30 11:03:12
  8 iPhone 16   Amazon  ₹74,869     4.2  Excellent phone with amazing camera. 2025-09-30 11:03:17
  9 iPhone 17   Amazon ₹137,457     4.1     Perf

## 1. Install Dependencies

In [10]:
!pip install google-generativeai pandas numpy scikit-learn joblib -q
print("✅ All dependencies installed")

✅ All dependencies installed


## 2. Load Saved ML Model & Artifacts

In [11]:
import joblib
import pickle
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
import warnings
warnings.filterwarnings('ignore')

print("📦 Loading ML model artifacts...")

model = joblib.load('iphone_price_prediction_model_random_forest.pkl')
scaler = joblib.load('price_prediction_scaler.pkl')

with open('price_prediction_encoders.pkl', 'rb') as f:
    encoders = pickle.load(f)
le_model = encoders['model_encoder']
le_source = encoders['source_encoder']

with open('price_prediction_features.pkl', 'rb') as f:
    feature_info = pickle.load(f)
all_features = feature_info['all_features']
numerical_features = feature_info['numerical_features']

print(f"✅ ML model loaded: {len(all_features)} features")
print(f"✅ Encoders loaded")
print(f"✅ Scaler loaded")

📦 Loading ML model artifacts...
✅ ML model loaded: 33 features
✅ Encoders loaded
✅ Scaler loaded


## 3. Clone GitHub & Load Real Data

In [12]:
import subprocess
import os
import sqlite3

# Load synthetic data first
df = pd.read_csv('enhanced_synthetic_dataset_with_timestamps.csv')
df['Scraped_At'] = pd.to_datetime(df['Scraped_At'])
print(f"✅ Loaded synthetic data: {len(df)} records")

# Try to load real data from GitHub
try:
    db_path = 'Real-time-competitor-strategy-tracker/competitor_tracker.db'
    if os.path.exists(db_path):
        conn = sqlite3.connect(db_path)
        real_df = pd.read_sql_query("""
            SELECT * FROM raw_scrapes
            WHERE model IN ('iPhone 15', 'iPhone 16', 'iPhone 17')
        """, conn)
        conn.close()

        if len(real_df) > 0:
            real_df = real_df.rename(columns={
                'model': 'Model',
                'site': 'Source',
                'price': 'Price',
                'reviews': 'Reviews',
                'rating': 'Rating',
                'url': 'URL'
            })
            if 'Scraped_At' not in real_df.columns:
                real_df['Scraped_At'] = datetime.now()
            else:
                real_df['Scraped_At'] = pd.to_datetime(real_df['scraped_at'])

            # Keep only required columns
            required_cols = ['Model', 'Source', 'Price', 'Rating', 'Reviews', 'Scraped_At']
            real_df = real_df[required_cols]

            df = pd.concat([df, real_df], ignore_index=True)
            print(f"✅ Loaded real data: {len(real_df)} records")
            print(f"✅ Combined dataset: {len(df)} total records")
        else:
            print("⚠️ No real data found, using synthetic only")
    else:
        print(f"⚠️ Database file not found at {db_path}")
except Exception as e:
    print(f"⚠️ Error loading real data: {e}")

print(f"\n📊 Final dataset: {len(df)} records from {df['Model'].nunique()} models")
print(f"   Models: {sorted(df['Model'].unique().tolist())}")
print(f"   Sources: {sorted(df['Source'].unique().tolist())}")

✅ Loaded synthetic data: 2500 records
✅ Loaded real data: 18 records
✅ Combined dataset: 2518 total records

📊 Final dataset: 2518 records from 3 models
   Models: ['iPhone 15', 'iPhone 16', 'iPhone 17']
   Sources: ['Amazon', 'Flipkart']


## 4. Data Cleanup & Validation

In [13]:
print("🧹 COMPREHENSIVE DATA CLEANUP & VALIDATION")
print("=" * 80)

# Step 1: Clean Price column
print("\n1️⃣ Cleaning Price column...")
df['Price'] = pd.to_numeric(df['Price'], errors='coerce')
initial_rows = len(df)
df = df.dropna(subset=['Price'])
df = df[df['Price'] > 0]
print(f"   Removed {initial_rows - len(df)} invalid price records")
print(f"   Price range: ₹{df['Price'].min():,.0f} - ₹{df['Price'].max():,.0f}")

# Step 2: Ensure required columns
print("\n2️⃣ Validating columns...")
required_cols = ['Model', 'Source', 'Price', 'Rating', 'Reviews', 'Scraped_At']
for col in required_cols:
    if col not in df.columns:
        print(f"   Adding missing column: {col}")
        if col == 'Rating':
            df[col] = 4.2
        elif col == 'Reviews':
            df[col] = "Good product"
        else:
            df[col] = ""
    else:
        print(f"   ✅ {col}")

# Step 3: Convert to numeric types
print("\n3️⃣ Converting data types...")
df['Price'] = df['Price'].astype(float)
df['Rating'] = pd.to_numeric(df['Rating'], errors='coerce').fillna(4.2)
df['Reviews'] = df['Reviews'].astype(str)
print(f"   ✅ All types converted")

# Step 4: Fix timestamps
print("\n4️⃣ Fixing timestamps...")
df['Scraped_At'] = pd.to_datetime(df['Scraped_At'], errors='coerce')
df = df.dropna(subset=['Scraped_At'])
print(f"   ✅ Datetime: valid")

# Step 5: Final validation
print("\n5️⃣ Final validation...")
print(f"   Total records: {len(df)}")
print(f"   Models: {sorted(df['Model'].unique().tolist())}")
print(f"   Sources: {sorted(df['Source'].unique().tolist())}")
print(f"   Date range: {df['Scraped_At'].min().date()} to {df['Scraped_At'].max().date()}")

print("\n" + "=" * 80)
print("✅ DATA CLEANUP COMPLETE!")
print("=" * 80)

🧹 COMPREHENSIVE DATA CLEANUP & VALIDATION

1️⃣ Cleaning Price column...
   Removed 18 invalid price records
   Price range: ₹53,229 - ₹205,643

2️⃣ Validating columns...
   ✅ Model
   ✅ Source
   ✅ Price
   ✅ Rating
   ✅ Reviews
   ✅ Scraped_At

3️⃣ Converting data types...
   ✅ All types converted

4️⃣ Fixing timestamps...
   ✅ Datetime: valid

5️⃣ Final validation...
   Total records: 2500
   Models: ['iPhone 15', 'iPhone 16', 'iPhone 17']
   Sources: ['Amazon', 'Flipkart']
   Date range: 2025-04-19 to 2025-10-16

✅ DATA CLEANUP COMPLETE!


## 5. Initialize Gemini LLM

In [14]:
import google.generativeai as genai

GEMINI_API_KEY = "AIzaSyDmolAXZRas_1fFAvjIvOdeAfE1HQrIXM8"
genai.configure(api_key=GEMINI_API_KEY)

LATEST_MODEL = 'models/gemini-2.5-flash'

try:
    llm = genai.GenerativeModel(LATEST_MODEL)
    test = llm.generate_content("Say hi")
    print(f"✅ Gemini LLM ready: {LATEST_MODEL}")
except Exception as e:
    print(f"❌ LLM failed: {e}")
    llm = None

✅ Gemini LLM ready: models/gemini-2.5-flash


## 6. Helper Functions

In [15]:
# ============================================================================
# FUNCTION: Get LATEST current price by timestamp (from GitHub database ONLY)
# ============================================================================

def get_latest_current_price(model_name, source):
    """
    Get the LATEST price for a model on a specific source
    Based on the most recent timestamp from GitHub database
    """
    # Filter for model and source from GitHub data
    data = df_github[(df_github['model'] == model_name) & (df_github['site'] == source)].copy()

    if len(data) == 0:
        return None, None, None, None  # price, timestamp, review, rating

    # Sort by timestamp descending (most recent first)
    data['scraped_at'] = pd.to_datetime(data['scraped_at'])
    data = data.sort_values('scraped_at', ascending=False)

    # Get the LATEST record
    latest_record = data.iloc[0]

    # Extract price as string, convert to float
    price_str = str(latest_record['price']).replace(',', '').replace('₹', '')
    price = float(price_str) if price_str else 0

    timestamp = latest_record['scraped_at']
    review = latest_record['reviews'][:50] if pd.notna(latest_record['reviews']) else ""
    rating = float(latest_record['rating']) if pd.notna(latest_record['rating']) else 4.0

    return price, timestamp, review, rating

print("✅ Helper function loaded - Uses LATEST prices from GitHub database")

✅ Helper function loaded - Uses LATEST prices from GitHub database


## 7. ML Model Prediction Function

In [16]:
def predict_iphone_price(model_name, source, rating=4.2, review_text="Good phone", target_date=None):
    if target_date is None:
        target_date = datetime.now()

    pred = pd.DataFrame({
        'Model': [model_name],
        'Source': [source],
        'Rating': [rating],
        'Reviews': [review_text],
        'Scraped_At': [target_date]
    })

    pred['Year'] = pred['Scraped_At'].dt.year
    pred['Month'] = pred['Scraped_At'].dt.month
    pred['Day'] = pred['Scraped_At'].dt.day
    pred['Hour'] = pred['Scraped_At'].dt.hour
    pred['DayOfWeek'] = pred['Scraped_At'].dt.dayofweek
    pred['DayOfYear'] = pred['Scraped_At'].dt.dayofyear
    pred['WeekOfYear'] = pred['Scraped_At'].dt.isocalendar().week
    pred['Quarter'] = pred['Scraped_At'].dt.quarter
    pred['DaysAgo'] = (df['Scraped_At'].max() - pred['Scraped_At']).dt.days

    pred['IsWeekend'] = pred['DayOfWeek'].isin([5, 6]).astype(int)
    pred['IsHolidaySeason'] = pred['Month'].isin([11, 12]).astype(int)
    pred['IsLaunchSeason'] = pred['Month'].isin([9, 10]).astype(int)
    pred['IsSummerSeason'] = pred['Month'].isin([4, 5, 6]).astype(int)

    pred['ReviewLength'] = pred['Reviews'].str.len()
    pred['ReviewWordCount'] = pred['Reviews'].str.split().str.len()
    pred['HasExclamation'] = pred['Reviews'].str.contains('!').astype(int)
    pred['HasQuestion'] = pred['Reviews'].str.contains('\\?').astype(int)

    pred['Model_Encoded'] = le_model.transform([model_name])[0]
    pred['Source_Encoded'] = le_source.transform([source])[0]

    day_name = target_date.strftime('%A')
    month_name = target_date.strftime('%B')

    try:
        pred['DayName_Encoded'] = le_day_name.transform([day_name])[0]
    except:
        pred['DayName_Encoded'] = 3

    try:
        pred['MonthName_Encoded'] = le_month_name.transform([month_name])[0]
    except:
        pred['MonthName_Encoded'] = pred['Month'].iloc[0]

    hour = pred['Hour'].iloc[0]
    if 6 <= hour < 12: tod = 'Morning'
    elif 12 <= hour < 18: tod = 'Afternoon'
    elif 18 <= hour < 22: tod = 'Evening'
    else: tod = 'Night'

    try:
        pred['TimeOfDay_Encoded'] = le_time_of_day.transform([tod])[0]
    except:
        pred['TimeOfDay_Encoded'] = 1

    pred['Model_Source_Interaction'] = pred['Model_Encoded'] * pred['Source_Encoded']
    pred['Rating_Month_Interaction'] = pred['Rating'] * pred['Month']
    pred['Rating_ReviewLength_Interaction'] = pred['Rating'] * pred['ReviewLength']

    model_data = df[df['Model'] == model_name]
    pred['Model_Price_mean'] = model_data['Price'].mean()
    pred['Model_Price_std'] = model_data['Price'].std()
    pred['Model_Price_min'] = model_data['Price'].min()
    pred['Model_Price_max'] = model_data['Price'].max()
    pred['Model_Price_median'] = model_data['Price'].median()

    pred['Price_7Day_MA'] = model_data['Price'].tail(7).mean()
    pred['Price_30Day_MA'] = model_data['Price'].tail(30).mean()

    X_pred = pred[all_features].fillna(0)
    X_pred[numerical_features] = scaler.transform(X_pred[numerical_features])

    return model.predict(X_pred)[0]

print("✅ ML prediction function ready")

✅ ML prediction function ready


## 8. Gemini Prediction & Analysis Functions

In [17]:
def get_gemini_price_prediction(model_name, source, target_date):
    if not llm:
        return None

    prompt = f"""Based on market trends for iPhone {model_name.split()[-1]},
    predict the most likely market price on {source} for {target_date.strftime('%B %d, %Y')} in Indian Rupees.
    Consider competitor pricing, demand, seasonality.
    Reply with ONLY a number (e.g., 75000)"""

    try:
        result = llm.generate_content(prompt)
        import re
        match = re.search(r'\d+', result.text.replace(',', ''))
        return float(match.group()) if match else None
    except:
        return None

def get_gemini_analysis(model_name, source, ml_price, gemini_price, current_price, target_date):
    if not llm:
        return "Market analysis unavailable"

    prompt = f"""Analyze iPhone {model_name} pricing on {source} for {target_date.strftime('%B %d, %Y')}:
    - ML Model predicts: ₹{ml_price:,.0f}
    - Gemini market analysis suggests: ₹{gemini_price:,.0f}
    - Current latest market price: ₹{current_price:,.0f}

    Provide BRIEF 2-line analysis on:
    1. Market sentiment (bullish/bearish/neutral)
    2. Technical insight (overpriced/underpriced/fair)

    Format: "📊 Market: [sentiment]. 📈 Technical: [insight]"""

    try:
        result = llm.generate_content(prompt)
        return result.text[:200]
    except:
        return "Market analysis unavailable"

def get_gemini_optimal_price(model_name, source, ml_price, gemini_price, current_price):
    if not llm:
        prices = [ml_price, current_price]
        if gemini_price and gemini_price > 0:
            prices.append(gemini_price)
        return float(np.mean(prices))

    prompt = f"""Given three price signals for {model_name} on {source}:
    - ML Model prediction: ₹{ml_price:,.0f}
    - Gemini market analysis: ₹{gemini_price:,.0f}
    - Current latest market price: ₹{current_price:,.0f}

    Recommend ONE optimal selling price that balances competitiveness and profitability.
    Reply ONLY with a single number (INR)."""

    try:
        result = llm.generate_content(prompt)
        import re
        match = re.search(r'\d+', result.text.replace(',', ''))
        if match:
            return float(match.group())
    except:
        pass

    prices = [ml_price, current_price]
    if gemini_price and gemini_price > 0:
        prices.append(gemini_price)
    return float(np.mean(prices))

print("✅ Gemini functions ready")

✅ Gemini functions ready


## 9. Safe Wrapper Functions

In [18]:
def safe_predict_iphone_price(model_name, source, rating=4.2, review_text="Good phone", target_date=None):
    try:
        return predict_iphone_price(model_name, source, rating, review_text, target_date)
    except Exception as e:
        model_data = df[df['Model'] == model_name]
        if len(model_data) > 0:
            return float(model_data['Price'].mean())
        return 75000.0

def safe_get_gemini_price(model_name, source, target_date):
    try:
        return get_gemini_price_prediction(model_name, source, target_date)
    except:
        return None

def safe_get_gemini_analysis(model_name, source, ml_price, gemini_price, current_price, target_date):
    try:
        return get_gemini_analysis(model_name, source, ml_price, gemini_price, current_price, target_date)
    except:
        return "📊 Market: Neutral. 📈 Technical: Fair value"

def safe_get_optimal_price(model_name, source, ml_price, gemini_price, current_price):
    try:
        return get_gemini_optimal_price(model_name, source, ml_price, gemini_price, current_price)
    except:
        prices = [ml_price, current_price]
        if gemini_price and gemini_price > 0:
            prices.append(gemini_price)
        return float(np.mean(prices))

print("✅ Safe wrapper functions ready")

✅ Safe wrapper functions ready


## 10. Dashboard Management

In [19]:
# Initialize dashboard
dashboard = pd.DataFrame(columns=[
    'DateTime', 'Model', 'Source', 'ML_Predicted', 'Gemini_Predicted',
    'Current_Price', 'Current_Timestamp', 'Current_Rating', 'Optimal_Price', 'Analysis', 'Review'
])

all_predictions = []

def update_dashboard_batch(model_name, predictions_list):
    global dashboard

    for pred in predictions_list:
        source = pred['source']
        dashboard = dashboard[~((dashboard['Model'] == model_name) & (dashboard['Source'] == source))]

        new_row = pd.DataFrame({
            'DateTime': [datetime.now().strftime('%Y-%m-%d %H:%M:%S')],
            'Model': [model_name],
            'Source': [source],
            'ML_Predicted': [f"₹{pred['ml_pred']:,.0f}"],
            'Gemini_Predicted': [f"₹{pred['gemini_pred']:,.0f}" if pred['gemini_pred'] else "N/A"],
            'Current_Price': [f"₹{pred['current_price']:,.0f}"],
            'Current_Timestamp': [pred['current_timestamp'].strftime('%Y-%m-%d %H:%M:%S')],
            'Current_Rating': [f"{pred['current_rating']}"],
            'Optimal_Price': [f"₹{pred['optimal_price']:,.0f}"],
            'Analysis': [pred['analysis'][:80]],
            'Review': [pred['review'][:40]]
        })

        dashboard = pd.concat([dashboard, new_row], ignore_index=True)

    return dashboard

print("✅ Dashboard ready")

✅ Dashboard ready


## 11. Advanced Chatbot with Latest Prices

In [20]:
def advanced_iphone_chatbot():
    global dashboard, all_predictions

    print("=" * 100)
    print("🤖 ADVANCED iPHONE PRICE PREDICTION CHATBOT (with LATEST prices)")
    print("=" * 100)
    print("\n📊 Features:")
    print("  • ML Model + Gemini LLM price predictions")
    print("  • Real data from GitHub (LATEST by timestamp) + synthetic data")
    print("  • Market & technical analysis")
    print("  • Gemini-powered optimal pricing")
    print("  • Real-time dashboard tracking (saves ALL sources)")
    print("\n Commands:")
    print("  • Just ask naturally: 'price for iPhone 16 on Amazon'")
    print("  • 'compare iPhone 15' - Compare both sources")
    print("  • 'update' - Save ALL last predictions to dashboard")
    print("  • 'dashboard' - View all tracked predictions")
    print("  • 'quit' - Exit\n")

    conversation_history = []
    all_predictions = []

    while True:
        user_input = input("You: ").strip()

        if not user_input:
            continue

        if user_input.lower() in ['quit', 'exit', 'bye']:
            print("\n👋 Thank you for using Advanced iPhone Price Predictor!")
            break

        # Handle dashboard view
        if user_input.lower() == 'dashboard':
            if len(dashboard) == 0:
                print("\nAssistant: 📊 Dashboard is empty. Make predictions first!\n")
            else:
                print("\n" + "="*120)
                print("📊 PREDICTION DASHBOARD")
                print("="*120)
                print(dashboard.to_string(index=False))
                print("="*120 + "\n")
            continue

        # Handle update command
        if user_input.lower() == 'update':
            if len(all_predictions) > 0:
                pred_list = all_predictions
                model_name = pred_list[0]['model']

                dashboard_updated = update_dashboard_batch(model_name, pred_list)
                dashboard = dashboard_updated

                sources_saved = ', '.join([p['source'] for p in pred_list])
                print(f"\nAssistant: ✅ Dashboard updated with {len(pred_list)} predictions!")
                print(f"   Saved: {sources_saved}\n")

                all_predictions = []
            else:
                print(f"\nAssistant: ⚠️ No predictions to save. Make a prediction first!\n")
            continue

        # Parse user input
        user_lower = user_input.lower()

        # Extract model
        model_name = None
        if 'iphone 17' in user_lower or 'iphone17' in user_lower or 'model 17' in user_lower or '17' in user_lower.split():
            model_name = 'iPhone 17'
        elif 'iphone 16' in user_lower or 'iphone16' in user_lower or 'model 16' in user_lower or '16' in user_lower.split():
            model_name = 'iPhone 16'
        elif 'iphone 15' in user_lower or 'iphone15' in user_lower or 'model 15' in user_lower or '15' in user_lower.split():
            model_name = 'iPhone 15'

        # Extract sources
        sources = []
        if 'both' in user_lower or ('amazon' in user_lower and 'flipkart' in user_lower):
            sources = ['Amazon', 'Flipkart']
        elif 'amazon' in user_lower:
            sources = ['Amazon']
        elif 'flipkart' in user_lower:
            sources = ['Flipkart']

        # Extract date
        target_date = datetime.now()
        import re
        date_match = re.search(r'(\d{1,2})\s*(st|nd|rd|th)?\s*(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)', user_lower)
        if date_match:
            day = int(date_match.group(1))
            month_str = date_match.group(3)
            month_map = {'jan':1,'feb':2,'mar':3,'apr':4,'may':5,'jun':6,'jul':7,'aug':8,'sep':9,'oct':10,'nov':11,'dec':12}
            month = month_map.get(month_str, datetime.now().month)
            year = 2025
            try:
                target_date = datetime(year, month, day, 12, 0)
            except:
                target_date = datetime.now()

        # Handle compare command
        if ('compare' in user_lower or 'both' in user_lower) and model_name:
            print("\n⏳ Comparing prices across sources...\n")

            comparisons = {}
            all_predictions = []

            for src in ['Amazon', 'Flipkart']:
                ml_pred = safe_predict_iphone_price(model_name, src, target_date=target_date)
                gemini_pred = safe_get_gemini_price(model_name, src, target_date)
                current_price, current_timestamp, review, current_rating = get_latest_current_price(model_name, src)

                if current_price is None:
                    current_price = ml_pred
                    current_timestamp = datetime.now()
                    review = ""
                    current_rating = 4.0

                optimal = safe_get_optimal_price(model_name, src, ml_pred, gemini_pred, current_price)
                analysis = safe_get_gemini_analysis(model_name, src, ml_pred, gemini_pred, current_price, target_date)

                comparisons[src] = {
                    'ml': ml_pred,
                    'gemini': gemini_pred,
                    'current': current_price,
                    'current_ts': current_timestamp,
                    'current_rating': current_rating,
                    'optimal': optimal
                }

                all_predictions.append({
                    'model': model_name,
                    'source': src,
                    'ml_pred': ml_pred,
                    'gemini_pred': gemini_pred,
                    'current_price': current_price,
                    'current_timestamp': current_timestamp,
                    'current_rating': current_rating,
                    'optimal_price': optimal,
                    'analysis': analysis,
                    'review': review
                })

            response = f"\n{'='*100}\n"
            response += f"🔄 **PRICE COMPARISON - {model_name}**\n"
            response += f"📅 **Prediction Date:** {target_date.strftime('%B %d, %Y')}\n\n"

            for src, prices in comparisons.items():
                response += f"🛒 **{src}:**\n"
                response += f"   🤖 ML Predicted: ₹{prices['ml']:,.0f}\n"
                response += f"   🧠 Gemini LLM: ₹{prices['gemini']:,.0f}\n" if prices['gemini'] else ""
                response += f"   💰 Current (Latest): ₹{prices['current']:,.0f}\n"
                response += f"      📅 As of: {prices['current_ts'].strftime('%Y-%m-%d %H:%M:%S')}\n"
                response += f"      ⭐ Rating: {prices['current_rating']}\n"
                response += f"   ✅ Optimal: ₹{prices['optimal']:,.0f}\n\n"

            best_source = min(comparisons, key=lambda x: comparisons[x]['optimal'])
            response += f"🎯 Best deal: {best_source} at ₹{comparisons[best_source]['optimal']:,.0f}\n"
            response += f"💡 Type 'update' to save both to dashboard\n"
            response += f"{'='*100}\n"

            print(f"Assistant: {response}\n")
            continue

        # Handle prediction
        if model_name:
            if len(sources) == 0:
                sources = ['Amazon', 'Flipkart']

            if len(sources) == 2:
                print(f"\n⏳ Analyzing prices for {model_name} on both platforms...\n")

                all_predictions = []

                for source in sources:
                    ml_pred = safe_predict_iphone_price(model_name, source, target_date=target_date)
                    gemini_pred = safe_get_gemini_price(model_name, source, target_date)
                    current_price, current_timestamp, review, current_rating = get_latest_current_price(model_name, source)

                    if current_price is None:
                        current_price = ml_pred
                        current_timestamp = datetime.now()
                        review = ""
                        current_rating = 4.0

                    optimal_price = safe_get_optimal_price(model_name, source, ml_pred, gemini_pred, current_price)
                    analysis = safe_get_gemini_analysis(model_name, source, ml_pred, gemini_pred, current_price, target_date)

                    response = f"\n{'='*100}\n"
                    response += f"🎯 **PRICE ANALYSIS - {model_name} on {source}**\n\n"
                    response += f"📅 **Prediction Timestamp:** {target_date.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
                    response += f"📊 **PREDICTIONS:**\n"
                    response += f"   🤖 ML Model: ₹{ml_pred:,.0f}\n"
                    response += f"   🧠 Gemini LLM: ₹{gemini_pred:,.0f}\n" if gemini_pred else ""
                    response += f"   💰 Current Market (Latest): ₹{current_price:,.0f}\n"
                    response += f"      📅 As of: {current_timestamp.strftime('%Y-%m-%d %H:%M:%S')}\n"
                    response += f"      ⭐ Rating: {current_rating}\n\n"
                    response += f"✅ **OPTIMAL PRICE:** ₹{optimal_price:,.0f}\n\n"
                    response += f"📈 **ANALYSIS:** {analysis}\n"
                    response += f"{'='*100}\n"

                    print(f"Assistant: {response}\n")

                    all_predictions.append({
                        'model': model_name,
                        'source': source,
                        'ml_pred': ml_pred,
                        'gemini_pred': gemini_pred,
                        'current_price': current_price,
                        'current_timestamp': current_timestamp,
                        'current_rating': current_rating,
                        'optimal_price': optimal_price,
                        'analysis': analysis,
                        'review': review
                    })

                print("💡 Type 'update' to save BOTH predictions to dashboard\n")

            else:
                source = sources[0]
                print(f"\n⏳ Analyzing {model_name} price on {source}...\n")

                ml_pred = safe_predict_iphone_price(model_name, source, target_date=target_date)
                gemini_pred = safe_get_gemini_price(model_name, source, target_date)
                current_price, current_timestamp, review, current_rating = get_latest_current_price(model_name, source)

                if current_price is None:
                    current_price = ml_pred
                    current_timestamp = datetime.now()
                    review = ""
                    current_rating = 4.0

                optimal_price = safe_get_optimal_price(model_name, source, ml_pred, gemini_pred, current_price)
                analysis = safe_get_gemini_analysis(model_name, source, ml_pred, gemini_pred, current_price, target_date)

                response = f"\n{'='*100}\n"
                response += f"🎯 **PRICE ANALYSIS - {model_name} on {source}**\n\n"
                response += f"📅 **Prediction Timestamp:** {target_date.strftime('%Y-%m-%d %H:%M:%S')}\n\n"
                response += f"📊 **PREDICTIONS:**\n"
                response += f"   🤖 ML Model: ₹{ml_pred:,.0f}\n"
                response += f"   🧠 Gemini LLM: ₹{gemini_pred:,.0f}\n" if gemini_pred else ""
                response += f"   💰 Current Market (Latest): ₹{current_price:,.0f}\n"
                response += f"      📅 As of: {current_timestamp.strftime('%Y-%m-%d %H:%M:%S')}\n"
                response += f"      ⭐ Rating: {current_rating}\n\n"
                response += f"✅ **OPTIMAL PRICE:** ₹{optimal_price:,.0f}\n\n"
                response += f"📈 **ANALYSIS:** {analysis}\n"
                response += f"💡 Type 'update' to save to dashboard\n"
                response += f"{'='*100}\n"

                print(f"Assistant: {response}\n")

                all_predictions = [{
                    'model': model_name,
                    'source': source,
                    'ml_pred': ml_pred,
                    'gemini_pred': gemini_pred,
                    'current_price': current_price,
                    'current_timestamp': current_timestamp,
                    'current_rating': current_rating,
                    'optimal_price': optimal_price,
                    'analysis': analysis,
                    'review': review
                }]

            conversation_history.append({"role": "User", "content": user_input})
        else:
            print("\nAssistant: I couldn't identify the iPhone model. Please mention:\n")
            print("  - iPhone 15, 16, or 17\n")
            print("Examples:")
            print("  - 'iPhone 16 on Amazon'")
            print("  - 'price for 17 on both'")
            print("  - 'compare iPhone 15'\n")

print("✅ Enhanced chatbot with LATEST prices ready!")

✅ Enhanced chatbot with LATEST prices ready!


## 12. Run the Chatbot

In [21]:
advanced_iphone_chatbot()

🤖 ADVANCED iPHONE PRICE PREDICTION CHATBOT (with LATEST prices)

📊 Features:
  • ML Model + Gemini LLM price predictions
  • Real data from GitHub (LATEST by timestamp) + synthetic data
  • Market & technical analysis
  • Gemini-powered optimal pricing
  • Real-time dashboard tracking (saves ALL sources)

 Commands:
  • Just ask naturally: 'price for iPhone 16 on Amazon'
  • 'compare iPhone 15' - Compare both sources
  • 'update' - Save ALL last predictions to dashboard
  • 'dashboard' - View all tracked predictions
  • 'quit' - Exit

You: predict prices for iphone 15

⏳ Analyzing prices for iPhone 15 on both platforms...

Assistant: 
🎯 **PRICE ANALYSIS - iPhone 15 on Amazon**

📅 **Prediction Timestamp:** 2025-10-31 09:02:24

📊 **PREDICTIONS:**
   🤖 ML Model: ₹116,771
   🧠 Gemini LLM: ₹54,000
   💰 Current Market (Latest): ₹70,835
      📅 As of: 2025-09-30 11:08:33
      ⭐ Rating: 4.5

✅ **OPTIMAL PRICE:** ₹70,835

📈 **ANALYSIS:** 📊 Market: **Bearish**, reflecting the iPhone 15's expect

## 13. Export Dashboard (Optional)

In [22]:
# Export dashboard to CSV
if len(dashboard) > 0:
    dashboard.to_csv('price_prediction_dashboard.csv', index=False)
    print(f"✅ Dashboard exported to 'price_prediction_dashboard.csv'")
    print(f"   Records: {len(dashboard)}")
else:
    print("Dashboard is empty. Make predictions first!")

✅ Dashboard exported to 'price_prediction_dashboard.csv'
   Records: 2


In [23]:
import pandas as pd

df = pd.read_csv("price_prediction_dashboard.csv")
df.head()

Unnamed: 0,DateTime,Model,Source,ML_Predicted,Gemini_Predicted,Current_Price,Current_Timestamp,Current_Rating,Optimal_Price,Analysis,Review
0,2025-10-31 09:06:39,iPhone 15,Amazon,"₹116,771","₹54,000","₹70,835",2025-09-30 11:08:33,4.5,"₹70,835","📊 Market: **Bearish**, reflecting the iPhone 15's expected depreciation as a two",Battery life could be better.
1,2025-10-31 09:06:39,iPhone 15,Flipkart,"₹116,233","₹50,000","₹89,674",2025-09-30 11:08:47,4.3,"₹97,642",📊 Market: Neutral. 📈 Technical: Overpriced,Smooth performance and great display.
