In [21]:

import joblib

# Load the pipeline
# joblib.load unpickles the file, so only load model artifacts from a trusted source.
# NOTE(review): assumes rf_salary_model.pkl was written with joblib.dump — confirm against the training notebook.
rf_model = joblib.load("rf_salary_model.pkl")

# Reached only when loading did not raise
print("✅ Model loaded and ready to use.")

✅ Model loaded and ready to use.


In [22]:

import pandas as pd

def predict_salary(age, gender, education, job_title):
    """Predict a salary for one respondent and log the answers to a CSV file.

    The prediction is deliberately not returned to the caller; it is only
    appended, together with the answers, to ``predicted_salaries.csv``.

    Args:
        age: Respondent's age, stored in the "Age" column.
        gender: Stored in the "Gender" column.
        education: Stored in the "Education Level" column.
        job_title: Stored in the "Job Title" column.

    Returns:
        A fixed confirmation message for display in the UI.
    """
    # Local import: this cell runs before the notebook's own `import os`.
    import os

    # Create a single-row dataframe from user inputs.
    # NOTE(review): column names presumably match what the pipeline was trained on — confirm.
    input_data = pd.DataFrame({
        "Age": [age],
        "Gender": [gender],
        "Education Level": [education],
        "Job Title": [job_title]
    })

    # Predict using the loaded pipeline (one row in, so take the first result)
    predicted_salary = rf_model.predict(input_data)[0]

    # Save the data (append mode)
    new_entry = input_data.copy()
    new_entry["Predicted Salary"] = predicted_salary

    log_path = "predicted_salaries.csv"
    # Write the column names only when starting a new (or empty) file, so the
    # log is self-describing without repeating the header on every append.
    needs_header = not os.path.exists(log_path) or os.path.getsize(log_path) == 0
    new_entry.to_csv(log_path, mode="a", header=needs_header, index=False)

    return "Thank you! Your response has been recorded."

In [23]:

import gradio as gr

# Define the app interface
# Widgets are created inside the Blocks context in the order they should appear on the page.
with gr.Blocks() as demo:
    gr.Markdown("Please answer the following questions:")

    # The four answers passed to predict_salary, in the same order as its parameters
    age = gr.Number(label="What is your Age?")
    gender = gr.Dropdown(["Male", "Female", "Other"], label="What is your Gender?")
    education = gr.Dropdown(
        ["High School", "Bachelor's", "Master's", "PhD", "Other"],
        label="What is your Education Level?"
    )
    job_title = gr.Textbox(label="What is your Job Title?")

    submit_btn = gr.Button("Submit")

    # Read-only box that shows the string returned by predict_salary
    output = gr.Textbox(label="Message", interactive=False)

    # On click: call predict_salary with the four widget values and show its return value
    submit_btn.click(predict_salary, inputs=[age, gender, education, job_title], outputs=output)

# Start the local web server for the app
demo.launch()

* Running on local URL:  http://127.0.0.1:7862
* To create a public link, set `share=True` in `launch()`.




In [24]:
from dotenv import load_dotenv
import os
from openai import OpenAI

# Load variables from a local .env file (if present) into the process environment
load_dotenv()
# os.getenv returns None when OPENAI_API_KEY is unset.
# NOTE(review): confirm how the OpenAI client behaves when api_key is None.
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

In [25]:
# Read the whole Markdown travel guide into memory as one UTF-8 string
md_path = "world_tour_guide.md"
with open(md_path, "r", encoding="utf-8") as f:
    md_text = f.read()

In [26]:
# Imported here so the cell runs on a fresh kernel: it uses `re` and `json`
# before any other cell in the notebook imports them.
import json
import re

# Ask the model to turn the Markdown guide into a JSON array of city records.
# Only the first 8000 characters of the guide are sent.
# NOTE(review): the format example shows "estimated_salary": 20000-40000, which is
# not a valid JSON number; later code subtracts a number from this field, so the
# model has to answer with a single numeric value per city.
prompt = f"""
You are a data extraction model. The following text is a Markdown travel guide that contains
sections about different cities, each with attractions and an estimated yearly salary guideline.

Please extract this data and return **ONLY a valid JSON array**, no extra text.
Format:
[
  {{
    "city": "City name, Country",
    "attractions": "short summary of activities",
    "estimated_salary": 20000-40000
  }},
  ...
]

Markdown file:
---
{md_text[:8000]}
---
"""

response = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[
        {"role": "system", "content": "Extract structured JSON data from the Markdown."},
        {"role": "user", "content": prompt}
    ],
    temperature=0
)

raw_output = response.choices[0].message.content.strip()

# --- Try to safely extract JSON ---
try:
    # Take everything from the first "[" to the last "]"; this drops Markdown
    # code fences or stray prose around the array.
    array_match = re.search(r"\[.*\]", raw_output, re.DOTALL)
    if array_match is None:
        raise ValueError("No JSON array found in the model output")
    city_docs = json.loads(array_match.group(0))
except ValueError:
    # json.JSONDecodeError is a ValueError subclass, so both failure modes land
    # here; unrelated errors propagate without the misleading message.
    print("⚠️ Could not parse JSON directly. Printing model output for debugging:\n")
    print(raw_output[:1000])  # show first part
    raise

print(f"✅ Loaded {len(city_docs)} cities from GPT parsing\n")
for c in city_docs[:3]:
    print(c["city"], "→", c["estimated_salary"])

✅ Loaded 19 cities from GPT parsing

Bangkok, Thailand → 24000
Hoi An, Vietnam → 21700
Barcelona, Spain → 50000


In [27]:
import json

# Persist the extracted city records to disk as pretty-printed JSON
with open("parsed_city_data.json", "w", encoding="utf-8") as f:
    # ensure_ascii=False writes non-ASCII characters as-is instead of \uXXXX escapes
    json.dump(city_docs, f, indent=2, ensure_ascii=False)

#### Let’s connect everything built so far:

##### ✅ 1. The trained Random Forest model (rf_salary_model.pkl)
##### ✅ 2. The structured city data you extracted with GPT (city_docs)
##### ✅ 3. A Gradio interface that collects the 4 user answers and predicts salary
##### ✅ 4. GPT again to generate a Markdown travel brochure for the user

#### 🧠 Overview 

##### 1. User answers: Age, Gender, Education Level, Job Title

##### 2. Model predicts salary (but we don’t show it to the user)

##### 3. Based on that salary → select the closest matching cities from city_docs

##### 4. Send that info to GPT to generate a nice Markdown brochure

##### 5. Show the Markdown result to the user in Gradio

In [29]:
import gradio as gr
import pickle
import numpy as np
import json
from openai import OpenAI
from dotenv import load_dotenv
import os

# joblib is already used earlier in the notebook to read this same model file;
# imported again here so this cell is self-contained.
import joblib

# Load environment variables
load_dotenv()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))

# Load trained RF model pipeline
# Use joblib.load (not pickle.load) so the file is read with the same loader the
# notebook uses for it elsewhere.
# Security: loading unpickles arbitrary objects — only load trusted model files.
model = joblib.load("rf_salary_model.pkl")

# Load structured city data
with open("parsed_city_data.json", "r", encoding="utf-8") as f:
    city_docs = json.load(f)

print(f"✅ Loaded {len(city_docs)} cities and the trained salary model")


✅ Loaded 19 cities and the trained salary model


In [30]:
def predict_salary(age, gender, education, job):
    """Return the model's salary prediction for a single respondent.

    The four answers are packed into a one-row DataFrame (age is converted
    with ``int``) and passed to the module-level ``model``; the first element
    of the prediction result is returned.
    """
    # One list per column -> a single-row frame
    features = {
        "Age": [int(age)],
        "Gender": [gender],
        "Education Level": [education],
        "Job Title": [job],
    }
    feature_frame = pd.DataFrame(features)
    return model.predict(feature_frame)[0]


In [31]:
def _numeric_salary(city):
    """Return the record's ``estimated_salary`` as a float, or None if it is unusable."""
    try:
        value = float(city["estimated_salary"])
    except (KeyError, TypeError, ValueError):
        # Missing key, null, a non-dict record, or text such as "20000-40000"
        return None
    # NaN is the only float that differs from itself; it cannot be ranked
    return None if value != value else value


def find_best_cities(pred_salary, n=3, cities=None):
    """Return the ``n`` city records whose estimated salary is closest to ``pred_salary``.

    Args:
        pred_salary: Predicted yearly salary to match against.
        n: Maximum number of records to return.
        cities: Optional list of city dicts to search; defaults to the
            module-level ``city_docs``.

    Returns:
        Up to ``n`` of the original city dicts, ordered from closest to
        farthest. Ties keep their input order. Records whose
        ``estimated_salary`` is missing or not numeric are skipped so a single
        malformed record does not fail the whole lookup.
    """
    candidates = city_docs if cities is None else cities

    # Pair each usable record with its distance once, then rank by distance
    ranked = []
    for city in candidates:
        salary = _numeric_salary(city)
        if salary is not None:
            ranked.append((abs(salary - pred_salary), city))

    # sorted() is stable, so equal distances keep their original order
    ranked = sorted(ranked, key=lambda pair: pair[0])
    return [city for _, city in ranked[:n]]


In [32]:
def create_brochure(pred_salary, cities):
    """Ask the chat model for a Markdown travel brochure tailored to a salary.

    Args:
        pred_salary: Estimated yearly income, formatted into the prompt as
            whole dollars with thousands separators.
        cities: Iterable of dicts with "city" and "attractions" keys; each
            becomes a "### <city>" section in the prompt.

    Returns:
        The model's reply text with surrounding whitespace stripped.
    """
    # One Markdown section per candidate city, separated by blank lines
    city_sections = "\n\n".join(
        f"### {city['city']}\n{city['attractions']}" for city in cities
    )

    user_message = f"""
You are a travel guide writer.

A user’s estimated yearly income is **${pred_salary:,.0f}**.
Based on this salary, suggest 2–3 great travel destinations from the following data,
and write a well-structured Markdown brochure with:
- A warm short introduction based on their budget level
- One section per city with highlights and attractions
- A closing recommendation paragraph

Here are the available city options:
{city_sections}
"""

    chat_messages = [
        {"role": "system", "content": "You write engaging Markdown travel brochures."},
        {"role": "user", "content": user_message},
    ]
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=chat_messages,
        temperature=0.7,
    )

    brochure_text = completion.choices[0].message.content
    return brochure_text.strip()


In [33]:
import pandas as pd

def process_input(age, gender, education, job):
    """Turn the four form answers into a Markdown travel brochure.

    Args:
        age: Respondent's age from the form.
        gender: Selected gender option.
        education: Selected education level.
        job: Free-text job title.

    Returns:
        Markdown text of the generated brochure.

    Raises:
        gr.Error: If any answer is missing or blank, so the UI can show a
            readable message instead of a generic failure.
    """
    # Step 0: Reject incomplete forms up front. Blank widgets may arrive as
    # None or as an empty/whitespace-only string.
    answers = {
        "Age": age,
        "Gender": gender,
        "Education Level": education,
        "Job Title": job,
    }
    missing = [
        label
        for label, value in answers.items()
        if value is None or (isinstance(value, str) and not value.strip())
    ]
    if missing:
        raise gr.Error("Please answer all questions. Missing: " + ", ".join(missing))

    # Step 1: Predict salary (never shown to the user directly)
    salary = predict_salary(age, gender, education, job)

    # Step 2: Select suitable cities
    top_cities = find_best_cities(salary)

    # Step 3: Generate brochure text
    brochure_md = create_brochure(salary, top_cities)

    return brochure_md


# Build the Gradio front end: four form inputs in, one Markdown brochure out.
# fn, inputs and outputs are passed positionally, in that order.
demo = gr.Interface(
    process_input,
    [
        gr.Number(label="Your Age", precision=0),
        gr.Radio(["Male", "Female", "Other"], label="Gender"),
        gr.Dropdown(
            ["High School", "Bachelor's", "Master's", "PhD"],
            label="Education Level",
        ),
        gr.Textbox(label="Job Title"),
    ],
    gr.Markdown(label="Your Personalized Travel Brochure"),
    title="🌍 Travel Brochure Recommender",
    description="Answer 4 quick questions please.",
)

# Start the local web server for the app
demo.launch()

* Running on local URL:  http://127.0.0.1:7863
* To create a public link, set `share=True` in `launch()`.


