In [None]:
import requests
import pandas as pd
import time
import streamlit as st
import openai
import uuid
import os
import sys
import subprocess


In [None]:
# Download yearly GDP (indicator NY.GDP.MKTP.CD) for every individual country
# from the World Bank API, keep the years from 2000 onwards, and save the
# result to 'world_bank_gdp_cleaned.csv'.

# requests waits forever by default; a timeout keeps a stalled connection
# from hanging the whole notebook.
REQUEST_TIMEOUT = 30  # seconds

# Fetch country metadata
url = "http://api.worldbank.org/v2/country?format=json&per_page=300"
individual_countries = []
try:
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
except requests.RequestException as exc:
    print(f"Failed to fetch country metadata: {exc}")
else:
    if response.status_code == 200:
        metadata = response.json()
        # The records are read from the second element of the payload; guard
        # against a shorter (error) payload or an empty record list.
        countries = metadata[1] if len(metadata) > 1 and metadata[1] else []
        # Entries whose region is "Aggregates" are groupings, not countries.
        individual_countries = [
            c['id'] for c in countries if c['region']['value'] != "Aggregates"
        ]
    else:
        print("Failed to fetch country metadata")

# Initialize variables
all_data = []
MAX_PAGES = 50  # Maximum number of pages to fetch per country

# Fetch GDP data for individual countries only
for country in individual_countries:
    print(f"Fetching data for {country}...")
    for page in range(1, MAX_PAGES + 1):
        url = f"http://api.worldbank.org/v2/country/{country}/indicator/NY.GDP.MKTP.CD?format=json&page={page}"
        try:
            response = requests.get(url, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Skip to the next country instead of losing everything collected so far.
            print(f"Failed to retrieve data for {country} on page {page}: {exc}")
            break

        if response.status_code != 200:
            print(f"Failed to retrieve data for {country} on page {page}")
            break

        data = response.json()
        if len(data) < 2 or not data[1]:
            break  # Exit if there is no more data

        # Extract records, skipping years with no reported value
        all_data.extend(
            [record['country']['value'], record['date'], record['value']]
            for record in data[1]
            if record['value'] is not None
        )

        time.sleep(1)  # Avoid rate-limiting: pause before the next request

        # The first payload element carries paging info; when it reports the
        # total page count, stop on the last page instead of issuing one more
        # request just to receive an empty result. If the count is absent the
        # loop falls back to the empty-page check above.
        paging = data[0] if isinstance(data[0], dict) else {}
        total_pages = paging.get('pages')
        if isinstance(total_pages, int) and page >= total_pages:
            break

# Convert to DataFrame
gdp_raw = pd.DataFrame(all_data, columns=['Country', 'Year', 'GDP'])

# Clean the data: integer years, numeric GDP (unparseable values become NaN),
# and only rows from the year 2000 onwards.
df = gdp_raw.assign(
    Year=gdp_raw['Year'].astype(int),
    GDP=pd.to_numeric(gdp_raw['GDP'], errors='coerce'),
)
df = df[df['Year'] >= 2000]

# Save to CSV
df.to_csv('world_bank_gdp_cleaned.csv', index=False)
print("Data successfully saved to 'world_bank_gdp_cleaned.csv'")


In [None]:
# Turn the cleaned GDP table into two plain-text renderings (one sentence per
# row, and one "Country / Year / GDP" line per row) and save both to disk.

# Load the cleaned data
df = pd.read_csv('world_bank_gdp_cleaned.csv')

# Display the first few rows
print(df.head())


# One natural-language sentence per (country, year) row.
summaries = [
    f"In {year}, {country} had a GDP of ${gdp:,.0f}."
    for country, year, gdp in zip(df['Country'], df['Year'], df['GDP'])
]

# Example output
print(summaries[:5])


# One line per row; joined once rather than grown with += inside a loop,
# which re-copies the accumulated string on every iteration.
table_text = "".join(
    f"Country: {country}, Year: {year}, GDP: ${gdp:,.0f}\n"
    for country, year, gdp in zip(df['Country'], df['Year'], df['GDP'])
)

# Example output
print(table_text[:500])

# Persist both renderings. Other cells in this notebook open 'gdp_table.txt'
# and a 'gdp_summaries.txt' file, but nothing wrote them before, so a fresh
# run depended on files created by hand. The files are written with the
# platform default encoding because that is how those cells read them.
with open('gdp_summaries.txt', 'w') as f:
    f.write("\n".join(summaries))
with open('gdp_table.txt', 'w') as f:
    f.write(table_text)

In [None]:
# Ask the model a single question about the GDP table stored in 'gdp_table.txt'.

# Read the OpenAI API key from the environment instead of embedding it in the
# notebook. A key that has been committed to source should be treated as
# compromised and revoked.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    raise RuntimeError(
        "Set the OPENAI_API_KEY environment variable before running this cell."
    )
openai.api_key = api_key

# Load the preprocessed prompt
with open('gdp_table.txt', 'r') as f:
    prompt = f.read()

# Reduce input length: keep only the last 12000 characters so the request
# fits within the model's token limit (adjust the size as needed).
prompt = prompt[-12000:]

# Define a question
question = "Which country had the highest GDP in 2020?"

# Make a GPT-3.5-turbo or GPT-4 API call.
# NOTE(review): openai.ChatCompletion is the pre-1.0 interface of the openai
# package — confirm the installed version still provides it.
response = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[
        {"role": "system", "content": "You are an assistant trained to analyze economic data."},
        {"role": "user", "content": prompt},  # Load the data as context
        {"role": "user", "content": question}  # Add your question
    ],
    max_tokens=500,
    temperature=0.7
)

# Output the response
print(response['choices'][0]['message']['content'])


In [None]:
# Automatically start Streamlit if the script is run directly.
# This relies on __file__, so it is meant for running the code as a script.
if __name__ == "__main__":
    # IS_RUNNING_STREAMLIT marks a process that has already gone through this
    # launcher; the child process inherits it, which prevents it from
    # launching Streamlit again.
    if not os.getenv("IS_RUNNING_STREAMLIT"):
        # Set an environment variable to avoid recursion
        os.environ["IS_RUNNING_STREAMLIT"] = "1"

        # Launch Streamlit. The command is passed as an argument list rather
        # than a shell string, so a script path containing spaces or shell
        # metacharacters is handed over verbatim.
        script_path = os.path.abspath(__file__)
        print(f"Launching Streamlit app: streamlit run {script_path}")
        subprocess.run(["streamlit", "run", script_path])
        sys.exit()

# Configure the OpenAI client and prepare the per-session state for the app.

# Read the OpenAI API key from the environment instead of embedding it in the
# source. A key that has been committed to source should be treated as
# compromised and revoked.
api_key = os.getenv("OPENAI_API_KEY")
if not api_key:
    st.error("The OPENAI_API_KEY environment variable is not set.")
    st.stop()
openai.api_key = api_key

# Initialize session state variables
if 'messages' not in st.session_state:
    st.session_state['messages'] = []  # chat history as {'role', 'content'} dicts
if 'chat_id' not in st.session_state:
    st.session_state['chat_id'] = str(uuid.uuid4())
if 'data_context' not in st.session_state:
    st.session_state['data_context'] = ""

# Load predefined data only once
if not st.session_state['data_context']:
    # The location can be overridden with GDP_SUMMARIES_PATH; the default is
    # the path this script originally used.
    file_path = os.getenv("GDP_SUMMARIES_PATH", "/Users/riis57/gdp_summaries.txt")
    try:
        # Read the text file (but do NOT display it)
        with open(file_path, 'r') as file:
            st.session_state['data_context'] = file.read()
    except FileNotFoundError:
        st.error(f"The file at {file_path} could not be found. Please check the file path.")
        st.stop()

# Chat interface: show the conversation so far, accept a new question, send it
# to the model together with the loaded data context, and show the reply.

# Streamlit app title
st.title("GDP Data Analysis with GPT")

# Display the conversation using st.chat_message.
# Anything that is not a user message is rendered as an assistant message.
for message in st.session_state['messages']:
    speaker = "user" if message['role'] == 'user' else "assistant"
    with st.chat_message(speaker):
        st.write(message['content'])

# Accept user input using st.chat_input
user_input = st.chat_input("Ask your question about the data:")

if user_input:
    # Append the user's message to the session state
    st.session_state['messages'].append({'role': 'user', 'content': user_input})

    # Display the user's message
    with st.chat_message("user"):
        st.write(user_input)

    # Trim the data context to its first 12000 characters so the request stays
    # within token limits (adjust the limit as needed). Slicing is a no-op for
    # shorter text.
    data_context = st.session_state['data_context'][:12000]

    # Send the question to GPT. Only the data context and the current question
    # are sent; earlier turns of the conversation are not included.
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": "You are an assistant trained to analyze economic data."},
                {"role": "user", "content": data_context},
                {"role": "user", "content": user_input}
            ],
            max_tokens=500,
            temperature=0.7
        )

        # Extract GPT's response
        bot_reply = response['choices'][0]['message']['content']

        # Append the bot's reply to the session state
        st.session_state['messages'].append({'role': 'assistant', 'content': bot_reply})

        # Display GPT's response
        with st.chat_message("assistant"):
            st.write(bot_reply)

    except openai.error.OpenAIError as e:
        # Catch the library's base error so that authentication, rate-limit and
        # connection failures are reported in the app the same way invalid
        # requests are, instead of surfacing as an unhandled exception.
        st.error(f"An error occurred: {e}")
