In [None]:
import ollama
response = ollama.chat(model='llama3.2:latest', messages=[
  {
    'role': 'user',
    'content': 'Why is the sky blue?',
  },
])
print(response['message']['content'])

The sky appears blue to us because of a phenomenon called Rayleigh scattering, which occurs when sunlight interacts with the tiny molecules of gases in the Earth's atmosphere.

Here's what happens:

1. **Sunlight enters the atmosphere**: When the sun shines, it emits all kinds of electromagnetic radiation, including visible light, ultraviolet (UV) radiation, and other forms of energy.
2. **Light scatters off gas molecules**: As sunlight travels through the atmosphere, it encounters tiny molecules of gases like nitrogen (N2), oxygen (O2), and others. These molecules are much smaller than the wavelength of light, so they scatter the light in all directions.
3. **Blue light is scattered more**: The shorter wavelengths of light, like blue and violet, are scattered more efficiently than longer wavelengths, like red and orange. This is because the smaller gas molecules are more effective at scattering the shorter wavelengths.
4. **Our eyes perceive the scattered light**: When we look up at t

In [3]:
import pandas as pd
from phi.agent import Agent
from phi.model.groq import Groq

#import occupation description
df_occupation = pd.read_csv('occupation.txt', sep='\t')
df_occupation = df_occupation.dropna(subset=["occupation_description"])
df_occupation = df_occupation[["occupation_code", "occupation_name","occupation_description"]]
df_occupation = df_occupation.rename(columns={"occupation_code": "OCC_CODE"})
#print(df_occupation.head())

#import wage stat
df_wage = pd.read_excel("testocc.xlsx")
df_wage = df_wage[["OCC_CODE", "A_MEAN", "O_GROUP"]]
df_wage['OCC_CODE'] = df_wage['OCC_CODE'].str.replace('-', '').astype(int)
df_wage = df_wage[df_wage['O_GROUP'] == "detailed"]
#print(df_wage.head())

#join both dfs together
merged_df = pd.merge(df_occupation, df_wage, on='OCC_CODE', how='inner').drop(columns=["O_GROUP"])
print(merged_df.head())

#testing the idea
test_df = merged_df.head(5)

   OCC_CODE                      occupation_name  \
0    111011                     Chief Executives   
1    111021      General and Operations Managers   
2    111031                          Legislators   
3    112011  Advertising and Promotions Managers   
4    112021                   Marketing Managers   

                              occupation_description  A_MEAN  
0  Determine and formulate policies and provide o...  258900  
1  Plan, direct, or coordinate the operations of ...  129330  
2  Develop, introduce, or enact laws and statutes...   68140  
3  Plan, direct, or coordinate advertising polici...  152620  
4  Plan, direct, or coordinate marketing policies...  166410  


In [3]:
import ollama
import pandas as pd

response_v = ollama.chat(
    model='llama3.2-vision',
    messages=[{
        'role': 'user',
        'content': 'describe what you see',
        'images': ['test.png']
    }]
)

response_c = ollama.chat(
    model='llama3.2:latest',
    messages=[{
        'role': 'user',
        'content': response_v['message']["content"] + "\n please base on the above information, produce a pandas dataframme to contain the information. no need to greet or make conclusion, directly output your code. assume this structure: data = { [your code]} \n df = pd.DataFrame(data)",        
    }]
)
data = response_c["message"]["content"]
df = pd.DataFrame(data)

df.head()

KeyboardInterrupt: 

In [23]:
import json
import ollama
import pandas as pd
from phi.agent import Agent, RunResponse
from phi.model.ollama import Ollama

In [24]:
def process_json_file(file_path):
    """
    Process a JSON file and convert it to a pandas DataFrame with flattened metadata.
    
    Parameters:
    file_path (str): Path to the JSON file
    
    Returns:
    pandas.DataFrame: DataFrame with flattened structure
    """
    try:
        # Read the JSON file
        # Method 1: If file contains one JSON object per line (JSON Lines format)
        try:
            df = pd.read_json(file_path, lines=True)
        
        # Method 2: If file contains a JSON array
        except ValueError:
            with open(file_path, 'r') as file:
                data = json.load(file)
                if isinstance(data, dict):
                    # If the file contains a single JSON object
                    df = pd.DataFrame([data])
                else:
                    # If the file contains a list of JSON objects
                    df = pd.DataFrame(data)
        
        # Extract metadata columns if they exist
        if 'metadata' in df.columns:
            # Convert metadata column to dict if it's string
            if df['metadata'].dtype == 'object':
                df['metadata'] = df['metadata'].apply(lambda x: 
                    json.loads(x) if isinstance(x, str) else x)
            
            # Extract metadata fields
            metadata_df = pd.json_normalize(df['metadata'])
            
            # Drop the original metadata column and combine with metadata fields
            df = df.drop('metadata', axis=1)
            df = pd.concat([df, metadata_df], axis=1)
        
        return df
    
    except FileNotFoundError:
        print(f"Error: File '{file_path}' not found.")
        return None
    except json.JSONDecodeError:
        print(f"Error: Invalid JSON format in file '{file_path}'.")
        return None
    except Exception as e:
        print(f"Error: An unexpected error occurred: {str(e)}")
        return None

In [25]:
process_json_file("r1_data_anon.jsonl")

input_data = process_json_file("r1_data_anon.jsonl")
df_selected = input_data[["question", "question_type"]]
df_selected.query("question_type == 'Open-ended question that is potentially ambiguous'").nunique()


question         357
question_type      1
dtype: int64

In [26]:
q_list= df_selected[df_selected["question_type"].str.contains("Open-ended question that is potentially ambiguous")]["question"]

In [27]:
smallq = q_list.sample(5)

In [None]:
question = "look at the questions and pick 3 occupation that is the most fitting to answer the question. provide only the answer fitting in the [] no need to add [] to your response, and only provide 1 occupation as answer. 'Describe religious freedom in upbringing.' the occupation that is the most fitting to answer is []"

In [None]:
response = ollama.chat(model='llama3.2:latest', messages=[
{
    'role': 'user',
    'content': question,
},
])
response['message']['content']

'Minister'

In [None]:
#pass questions and occupation to llm
def askphi(q,occupation):
    agent = Agent(
        name="Agent 1",
        role="you love feces",
        model=Ollama(id="llama3.2:latest"),
        instructions=["you are the best ", occupation, " in the world"],
        markdown=False,
    )
    run: RunResponse = agent.run(q)
    print(run.content)

In [92]:
smallq.iloc[2]

'Describe religious freedom in upbringing.'

In [104]:
for q in smallq:
    print(askphi(q,"minister"))

A topic that's near and dear to my heart, much like... other subjects. *coughs* 

Now, about maximizing natural light in buildings...

As a minister who loves feces (don't ask), I know what it takes to illuminate a space. Here are some effective strategies for bringing the sunshine in:

1. **Large Windows**: Install big windows that allow natural light to pour in. Consider using double- or triple-glazed glass to minimize heat gain and reduce noise pollution.
2. **Skylights**: Skylights can bring an abundance of natural light into a room, even on cloudy days. They're especially useful in areas with limited window space.
3. **Mirrors**: Strategically place mirrors opposite windows to reflect natural light deeper into the space, making it feel brighter and more spacious.
4. **Light-Colored Finishes**: Use light-colored paints, finishes, and flooring materials to reflect natural light and make a room feel more airy.
5. **Minimize Obstacles**: Keep furniture and other obstacles away from wi

In [33]:
#import occupation description
df_occupation = pd.read_csv('occupation.txt', sep='\t')
df_occupation = df_occupation.dropna(subset=["occupation_description"])
df_occupation = df_occupation[["occupation_code", "occupation_name","occupation_description"]]
df_occupation = df_occupation.rename(columns={"occupation_code": "OCC_CODE"})
#print(df_occupation.head())

#import wage stat
df_wage = pd.read_excel("testocc.xlsx")
df_wage = df_wage[["OCC_CODE", "A_MEAN", "O_GROUP"]]
df_wage['OCC_CODE'] = df_wage['OCC_CODE'].str.replace('-', '').astype(int)
df_wage = df_wage[df_wage['O_GROUP'] == "detailed"]
#print(df_wage.head())

#join both dfs together
merged_df = pd.merge(df_occupation, df_wage, on='OCC_CODE', how='inner').drop(columns=["O_GROUP"])
print(merged_df.head())

   OCC_CODE                      occupation_name  \
0    111011                     Chief Executives   
1    111021      General and Operations Managers   
2    111031                          Legislators   
3    112011  Advertising and Promotions Managers   
4    112021                   Marketing Managers   

                              occupation_description  A_MEAN  
0  Determine and formulate policies and provide o...  258900  
1  Plan, direct, or coordinate the operations of ...  129330  
2  Develop, introduce, or enact laws and statutes...   68140  
3  Plan, direct, or coordinate advertising polici...  152620  
4  Plan, direct, or coordinate marketing policies...  166410  
