In [94]:
import os
import cohere
import json
import pandas as pd

# Create the Cohere client used by every later cell. The API key is read from
# the COHERE_API_KEY environment variable; a missing variable raises KeyError.
co = cohere.Client(os.environ["COHERE_API_KEY"])

In [95]:
# Load the stock data from a CSV in the working directory. The dataset-building
# cell reads the columns "date", "Name", "open", "close", "high" and "low"
# from this frame.
df = pd.read_csv("CSCO_data.csv")

In [96]:
# Build the chat fine-tuning dataset: for every row of `df`, emit four
# conversations (open / close / high / low), each made of the shared system
# prompt, a user question and the expected chatbot answer, then write them out
# as JSON Lines (one JSON object per line).

# System message attached to every training conversation. It is defined once
# here because it does not depend on the row being processed.
# This is a plain (non-f) string, so braces are written singly: doubled
# "{{ ... }}" would be copied verbatim into the training data.
# The leading whitespace inside the literal is part of the prompt text and is
# intentionally left as it was.
SYSTEM_PROMPT = """You are Danie, helpful assistant for dealing stock price conversations. You are polite, sweet and straightforward. You start asking any question by how can i help. Use QNA below to answer any question may have about stock prices. Be concise, detailed and informative.
                    ** Instructions **
                        1. Look for the company name "CSCO" (case-insensitive) in the question.
                        2. If "CSCO" is not found, respond with "I don't have enough information about that."
                        3. If "CSCO" is found:
                            - Look for keywords like "stock", "price", "open", "close", "high", "low", or "volume" (case-insensitive) in the question.
                            - If any of these keywords are found, assume the question relates to CSCO stock data.
                                - Process the question further to extract the specific details requested (e.g., date, high price).
                                - Based on your data access logic (replace "..."), formulate a response using the extracted information.
                            - If none of the keywords are found, respond with "I don't have enough information about that specific data point for CSCO."

                        ** Example output **
                            [
                                {
                                    "question": "What is open price for CSCO on 13th Feb 2017",
                                    "answer": "Open Price for CSCO on 13th Feb 2017 is 31.67" 
                                },
                                {
                                    "question": "What was close price for CSCO on 13th Feb 2017",
                                    "answer": "Close Price for CSCO on 13th Feb 2017 is 31.97" 
                                },
                                {
                                    "question": "What was volume for CSCO on 13th Feb 2017",
                                    "answer": "Volume for CSCO on 13th Feb 2017 is 26518836" 
                                },
                                {
                                    "question": "on which date price was highest for CSCO?",
                                    "answer": "find highest value from csv file" 
                                },
                                {
                                    "question": "on which date high price was 31.38",
                                    "answer": "On 7th Feb 2017 high price was 31.38" 
                                },
                                {
                                    "question": "Who is PM of india?",
                                    "answer": "I don't have enough information about that." 
                                },
                                {
                                    "question": "What is 2?",
                                    "answer": "I don't have enough information about that." 
                                },
                                {
                                    "question": "Who won T20 world cup?",
                                    "answer": "I don't have enough information about that." 
                                },
                                {
                                    "question": "Who invented the computer?",
                                    "answer": "I don't have enough information about that." 
                                },
                            ]
                    """


def _ordinal_date(value):
    """Format a date-like value as e.g. '1st Feb 2017' or '22nd Mar 2018'.

    A fixed "th" suffix yields strings such as "01th" or "22th", so the suffix
    is chosen from the day number: 11-13 always take "th", otherwise the last
    digit decides (1 -> st, 2 -> nd, 3 -> rd, anything else -> th).
    """
    timestamp = pd.to_datetime(value)
    day = timestamp.day
    if 11 <= day <= 13:
        suffix = "th"
    else:
        suffix = {1: "st", 2: "nd", 3: "rd"}.get(day % 10, "th")
    return f"{day}{suffix} {timestamp.strftime('%b %Y')}"


# All conversations, in row order, four per row.
jsonl_entries = []

for _, row in df.iterrows():
    date = _ordinal_date(row["date"])
    stock_name = row["Name"]
    open_price = row["open"]
    close_price = row["close"]
    high_price = row["high"]
    low_price = row["low"]

    # (question, answer) pairs for this trading day.
    questions_answers = [
        (
            f"Can you provide the opening price of {stock_name} on {date}?",
            f"The opening price of {stock_name} on {date} was {open_price}.",
        ),
        (
            f"What was the closing price of {stock_name} on {date}?",
            f"The closing price of {stock_name} on {date} was {close_price}.",
        ),
        (
            f"How high did {stock_name} stock go on {date}?",
            f"The highest price of {stock_name} on {date} was {high_price}.",
        ),
        (
            f"What was the lowest price of {stock_name} on {date}?",
            f"The lowest price of {stock_name} on {date} was {low_price}.",
        ),
    ]

    for question, answer in questions_answers:
        jsonl_entries.append(
            {
                "messages": [
                    {"role": "System", "content": SYSTEM_PROMPT},
                    {"role": "User", "content": question},
                    {"role": "Chatbot", "content": answer},
                ]
            }
        )

# Write the JSONL file: one serialized conversation per line.
output_file_path = "stock_prices.jsonl"
with open(output_file_path, "w", encoding="utf-8") as jsonl_file:
    jsonl_file.writelines(json.dumps(entry) + "\n" for entry in jsonl_entries)

In [97]:
# Upload the generated JSONL file to Cohere as a chat fine-tuning dataset.
# The file is opened in a `with` block so the handle is closed as soon as the
# create call returns, instead of staying open for the life of the kernel.
with open("stock_prices.jsonl", "rb") as dataset_file:
    stock_price_dataset = co.datasets.create(
        name="chat-dataset",
        data=dataset_file,
        type="chat-finetune-input",
    )

In [98]:
# Wait on the uploaded dataset and print what comes back.
# NOTE(review): co.wait presumably blocks until Cohere has finished
# validating the dataset — confirm against the SDK documentation.
ds = co.wait(stock_price_dataset)
print(ds)

...
...
...
...
...
...
...
...


In [99]:
from cohere.finetuning import FinetunedModel, Settings, BaseModel

In [100]:
# Start a fine-tuning job on the uploaded dataset. The request is assembled
# step by step: base model -> settings (base model + dataset) -> named model.
chat_base_model = BaseModel(base_type="BASE_TYPE_CHAT")
finetune_settings = Settings(
    base_model=chat_base_model,
    dataset_id=stock_price_dataset.id,
)
finetune_request = FinetunedModel(name="stock-price", settings=finetune_settings)

finetune_model = co.finetuning.create_finetuned_model(request=finetune_request)

In [101]:
# Fetch the fine-tuned model record using the id returned by the create call.
# NOTE(review): this is a single fetch with no polling, so nothing here shows
# that training has finished — confirm the model is ready before chatting.
ft = co.finetuning.get_finetuned_model(finetune_model.finetuned_model.id)

In [105]:
# Read the question to send to the fine-tuned model from the user.
# NOTE(review): input() waits for someone to type, so this cell stalls an
# unattended "Restart & Run All".
user_input = input("Ask me anything:")

In [106]:
# Send the user's question to the fine-tuned model and print its reply.
# The model name is the fine-tuned model id with a "-ft" suffix appended.
# NOTE(review): only `message` and `model` are passed — no preamble — whereas
# every training conversation starts with a "System" message. Confirm whether
# the same instructions should be supplied at inference time.
new_response = co.chat(
    message=user_input,
    model=ft.finetuned_model.id + "-ft",
)
print(new_response.text)

The current Prime Minister of India is Narendra Modi. He has held the office since 2014. He is the 14th Prime Minister of India.
