In [None]:
# install replicate client
!pip install replicate

In [None]:
# Get a token: https://replicate.com/account
# This cell stores your Replicate API token for the rest of the session.
# After executing it, enter your API token at the prompt.
from getpass import getpass
import os

# getpass() reads the token without echoing it to the screen.
REPLICATE_API_TOKEN = getpass()
# Export the token as an environment variable for this process
# (presumably where the replicate client looks for it -- confirm in its docs).
os.environ["REPLICATE_API_TOKEN"] = REPLICATE_API_TOKEN

In [None]:
import pandas as pd
import numpy as np                         #IMPORT THE LIBRARIES
import matplotlib.pyplot as plt
import replicate  

#STEP 1: READ THE CSV FILE
def read_csv(file_path):
    """Load a CSV file into a pandas DataFrame.

    Args:
        file_path: Path (or any source accepted by ``pd.read_csv``) to read.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails; in that case
        the error is printed instead of being raised.
    """
    try:
        return pd.read_csv(file_path)
    except Exception as e:
        # Best-effort loader: report the problem and let the caller decide.
        print(f"Error reading the CSV file: {e}")
    return None

#STEP 2: CALCULATE THE STATISTICS OF DATA.
def calculate_statistics(data):
    """Compute mean, median, mode and standard deviation of numeric data.

    Args:
        data: A pandas DataFrame or Series. For a DataFrame only the numeric
            columns are analysed: aggregating text columns raises
            ``TypeError`` in pandas 2.x, which would abort the analysis of
            any CSV that contains a non-numeric column.

    Returns:
        A dict with the keys 'Mean', 'Median', 'Mode' and
        'Standard Deviation'. For a DataFrame each value is a Series indexed
        by numeric column name; for a Series each value is a scalar. 'Mode'
        is the first row of pandas' ``mode()`` result, or NaN when there is
        no mode (empty or all-NaN data).
    """
    if isinstance(data, pd.DataFrame):
        numeric = data.select_dtypes(include=[np.number])
    else:
        numeric = data

    modes = numeric.mode()
    if not modes.empty:
        mode = modes.iloc[0]
    elif isinstance(numeric, pd.DataFrame):
        # mode() has no rows for empty/all-NaN input; .iloc[0] would raise.
        mode = pd.Series(np.nan, index=numeric.columns)
    else:
        mode = np.nan

    stats = {
        'Mean': numeric.mean(),
        'Median': numeric.median(),
        'Mode': mode,
        'Standard Deviation': numeric.std(),
    }
    return stats

#STEP 3: PLOT THE DATA
def plot_histogram(data, column_name):
    """Show a 20-bin histogram of one column, ignoring missing values.

    Args:
        data: Table indexable by column name (``data[column_name]``).
        column_name: Name of the column to plot; also used for the title
            and the x-axis label.
    """
    values = data[column_name].dropna()
    plt.hist(values, bins=20, edgecolor='black')
    plt.xlabel(column_name)
    plt.ylabel('Frequency')
    plt.title(f'Histogram of {column_name}')
    plt.show()

def plot_scatter(data, x_column, y_column):
    """Show a scatter plot of one column against another.

    Args:
        data: Table indexable by column name.
        x_column: Name of the column plotted on the x-axis.
        y_column: Name of the column plotted on the y-axis.
    """
    x_values = data[x_column]
    y_values = data[y_column]
    plt.scatter(x_values, y_values)
    plt.xlabel(x_column)
    plt.ylabel(y_column)
    plt.title(f'Scatter Plot: {x_column} vs {y_column}')
    plt.show()

#STEP 4:SUMMARIZE THE DATA 
def summarize_data(data):
    """Build a plain-text summary of every numeric column.

    Args:
        data: pandas DataFrame to summarise.

    Returns:
        A string starting with the "Data Summary:" header, followed by one
        line per numeric column giving its mean, median and standard
        deviation. Non-numeric columns are left out.
    """
    numeric_columns = data.select_dtypes(include=[np.number]).columns

    lines = []
    for column in numeric_columns:
        series = data[column]
        mean = series.mean()
        median = series.median()
        std = series.std()
        lines.append(f"{column}: Mean = {mean}, Median = {median},Std = {std}")

    return "Data Summary:\n" + "\n".join(lines)

#THIS IS THE LLAMA TEXT-GENERATION CODE, TAKEN FROM REPLICATE.COM
def ask_model(data_summary, question):
    """Stream the model's response to a specific question, given the data summary.

    The summary and the question are joined into a single prompt and sent to
    the hosted Llama 3 70B instruct model through ``replicate.stream``. Each
    streamed event is printed as soon as it arrives.

    Args:
        data_summary: Text describing the data, placed first in the prompt.
        question: The user's question, appended after a blank line.

    Returns:
        None. The answer is written to standard output only.
    """
    full_prompt = data_summary + "\n\n" + question
    for event in replicate.stream(
        "meta/meta-llama-3-70b-instruct",
        input={
            "top_p": 0.9,
            "prompt": full_prompt,
            "max_tokens": 512,
            "min_tokens": 0,
            "temperature": 0.6,
            "presence_penalty": 1.15,
            "frequency_penalty": 0.2
        },
    ):
        # Flush so partial output is visible while the answer is streaming.
        print(str(event), end="", flush=True)
    # End the streamed answer so the next prompt starts on its own line.
    print()

#MAIN METHOD:
def main():
    """Run the interactive CSV analysis session.

    Steps: ask for a CSV path and load it, print the statistics, draw a
    histogram and (when the file has at least two columns) a scatter plot
    for user-chosen columns, then answer questions about the data until the
    user types 'exit'. Returns early if the CSV cannot be read.
    """
    csv_file_path = input("Enter the path to the CSV file: ").strip()
    data = read_csv(csv_file_path)
    if data is None:
        return

    data_summary = summarize_data(data)

    statistics = calculate_statistics(data)
    print("\nStatistical Analysis:")
    for stat, value in statistics.items():
        print(f"{stat}: {value}")

    # Column names must match the CSV header; an unknown name skips the
    # plot instead of crashing the session with a KeyError.
    column_name = _ask_for_column(data, "Enter the column name for the histogram: ")
    if column_name is not None:
        plot_histogram(data, column_name)

    if len(data.columns) >= 2:
        x_column = _ask_for_column(data, "Enter the X-axis column for scatter plot: ")
        y_column = _ask_for_column(data, "Enter the Y-axis column for scatter plot: ")
        if x_column is not None and y_column is not None:
            plot_scatter(data, x_column, y_column)

    while True:
        question = input("Ask any question about the data or type 'exit' to quit: ").strip()
        if question.lower() == 'exit':
            break
        if not question:
            # Nothing to ask; do not spend a model call on an empty prompt.
            continue
        ask_model(data_summary, question)


def _ask_for_column(data, prompt):
    """Prompt for a column name and return it, or None if it is not in data."""
    entered = input(prompt)
    # Try the raw text first in case the CSV header itself has padding.
    for candidate in (entered, entered.strip()):
        if candidate in data.columns:
            return candidate
    available = ", ".join(str(name) for name in data.columns)
    print(f"Column '{entered.strip()}' not found. Available columns: {available}")
    return None

# Start the interactive session only when this code runs as the main program,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
