**INSTALL DEPENDENCIES**

In [None]:
!pip install python-docx PyPDF2 pytesseract Pillow openpyxl streamlit together

**1. Analysing a .csv file (Titanic Dataset)**

In [11]:
import os
import pandas as pd
import docx
import PyPDF2
import pytesseract
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from together import Together
import io
import json
from datetime import datetime
import subprocess
import platform

# The Together API key is read from the environment: credentials must never be
# stored in the notebook. The key that used to be hard-coded here is exposed to
# anyone with this file and should be revoked.
if not os.environ.get('TOGETHER_API_KEY'):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; define it in the environment before running this cell."
    )
client = Together(api_key=os.environ['TOGETHER_API_KEY'])
# Chat model used for every completion request made by the agent.
MODEL_NAME = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'

# JSON file (relative to the working directory) where agent memory is persisted.
MEMORY_FILE = 'memory.json'

def load_memory():
    """Load the persisted agent memory from ``MEMORY_FILE``.

    Returns:
        dict: The stored memory. A fresh ``{'files': [], 'interactions': []}``
        structure is returned when the file is missing, empty, unparsable, or
        does not hold a JSON object; missing top-level keys are filled in
        with empty lists.
    """
    memory = None
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
        except json.JSONDecodeError:
            # An interrupted write can leave an empty or truncated file; start
            # over instead of failing every time the memory is loaded.
            memory = None
    if not isinstance(memory, dict):
        memory = {}
    # Callers index both keys directly, so guarantee that they exist.
    memory.setdefault('files', [])
    memory.setdefault('interactions', [])
    return memory

def save_memory(memory):
    """Persist ``memory`` to ``MEMORY_FILE`` as indented JSON.

    The payload is serialized before the file is opened, so a value that
    ``json`` cannot encode raises without truncating the memory that was
    saved previously.

    Args:
        memory: JSON-serializable memory structure to store.

    Raises:
        TypeError: If ``memory`` contains a value that is not JSON-serializable.
    """
    payload = json.dumps(memory, indent=2)
    with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
        f.write(payload)

def clear_memory():
    """Overwrite the persisted memory with an empty structure.

    Returns:
        str: The confirmation message ``"Memory cleared."``.
    """
    save_memory({'files': [], 'interactions': []})
    return "Memory cleared."

def read_text_file(file_path):
    """Return the entire contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The decoded file contents.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

def read_doc_file(file_path):
    """Return the text of a Word document's paragraphs joined by single spaces.

    Args:
        file_path: Path of the document handed to ``docx.Document``.

    Returns:
        str: The paragraph texts, in document order, separated by one space.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return ' '.join(paragraph_texts)

def read_pdf_file(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file.

    Returns:
        str: The extracted text of each page, in order, with a single space
        appended after every page (including the last one).
    """
    with open(file_path, 'rb') as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        return ''.join(page.extract_text() + ' ' for page in pdf_reader.pages)

def read_image_file(file_path):
    """Extract text from an image with ``pytesseract``.

    Args:
        file_path: Path of the image file.

    Returns:
        str: The text returned by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)

def read_excel_csv_file(file_path):
    """Load a ``.csv`` or ``.xlsx`` file into a pandas DataFrame.

    The extension check is case-insensitive. CSV files are decoded as UTF-8
    first, then Windows-1252, and finally Latin-1.

    Args:
        file_path: Path of the spreadsheet file.

    Returns:
        pandas.DataFrame | None: The parsed table, or ``None`` when the
        extension is neither ``.csv`` nor ``.xlsx``.
    """
    lowered = file_path.lower()
    if lowered.endswith('.csv'):
        # Try the codecs that can reject invalid input first. Latin-1 maps
        # every byte to a character and therefore never fails, so it has to be
        # the last resort; placed earlier it would hide cp1252 entirely.
        for encoding in ('utf-8', 'cp1252'):
            try:
                return pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                continue
        return pd.read_csv(file_path, encoding='latin1')
    if lowered.endswith('.xlsx'):
        return pd.read_excel(file_path)
    return None

def process_file(file_path):
    """Read ``file_path`` with the reader that matches its extension.

    Extensions are matched case-insensitively. ``.doc`` files are handed to
    the same reader as ``.docx`` files.

    Args:
        file_path: Path of the file to load.

    Returns:
        The extracted text for text, Word, PDF and image files, or the table
        returned by ``read_excel_csv_file`` for ``.csv``/``.xlsx`` files.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file type is not supported.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist")

    # Compare lower-cased so names such as 'REPORT.PDF' or 'scan.JPG' match.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        return read_text_file(file_path)
    if lowered.endswith(('.doc', '.docx')):
        return read_doc_file(file_path)
    if lowered.endswith('.pdf'):
        return read_pdf_file(file_path)
    if lowered.endswith(('.png', '.jpg', '.jpeg')):
        return read_image_file(file_path)
    if lowered.endswith(('.csv', '.xlsx')):
        table = read_excel_csv_file(file_path)
        if table is None:
            # The reader returns None when it does not recognise the file;
            # report that instead of handing None back as document content.
            raise ValueError('Unsupported file type')
        return table
    raise ValueError('Unsupported file type')

def open_visualization(file_path):
    """Open ``file_path`` with the operating system's default application.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    everywhere else.

    Args:
        file_path: Path of the file to open.

    Returns:
        bool: ``True`` when the opener ran successfully, ``False`` when it
        could not be started or exited with a non-zero status (the failure is
        printed).
    """
    system = platform.system()
    try:
        if system == 'Windows':
            os.startfile(file_path)
        elif system == 'Darwin':
            # check=True turns a non-zero exit status into an exception so a
            # failed launch is not reported as success.
            subprocess.run(['open', file_path], check=True)
        else:
            subprocess.run(['xdg-open', file_path], check=True)
        return True
    except Exception as e:
        print(f"Failed to open visualization: {e}")
        return False

class DataAnalystAgent:
    """Load a file, summarise and plot tabular data, and answer questions.

    Questions and plot descriptions are produced through the module-level
    Together ``client``. File loads, visualizations and question/answer pairs
    are recorded in ``self.memory`` and persisted with ``save_memory``.
    """

    def __init__(self):
        """Create an agent with no loaded file and the persisted memory."""
        self.data = None       # DataFrame from the most recent load, if it was tabular
        self.text_data = None  # extracted text from the most recent load, otherwise
        self.client = client
        self.memory = load_memory()

    def load_data(self, file_path):
        """Load ``file_path`` and record the load in memory.

        Args:
            file_path: Path of the file to load via ``process_file``.

        Raises:
            Whatever ``process_file`` raises for missing or unsupported files.
        """
        data = process_file(file_path)
        is_structured = isinstance(data, pd.DataFrame)
        # Replace both slots: keeping the previous file's table (or text)
        # around would let its analysis be attached to this file's entry.
        self.data = data if is_structured else None
        self.text_data = None if is_structured else data

        self.memory['files'].append({
            'file_path': file_path,
            'type': 'structured' if is_structured else 'text',
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

    def analyze_data(self):
        """Summarise the loaded table.

        Returns:
            dict: ``summary`` (``DataFrame.describe``), ``missing_values``
            (null count per column) and ``dtypes`` (dtype name per column),
            or a ``{'message': ...}`` dict when no table is loaded.
        """
        if self.data is None:
            return {'message': 'No structured data available for analysis'}

        analysis = {
            'summary': self.data.describe().to_dict(),
            'missing_values': self.data.isnull().sum().to_dict(),
            'dtypes': {col: str(dtype) for col, dtype in self.data.dtypes.items()}
        }
        # The memory can be cleared while a table is still loaded, leaving no
        # file entry to attach the analysis to.
        if self.memory['files']:
            self.memory['files'][-1]['analysis'] = analysis
            save_memory(self.memory)
        return analysis

    def generate_visualization(self, plot_type='histogram', column=None):
        """Plot the loaded table, save the figure and describe it.

        Args:
            plot_type: ``'histogram'``, ``'bar'`` or ``'scatter'``.
            column: Column to plot for histogram and bar plots. Scatter plots
                always use the first two columns and do not need it.

        Returns:
            str: A report naming the saved file and its description, or a
            message explaining why nothing was plotted.
        """
        if self.data is None:
            return 'No data available for visualization'

        # Validate before creating a figure so rejected requests leave no
        # open figure behind.
        if plot_type in ('histogram', 'bar'):
            if column is None or column not in self.data.columns:
                return 'Invalid plot type or column'
        elif plot_type == 'scatter':
            if len(self.data.columns) < 2:
                return 'Scatter plot requires at least two columns'
        else:
            return 'Invalid plot type or column'

        output_file = f'visualization_{len(self.memory["interactions"])}.png'
        plt.figure(figsize=(10, 6))
        try:
            if plot_type == 'histogram':
                sns.histplot(self.data[column], kde=True)
                plt.title(f'Histogram of {column}')
            elif plot_type == 'bar':
                self.data[column].value_counts().plot(kind='bar')
                plt.title(f'Bar Plot of {column}')
            else:
                x_col, y_col = self.data.columns[0], self.data.columns[1]
                sns.scatterplot(x=x_col, y=y_col, data=self.data)
                plt.title(f'Scatter Plot of {x_col} vs {y_col}')
            plt.savefig(output_file)
        finally:
            # Close the figure even when plotting or saving fails.
            plt.close()

        self.memory['interactions'].append({
            'type': 'visualization',
            'plot_type': plot_type,
            'column': column,
            'file': output_file,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

        description = self.describe_visualization(plot_type, column, output_file)

        opened = open_visualization(output_file)
        return f'Visualization saved as {output_file}\nDescription: {description}\n' + \
               ('Visualization opened in default viewer.' if opened else 'Please open the visualization manually.')

    def describe_visualization(self, plot_type, column, output_file):
        """Ask the chat model to describe a generated plot.

        The model receives the data summary and the plot parameters, not the
        image itself.

        Args:
            plot_type: Kind of plot that was generated.
            column: Column the plot was requested for.
            output_file: File the plot was saved to.

        Returns:
            str: The model's description.
        """
        context = ''
        if self.data is not None:
            context += f'Data summary: {json.dumps(self.analyze_data(), indent=2)}\n'
        context += f'Generated a {plot_type} plot for column {column} saved as {output_file}.'

        prompt = f'Context: {context}\nDescribe the visualization as a data analyst, focusing on key features and insights.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Describe the visualization based on the given context.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=200
        )
        return response.choices[0].message.content

    def answer_question(self, question):
        """Answer ``question`` using the loaded content and the stored memory.

        Args:
            question: The user's question.

        Returns:
            str: The model's answer, which is also recorded in memory.
        """
        parts = []
        if self.data is not None:
            parts.append(f'Current Data summary: {json.dumps(self.analyze_data(), indent=2)}\n')
        if self.text_data:
            # Only the first 1000 characters of the text are sent to the model.
            parts.append(f'Current Text data: {self.text_data[:1000]}...\n')

        parts.append('Memory of past interactions:\n')
        for file_info in self.memory['files']:
            parts.append(f"File: {file_info['file_path']} (Type: {file_info['type']}, Loaded: {file_info['timestamp']})\n")
            if 'analysis' in file_info:
                parts.append(f"Analysis: {json.dumps(file_info['analysis'], indent=2)}\n")
        for interaction in self.memory['interactions']:
            if interaction['type'] == 'visualization':
                parts.append(f"Visualization: {interaction['plot_type']} on {interaction['column']} saved as {interaction['file']} at {interaction['timestamp']}\n")
            elif interaction['type'] == 'question':
                parts.append(f"Question: {interaction['question']} | Answer: {interaction['answer']} at {interaction['timestamp']}\n")
        context = ''.join(parts)

        prompt = f'Context: {context}\nQuestion: {question}\nAnswer as a data analyst, providing insights based on the available data, text, and memory of past interactions.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Provide accurate and insightful answers based on the given data, text, and memory.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=500
        )
        answer = response.choices[0].message.content

        self.memory['interactions'].append({
            'type': 'question',
            'question': question,
            'answer': answer,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)
        return answer

    def view_memory(self):
        """Return the in-memory history as an indented JSON string."""
        return json.dumps(self.memory, indent=2)

def _handle_memory_command(agent, command):
    """Run a ``view_memory`` or ``clear_memory`` console command.

    Returns:
        bool: ``True`` when ``command`` was one of the two memory commands
        (and has been executed), ``False`` otherwise.
    """
    lowered = command.lower()
    if lowered == 'view_memory':
        print("Memory Contents:", agent.view_memory())
        return True
    if lowered == 'clear_memory':
        print(clear_memory())
        agent.memory = load_memory()
        return True
    return False


def _prompt_visualization(agent):
    """Ask which plot to draw for the loaded table and print the outcome."""
    plot_type = input("Enter plot type (histogram, bar, scatter): ").strip().lower()
    if plot_type in ('histogram', 'bar'):
        column = input(f"Enter column name (available: {list(agent.data.columns)}): ").strip()
        if column not in agent.data.columns:
            print("Invalid column name.")
            return
    elif plot_type == 'scatter':
        # Supply the first column so the request always carries a column
        # name; the scatter plot itself is drawn from the first two columns.
        column = agent.data.columns[0] if len(agent.data.columns) else None
    else:
        print("Invalid plot type.")
        return

    try:
        print(agent.generate_visualization(plot_type, column))
    except Exception as e:
        # Plotting or the description request can fail (unsuitable column,
        # network error); report it and keep the session alive.
        print(f"Error generating visualization: {e}")


def main():
    """Run the interactive console loop of the data analyst agent.

    Repeatedly asks for a file, shows the analysis and an optional plot for
    tabular files, then answers questions until 'next' or 'exit' is entered.
    """
    agent = DataAnalystAgent()
    print("Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.")

    while True:
        file_path = input("Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): ").strip().strip('"')
        if file_path.lower() == 'exit':
            break
        if _handle_memory_command(agent, file_path):
            continue
        try:
            agent.load_data(file_path)
            print("File loaded successfully!")
        except Exception as e:
            print(f"Error loading file: {e}")
            continue

        if agent.data is not None:
            try:
                print("Data Analysis:", json.dumps(agent.analyze_data(), indent=2))
            except Exception as e:
                print(f"Error analysing data: {e}")
            _prompt_visualization(agent)

        while True:
            question = input("Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): ").strip()
            if question.lower() == 'exit':
                return
            if question.lower() == 'next':
                break
            if _handle_memory_command(agent, question):
                continue
            try:
                answer = agent.answer_question(question)
                print("Answer:", answer)
            except Exception as e:
                print(f"Error answering question: {e}")

if __name__ == '__main__':
    main()

Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.
Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): 1.csv
File loaded successfully!
Data Analysis: {
  "summary": {
    "Passengerid": {
      "count": 1309.0,
      "mean": 655.0,
      "std": 378.0200611960517,
      "min": 1.0,
      "25%": 328.0,
      "50%": 655.0,
      "75%": 982.0,
      "max": 1309.0
    },
    "Age": {
      "count": 1309.0,
      "mean": 29.50318563789152,
      "std": 12.905240585464622,
      "min": 0.17,
      "25%": 22.0,
      "50%": 28.0,
      "75%": 35.0,
      "max": 80.0
    },
    "Fare": {
      "count": 1309.0,
      "mean": 33.28108563789152,
      "std": 51.74149976752607,
      "min": 0.0,
      "25%": 7.8958,
      "50%": 14.4542,
      "75%": 31.275,
      "max": 512.3292
    },
    "Sex": {
      "count": 1309.0,
      "mean": 0.3559969442322384,
      "std": 0.47899728344132936,
      "min": 0.0,
      "25%": 0.0,
      "50

**2. Analysing PDF (YT chatbot)**

In [13]:
import os
import pandas as pd
import docx
import PyPDF2
import pytesseract
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from together import Together
import io
import json
from datetime import datetime
import subprocess
import platform

# The Together API key is read from the environment: credentials must never be
# stored in the notebook. The key that used to be hard-coded here is exposed to
# anyone with this file and should be revoked.
if not os.environ.get('TOGETHER_API_KEY'):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; define it in the environment before running this cell."
    )
client = Together(api_key=os.environ['TOGETHER_API_KEY'])
# Chat model used for every completion request made by the agent.
MODEL_NAME = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'

# JSON file (relative to the working directory) where agent memory is persisted.
MEMORY_FILE = 'memory.json'

def load_memory():
    """Load the persisted agent memory from ``MEMORY_FILE``.

    Returns:
        dict: The stored memory. A fresh ``{'files': [], 'interactions': []}``
        structure is returned when the file is missing, empty, unparsable, or
        does not hold a JSON object; missing top-level keys are filled in
        with empty lists.
    """
    memory = None
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
        except json.JSONDecodeError:
            # An interrupted write can leave an empty or truncated file; start
            # over instead of failing every time the memory is loaded.
            memory = None
    if not isinstance(memory, dict):
        memory = {}
    # Callers index both keys directly, so guarantee that they exist.
    memory.setdefault('files', [])
    memory.setdefault('interactions', [])
    return memory

def save_memory(memory):
    """Persist ``memory`` to ``MEMORY_FILE`` as indented JSON.

    The payload is serialized before the file is opened, so a value that
    ``json`` cannot encode raises without truncating the memory that was
    saved previously.

    Args:
        memory: JSON-serializable memory structure to store.

    Raises:
        TypeError: If ``memory`` contains a value that is not JSON-serializable.
    """
    payload = json.dumps(memory, indent=2)
    with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
        f.write(payload)

def clear_memory():
    """Overwrite the persisted memory with an empty structure.

    Returns:
        str: The confirmation message ``"Memory cleared."``.
    """
    save_memory({'files': [], 'interactions': []})
    return "Memory cleared."

def read_text_file(file_path):
    """Return the entire contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The decoded file contents.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

def read_doc_file(file_path):
    """Return the text of a Word document's paragraphs joined by single spaces.

    Args:
        file_path: Path of the document handed to ``docx.Document``.

    Returns:
        str: The paragraph texts, in document order, separated by one space.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return ' '.join(paragraph_texts)

def read_pdf_file(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file.

    Returns:
        str: The extracted text of each page, in order, with a single space
        appended after every page (including the last one).
    """
    with open(file_path, 'rb') as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        return ''.join(page.extract_text() + ' ' for page in pdf_reader.pages)

def read_image_file(file_path):
    """Extract text from an image with ``pytesseract``.

    Args:
        file_path: Path of the image file.

    Returns:
        str: The text returned by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)

def read_excel_csv_file(file_path):
    """Load a ``.csv`` or ``.xlsx`` file into a pandas DataFrame.

    The extension check is case-insensitive. CSV files are decoded as UTF-8
    first, then Windows-1252, and finally Latin-1.

    Args:
        file_path: Path of the spreadsheet file.

    Returns:
        pandas.DataFrame | None: The parsed table, or ``None`` when the
        extension is neither ``.csv`` nor ``.xlsx``.
    """
    lowered = file_path.lower()
    if lowered.endswith('.csv'):
        # Try the codecs that can reject invalid input first. Latin-1 maps
        # every byte to a character and therefore never fails, so it has to be
        # the last resort; placed earlier it would hide cp1252 entirely.
        for encoding in ('utf-8', 'cp1252'):
            try:
                return pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                continue
        return pd.read_csv(file_path, encoding='latin1')
    if lowered.endswith('.xlsx'):
        return pd.read_excel(file_path)
    return None

def process_file(file_path):
    """Read ``file_path`` with the reader that matches its extension.

    Extensions are matched case-insensitively. ``.doc`` files are handed to
    the same reader as ``.docx`` files.

    Args:
        file_path: Path of the file to load.

    Returns:
        The extracted text for text, Word, PDF and image files, or the table
        returned by ``read_excel_csv_file`` for ``.csv``/``.xlsx`` files.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file type is not supported.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist")

    # Compare lower-cased so names such as 'REPORT.PDF' or 'scan.JPG' match.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        return read_text_file(file_path)
    if lowered.endswith(('.doc', '.docx')):
        return read_doc_file(file_path)
    if lowered.endswith('.pdf'):
        return read_pdf_file(file_path)
    if lowered.endswith(('.png', '.jpg', '.jpeg')):
        return read_image_file(file_path)
    if lowered.endswith(('.csv', '.xlsx')):
        table = read_excel_csv_file(file_path)
        if table is None:
            # The reader returns None when it does not recognise the file;
            # report that instead of handing None back as document content.
            raise ValueError('Unsupported file type')
        return table
    raise ValueError('Unsupported file type')

def open_visualization(file_path):
    """Open ``file_path`` with the operating system's default application.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    everywhere else.

    Args:
        file_path: Path of the file to open.

    Returns:
        bool: ``True`` when the opener ran successfully, ``False`` when it
        could not be started or exited with a non-zero status (the failure is
        printed).
    """
    system = platform.system()
    try:
        if system == 'Windows':
            os.startfile(file_path)
        elif system == 'Darwin':
            # check=True turns a non-zero exit status into an exception so a
            # failed launch is not reported as success.
            subprocess.run(['open', file_path], check=True)
        else:
            subprocess.run(['xdg-open', file_path], check=True)
        return True
    except Exception as e:
        print(f"Failed to open visualization: {e}")
        return False

class DataAnalystAgent:
    """Load a file, summarise and plot tabular data, and answer questions.

    Questions and plot descriptions are produced through the module-level
    Together ``client``. File loads, visualizations and question/answer pairs
    are recorded in ``self.memory`` and persisted with ``save_memory``.
    """

    def __init__(self):
        """Create an agent with no loaded file and the persisted memory."""
        self.data = None       # DataFrame from the most recent load, if it was tabular
        self.text_data = None  # extracted text from the most recent load, otherwise
        self.client = client
        self.memory = load_memory()

    def load_data(self, file_path):
        """Load ``file_path`` and record the load in memory.

        Args:
            file_path: Path of the file to load via ``process_file``.

        Raises:
            Whatever ``process_file`` raises for missing or unsupported files.
        """
        data = process_file(file_path)
        is_structured = isinstance(data, pd.DataFrame)
        # Replace both slots: keeping the previous file's table (or text)
        # around would let its analysis be attached to this file's entry.
        self.data = data if is_structured else None
        self.text_data = None if is_structured else data

        self.memory['files'].append({
            'file_path': file_path,
            'type': 'structured' if is_structured else 'text',
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

    def analyze_data(self):
        """Summarise the loaded table.

        Returns:
            dict: ``summary`` (``DataFrame.describe``), ``missing_values``
            (null count per column) and ``dtypes`` (dtype name per column),
            or a ``{'message': ...}`` dict when no table is loaded.
        """
        if self.data is None:
            return {'message': 'No structured data available for analysis'}

        analysis = {
            'summary': self.data.describe().to_dict(),
            'missing_values': self.data.isnull().sum().to_dict(),
            'dtypes': {col: str(dtype) for col, dtype in self.data.dtypes.items()}
        }
        # The memory can be cleared while a table is still loaded, leaving no
        # file entry to attach the analysis to.
        if self.memory['files']:
            self.memory['files'][-1]['analysis'] = analysis
            save_memory(self.memory)
        return analysis

    def generate_visualization(self, plot_type='histogram', column=None):
        """Plot the loaded table, save the figure and describe it.

        Args:
            plot_type: ``'histogram'``, ``'bar'`` or ``'scatter'``.
            column: Column to plot for histogram and bar plots. Scatter plots
                always use the first two columns and do not need it.

        Returns:
            str: A report naming the saved file and its description, or a
            message explaining why nothing was plotted.
        """
        if self.data is None:
            return 'No data available for visualization'

        # Validate before creating a figure so rejected requests leave no
        # open figure behind.
        if plot_type in ('histogram', 'bar'):
            if column is None or column not in self.data.columns:
                return 'Invalid plot type or column'
        elif plot_type == 'scatter':
            if len(self.data.columns) < 2:
                return 'Scatter plot requires at least two columns'
        else:
            return 'Invalid plot type or column'

        output_file = f'visualization_{len(self.memory["interactions"])}.png'
        plt.figure(figsize=(10, 6))
        try:
            if plot_type == 'histogram':
                sns.histplot(self.data[column], kde=True)
                plt.title(f'Histogram of {column}')
            elif plot_type == 'bar':
                self.data[column].value_counts().plot(kind='bar')
                plt.title(f'Bar Plot of {column}')
            else:
                x_col, y_col = self.data.columns[0], self.data.columns[1]
                sns.scatterplot(x=x_col, y=y_col, data=self.data)
                plt.title(f'Scatter Plot of {x_col} vs {y_col}')
            plt.savefig(output_file)
        finally:
            # Close the figure even when plotting or saving fails.
            plt.close()

        self.memory['interactions'].append({
            'type': 'visualization',
            'plot_type': plot_type,
            'column': column,
            'file': output_file,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

        description = self.describe_visualization(plot_type, column, output_file)
        opened = open_visualization(output_file)
        return f'Visualization saved as {output_file}\nDescription: {description}\n' + \
               ('Visualization opened in default viewer.' if opened else 'Please open the visualization manually.')

    def describe_visualization(self, plot_type, column, output_file):
        """Ask the chat model to describe a generated plot.

        The model receives the data summary and the plot parameters, not the
        image itself.

        Args:
            plot_type: Kind of plot that was generated.
            column: Column the plot was requested for.
            output_file: File the plot was saved to.

        Returns:
            str: The model's description.
        """
        context = ''
        if self.data is not None:
            context += f'Data summary: {json.dumps(self.analyze_data(), indent=2)}\n'
        context += f'Generated a {plot_type} plot for column {column} saved as {output_file}.'

        prompt = f'Context: {context}\nDescribe the visualization as a data analyst, focusing on key features and insights.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Describe the visualization based on the given context.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=200
        )
        return response.choices[0].message.content

    def answer_question(self, question):
        """Answer ``question`` using the loaded content and the stored memory.

        Args:
            question: The user's question.

        Returns:
            str: The model's answer, which is also recorded in memory.
        """
        parts = []
        if self.data is not None:
            parts.append(f'Current Data summary: {json.dumps(self.analyze_data(), indent=2)}\n')
        if self.text_data:
            # Only the first 1000 characters of the text are sent to the model.
            parts.append(f'Current Text data: {self.text_data[:1000]}...\n')
        parts.append('Memory of past interactions:\n')
        for file_info in self.memory['files']:
            parts.append(f"File: {file_info['file_path']} (Type: {file_info['type']}, Loaded: {file_info['timestamp']})\n")
            if 'analysis' in file_info:
                parts.append(f"Analysis: {json.dumps(file_info['analysis'], indent=2)}\n")
        for interaction in self.memory['interactions']:
            if interaction['type'] == 'visualization':
                parts.append(f"Visualization: {interaction['plot_type']} on {interaction['column']} saved as {interaction['file']} at {interaction['timestamp']}\n")
            elif interaction['type'] == 'question':
                parts.append(f"Question: {interaction['question']} | Answer: {interaction['answer']} at {interaction['timestamp']}\n")
        context = ''.join(parts)

        prompt = f'Context: {context}\nQuestion: {question}\nAnswer as a data analyst, providing insights based on the available data, text, and memory of past interactions.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Provide accurate and insightful answers based on the given data, text, and memory.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=500
        )
        answer = response.choices[0].message.content
        self.memory['interactions'].append({
            'type': 'question',
            'question': question,
            'answer': answer,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)
        return answer

    def view_memory(self):
        """Return the in-memory history as an indented JSON string."""
        return json.dumps(self.memory, indent=2)

def _handle_memory_command(agent, command):
    """Run a ``view_memory`` or ``clear_memory`` console command.

    Returns:
        bool: ``True`` when ``command`` was one of the two memory commands
        (and has been executed), ``False`` otherwise.
    """
    lowered = command.lower()
    if lowered == 'view_memory':
        print("Memory Contents:", agent.view_memory())
        return True
    if lowered == 'clear_memory':
        print(clear_memory())
        agent.memory = load_memory()
        return True
    return False


def _prompt_visualization(agent):
    """Ask which plot to draw for the loaded table and print the outcome."""
    plot_type = input("Enter plot type (histogram, bar, scatter): ").strip().lower()
    if plot_type in ('histogram', 'bar'):
        column = input(f"Enter column name (available: {list(agent.data.columns)}): ").strip()
        if column not in agent.data.columns:
            print("Invalid column name.")
            return
    elif plot_type == 'scatter':
        # Supply the first column so the request always carries a column
        # name; the scatter plot itself is drawn from the first two columns.
        column = agent.data.columns[0] if len(agent.data.columns) else None
    else:
        print("Invalid plot type.")
        return

    try:
        print(agent.generate_visualization(plot_type, column))
    except Exception as e:
        # Plotting or the description request can fail (unsuitable column,
        # network error); report it and keep the session alive.
        print(f"Error generating visualization: {e}")


def main():
    """Run the interactive console loop of the data analyst agent.

    Repeatedly asks for a file, shows the analysis and an optional plot for
    tabular files, then answers questions until 'next' or 'exit' is entered.
    """
    agent = DataAnalystAgent()
    print("Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.")

    while True:
        file_path = input("Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): ").strip().strip('"')
        if file_path.lower() == 'exit':
            break
        if _handle_memory_command(agent, file_path):
            continue
        try:
            agent.load_data(file_path)
            print("File loaded successfully!")
        except Exception as e:
            print(f"Error loading file: {e}")
            continue

        if agent.data is not None:
            try:
                print("Data Analysis:", json.dumps(agent.analyze_data(), indent=2))
            except Exception as e:
                print(f"Error analysing data: {e}")
            _prompt_visualization(agent)

        while True:
            question = input("Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): ").strip()
            if question.lower() == 'exit':
                return
            if question.lower() == 'next':
                break
            if _handle_memory_command(agent, question):
                continue
            try:
                answer = agent.answer_question(question)
                print("Answer:", answer)
            except Exception as e:
                print(f"Error answering question: {e}")

if __name__ == '__main__':
    main()

Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.
Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): YTchatbot_report.pdf
File loaded successfully!
Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): clear_memory
Memory cleared.
Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): wha ts this pdf about
Answer: Based on the provided text data, I will analyze it and provide an insightful answer.

The text appears to be a personal profile or a resume of an individual, Manshu555, highlighting their programming experience and a significant project they worked on. 

As a data analyst, my analysis of the text reveals the following key points:

1. **Primary Programming Experience**: The author has extensive experience in Python-based backend development, with a focus on integratin

**3. Analysing an Image (Black Hole Article from Wikipedia on the Google Front Page)**

In [15]:
import os
import pandas as pd
import docx
import PyPDF2
import pytesseract
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from together import Together
import io
import json
from datetime import datetime
import subprocess
import platform

# The Together API key is read from the environment: credentials must never be
# stored in the notebook. The key that used to be hard-coded here is exposed to
# anyone with this file and should be revoked.
if not os.environ.get('TOGETHER_API_KEY'):
    raise RuntimeError(
        "TOGETHER_API_KEY is not set; define it in the environment before running this cell."
    )
client = Together(api_key=os.environ['TOGETHER_API_KEY'])
# Chat model used for every completion request made by the agent.
MODEL_NAME = 'meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8'

# JSON file (relative to the working directory) where agent memory is persisted.
MEMORY_FILE = 'memory.json'

def load_memory():
    """Load the persisted agent memory from ``MEMORY_FILE``.

    Returns:
        dict: The stored memory. A fresh ``{'files': [], 'interactions': []}``
        structure is returned when the file is missing, empty, unparsable, or
        does not hold a JSON object; missing top-level keys are filled in
        with empty lists.
    """
    memory = None
    if os.path.exists(MEMORY_FILE):
        try:
            with open(MEMORY_FILE, 'r', encoding='utf-8') as f:
                memory = json.load(f)
        except json.JSONDecodeError:
            # An interrupted write can leave an empty or truncated file; start
            # over instead of failing every time the memory is loaded.
            memory = None
    if not isinstance(memory, dict):
        memory = {}
    # Callers index both keys directly, so guarantee that they exist.
    memory.setdefault('files', [])
    memory.setdefault('interactions', [])
    return memory

def save_memory(memory):
    """Persist ``memory`` to ``MEMORY_FILE`` as indented JSON.

    The payload is serialized before the file is opened, so a value that
    ``json`` cannot encode raises without truncating the memory that was
    saved previously.

    Args:
        memory: JSON-serializable memory structure to store.

    Raises:
        TypeError: If ``memory`` contains a value that is not JSON-serializable.
    """
    payload = json.dumps(memory, indent=2)
    with open(MEMORY_FILE, 'w', encoding='utf-8') as f:
        f.write(payload)

def clear_memory():
    """Overwrite the persisted memory with an empty structure.

    Returns:
        str: The confirmation message ``"Memory cleared."``.
    """
    save_memory({'files': [], 'interactions': []})
    return "Memory cleared."

def read_text_file(file_path):
    """Return the entire contents of a UTF-8 encoded text file.

    Args:
        file_path: Path of the file to read.

    Returns:
        str: The decoded file contents.
    """
    with open(file_path, mode='r', encoding='utf-8') as handle:
        contents = handle.read()
    return contents

def read_doc_file(file_path):
    """Return the text of a Word document's paragraphs joined by single spaces.

    Args:
        file_path: Path of the document handed to ``docx.Document``.

    Returns:
        str: The paragraph texts, in document order, separated by one space.
    """
    document = docx.Document(file_path)
    paragraph_texts = (paragraph.text for paragraph in document.paragraphs)
    return ' '.join(paragraph_texts)

def read_pdf_file(file_path):
    """Extract the text of every page of a PDF.

    Args:
        file_path: Path of the PDF file.

    Returns:
        str: The extracted text of each page, in order, with a single space
        appended after every page (including the last one).
    """
    with open(file_path, 'rb') as pdf_stream:
        pdf_reader = PyPDF2.PdfReader(pdf_stream)
        return ''.join(page.extract_text() + ' ' for page in pdf_reader.pages)

def read_image_file(file_path):
    """Extract text from an image with ``pytesseract``.

    Args:
        file_path: Path of the image file.

    Returns:
        str: The text returned by ``pytesseract.image_to_string``.
    """
    # The context manager closes the underlying file handle once OCR is done.
    with Image.open(file_path) as image:
        return pytesseract.image_to_string(image)

def read_excel_csv_file(file_path):
    """Load a ``.csv`` or ``.xlsx`` file into a pandas DataFrame.

    The extension check is case-insensitive. CSV files are decoded as UTF-8
    first, then Windows-1252, and finally Latin-1.

    Args:
        file_path: Path of the spreadsheet file.

    Returns:
        pandas.DataFrame | None: The parsed table, or ``None`` when the
        extension is neither ``.csv`` nor ``.xlsx``.
    """
    lowered = file_path.lower()
    if lowered.endswith('.csv'):
        # Try the codecs that can reject invalid input first. Latin-1 maps
        # every byte to a character and therefore never fails, so it has to be
        # the last resort; placed earlier it would hide cp1252 entirely.
        for encoding in ('utf-8', 'cp1252'):
            try:
                return pd.read_csv(file_path, encoding=encoding)
            except UnicodeDecodeError:
                continue
        return pd.read_csv(file_path, encoding='latin1')
    if lowered.endswith('.xlsx'):
        return pd.read_excel(file_path)
    return None

def process_file(file_path):
    """Read ``file_path`` with the reader that matches its extension.

    Extensions are matched case-insensitively. ``.doc`` files are handed to
    the same reader as ``.docx`` files.

    Args:
        file_path: Path of the file to load.

    Returns:
        The extracted text for text, Word, PDF and image files, or the table
        returned by ``read_excel_csv_file`` for ``.csv``/``.xlsx`` files.

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
        ValueError: If the file type is not supported.
    """
    if not os.path.exists(file_path):
        raise FileNotFoundError(f"File {file_path} does not exist")

    # Compare lower-cased so names such as 'REPORT.PDF' or 'scan.JPG' match.
    lowered = file_path.lower()
    if lowered.endswith('.txt'):
        return read_text_file(file_path)
    if lowered.endswith(('.doc', '.docx')):
        return read_doc_file(file_path)
    if lowered.endswith('.pdf'):
        return read_pdf_file(file_path)
    if lowered.endswith(('.png', '.jpg', '.jpeg')):
        return read_image_file(file_path)
    if lowered.endswith(('.csv', '.xlsx')):
        table = read_excel_csv_file(file_path)
        if table is None:
            # The reader returns None when it does not recognise the file;
            # report that instead of handing None back as document content.
            raise ValueError('Unsupported file type')
        return table
    raise ValueError('Unsupported file type')

def open_visualization(file_path):
    """Open ``file_path`` with the operating system's default application.

    Uses ``os.startfile`` on Windows, ``open`` on macOS and ``xdg-open``
    everywhere else.

    Args:
        file_path: Path of the file to open.

    Returns:
        bool: ``True`` when the opener ran successfully, ``False`` when it
        could not be started or exited with a non-zero status (the failure is
        printed).
    """
    system = platform.system()
    try:
        if system == 'Windows':
            os.startfile(file_path)
        elif system == 'Darwin':
            # check=True turns a non-zero exit status into an exception so a
            # failed launch is not reported as success.
            subprocess.run(['open', file_path], check=True)
        else:
            subprocess.run(['xdg-open', file_path], check=True)
        return True
    except Exception as e:
        print(f"Failed to open visualization: {e}")
        return False

class DataAnalystAgent:
    """Load a file, summarise and plot tabular data, and answer questions.

    Questions and plot descriptions are produced through the module-level
    Together ``client``. File loads, visualizations and question/answer pairs
    are recorded in ``self.memory`` and persisted with ``save_memory``.
    """

    def __init__(self):
        """Create an agent with no loaded file and the persisted memory."""
        self.data = None       # DataFrame from the most recent load, if it was tabular
        self.text_data = None  # extracted text from the most recent load, otherwise
        self.client = client
        self.memory = load_memory()

    def load_data(self, file_path):
        """Load ``file_path`` and record the load in memory.

        Args:
            file_path: Path of the file to load via ``process_file``.

        Raises:
            Whatever ``process_file`` raises for missing or unsupported files.
        """
        data = process_file(file_path)
        is_structured = isinstance(data, pd.DataFrame)
        # Replace both slots: keeping the previous file's table (or text)
        # around would let its analysis be attached to this file's entry.
        self.data = data if is_structured else None
        self.text_data = None if is_structured else data

        self.memory['files'].append({
            'file_path': file_path,
            'type': 'structured' if is_structured else 'text',
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

    def analyze_data(self):
        """Summarise the loaded table.

        Returns:
            dict: ``summary`` (``DataFrame.describe``), ``missing_values``
            (null count per column) and ``dtypes`` (dtype name per column),
            or a ``{'message': ...}`` dict when no table is loaded.
        """
        if self.data is None:
            return {'message': 'No structured data available for analysis'}

        analysis = {
            'summary': self.data.describe().to_dict(),
            'missing_values': self.data.isnull().sum().to_dict(),
            'dtypes': {col: str(dtype) for col, dtype in self.data.dtypes.items()}
        }
        # The memory can be cleared while a table is still loaded, leaving no
        # file entry to attach the analysis to.
        if self.memory['files']:
            self.memory['files'][-1]['analysis'] = analysis
            save_memory(self.memory)
        return analysis

    def generate_visualization(self, plot_type='histogram', column=None):
        """Plot the loaded table, save the figure and describe it.

        Args:
            plot_type: ``'histogram'``, ``'bar'`` or ``'scatter'``.
            column: Column to plot for histogram and bar plots. Scatter plots
                always use the first two columns and do not need it.

        Returns:
            str: A report naming the saved file and its description, or a
            message explaining why nothing was plotted.
        """
        if self.data is None:
            return 'No data available for visualization'

        # Validate before creating a figure so rejected requests leave no
        # open figure behind.
        if plot_type in ('histogram', 'bar'):
            if column is None or column not in self.data.columns:
                return 'Invalid plot type or column'
        elif plot_type == 'scatter':
            if len(self.data.columns) < 2:
                return 'Scatter plot requires at least two columns'
        else:
            return 'Invalid plot type or column'

        output_file = f'visualization_{len(self.memory["interactions"])}.png'
        plt.figure(figsize=(10, 6))
        try:
            if plot_type == 'histogram':
                sns.histplot(self.data[column], kde=True)
                plt.title(f'Histogram of {column}')
            elif plot_type == 'bar':
                self.data[column].value_counts().plot(kind='bar')
                plt.title(f'Bar Plot of {column}')
            else:
                x_col, y_col = self.data.columns[0], self.data.columns[1]
                sns.scatterplot(x=x_col, y=y_col, data=self.data)
                plt.title(f'Scatter Plot of {x_col} vs {y_col}')
            plt.savefig(output_file)
        finally:
            # Close the figure even when plotting or saving fails.
            plt.close()

        self.memory['interactions'].append({
            'type': 'visualization',
            'plot_type': plot_type,
            'column': column,
            'file': output_file,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)

        description = self.describe_visualization(plot_type, column, output_file)
        opened = open_visualization(output_file)
        return f'Visualization saved as {output_file}\nDescription: {description}\n' + \
               ('Visualization opened in default viewer.' if opened else 'Please open the visualization manually.')

    def describe_visualization(self, plot_type, column, output_file):
        """Ask the chat model to describe a generated plot.

        The model receives the data summary and the plot parameters, not the
        image itself.

        Args:
            plot_type: Kind of plot that was generated.
            column: Column the plot was requested for.
            output_file: File the plot was saved to.

        Returns:
            str: The model's description.
        """
        context = ''
        if self.data is not None:
            context += f'Data summary: {json.dumps(self.analyze_data(), indent=2)}\n'
        context += f'Generated a {plot_type} plot for column {column} saved as {output_file}.'

        prompt = f'Context: {context}\nDescribe the visualization as a data analyst, focusing on key features and insights.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Describe the visualization based on the given context.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=200
        )
        return response.choices[0].message.content

    def answer_question(self, question):
        """Answer ``question`` using the loaded content and the stored memory.

        Args:
            question: The user's question.

        Returns:
            str: The model's answer, which is also recorded in memory.
        """
        parts = []
        if self.data is not None:
            parts.append(f'Current Data summary: {json.dumps(self.analyze_data(), indent=2)}\n')
        if self.text_data:
            # Only the first 1000 characters of the text are sent to the model.
            parts.append(f'Current Text data: {self.text_data[:1000]}...\n')
        parts.append('Memory of past interactions:\n')
        for file_info in self.memory['files']:
            parts.append(f"File: {file_info['file_path']} (Type: {file_info['type']}, Loaded: {file_info['timestamp']})\n")
            if 'analysis' in file_info:
                parts.append(f"Analysis: {json.dumps(file_info['analysis'], indent=2)}\n")
        for interaction in self.memory['interactions']:
            if interaction['type'] == 'visualization':
                parts.append(f"Visualization: {interaction['plot_type']} on {interaction['column']} saved as {interaction['file']} at {interaction['timestamp']}\n")
            elif interaction['type'] == 'question':
                parts.append(f"Question: {interaction['question']} | Answer: {interaction['answer']} at {interaction['timestamp']}\n")
        context = ''.join(parts)

        prompt = f'Context: {context}\nQuestion: {question}\nAnswer as a data analyst, providing insights based on the available data, text, and memory of past interactions.'

        response = self.client.chat.completions.create(
            model=MODEL_NAME,
            messages=[
                {'role': 'system', 'content': 'You are a data analyst. Provide accurate and insightful answers based on the given data, text, and memory.'},
                {'role': 'user', 'content': prompt}
            ],
            max_tokens=500
        )
        answer = response.choices[0].message.content
        self.memory['interactions'].append({
            'type': 'question',
            'question': question,
            'answer': answer,
            'timestamp': datetime.now().isoformat()
        })
        save_memory(self.memory)
        return answer

    def view_memory(self):
        """Return the in-memory history as an indented JSON string."""
        return json.dumps(self.memory, indent=2)

def main():
    """Run the interactive console session for the data analyst agent.

    The outer loop asks for a file path and loads it; when tabular data is
    available it prints the analysis and offers one visualization. The inner
    loop then answers questions until the user types 'next' (load another
    file) or 'exit' (quit). 'view_memory' and 'clear_memory' are accepted at
    both prompts.
    """
    agent = DataAnalystAgent()
    print("Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.")

    def run_memory_command(command):
        """Execute a memory command; return True if ``command`` was one."""
        if command == 'view_memory':
            print("Memory Contents:", agent.view_memory())
            return True
        if command == 'clear_memory':
            print(clear_memory())
            # Reload so the in-memory copy matches the freshly reset file.
            agent.memory = load_memory()
            return True
        return False

    def offer_visualization():
        """Print the data analysis and prompt for a single plot."""
        print("Data Analysis:", json.dumps(agent.analyze_data(), indent=2))
        plot_type = input("Enter plot type (histogram, bar, scatter): ").strip().lower()
        if plot_type == 'scatter':
            # Scatter plots are requested without a column argument.
            print(agent.generate_visualization(plot_type))
            return
        if plot_type not in ('histogram', 'bar'):
            print("Invalid plot type.")
            return
        column = input(f"Enter column name (available: {list(agent.data.columns)}): ").strip()
        if column not in agent.data.columns:
            print("Invalid column name.")
            return
        print(agent.generate_visualization(plot_type, column))

    while True:
        # Strip surrounding double quotes from the entered path as well as whitespace.
        file_path = input("Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): ").strip().strip('"')
        command = file_path.lower()
        if command == 'exit':
            break
        if run_memory_command(command):
            continue
        try:
            agent.load_data(file_path)
            print("File loaded successfully!")
        except Exception as e:
            print(f"Error loading file: {e}")
            continue

        if agent.data is not None:
            offer_visualization()

        while True:
            question = input("Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): ").strip()
            command = question.lower()
            if command == 'exit':
                return
            if command == 'next':
                break
            if run_memory_command(command):
                continue
            try:
                answer = agent.answer_question(question)
                print("Answer:", answer)
            except Exception as e:
                print(f"Error answering question: {e}")

# Entry-point guard: start the interactive session only when this code runs as
# the main module, not when it is imported.
if __name__ == '__main__':
    main()

Data Analyst Agent: Enter 'exit' to quit, 'view_memory' to see history, or 'clear_memory' to reset.
Enter file path (.csv, .xlsx, .txt, .docx, .pdf, .png, .jpg, .jpeg): 3.png
File loaded successfully!
Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): clear_memory
Memory cleared.
Ask a question about the data (or 'next' for new file, 'exit' to quit, 'view_memory' to see history, 'clear_memory' to reset): what is the context of this image
Answer: Based on the provided text data, it appears that the context is related to astrophysics or cosmology, specifically discussing black holes.

Here's a breakdown of the insights gathered from the data:

1. **Content Analysis**: The text mentions "gravity results from a massive amount of matter packed into a very small space," which is a characteristic of black holes. It further elaborates on the formation of black holes, mentioning the collapse of massive stars and superno

# **Visualizations such as histograms and bar graphs are saved in the local storage of Google Colab.**