In [None]:
!pip install openai pandas fpdf




In [None]:
!pip install -Uqqq pip --progress-bar off
!pip install -qqq groq==0.13.0 --progress-bar off
!pip install -qqq python-dotenv==1.0.1 --progress-bar off

In [None]:
pip install requests



In [None]:
import openai
import pandas as pd
import json
from fpdf import FPDF

# API key intended for OpenAI requests.
# NOTE(review): "APIKEY" looks like a placeholder value - supply a real key at
# runtime and avoid committing secrets to the notebook.
OPENAI_API_KEY = "APIKEY"

In [None]:

# Cell values that are parsed as missing (NaN) when the CSV is read.
MISSING_VALUE_INDICATORS = [
    "?",
    "NA",
    "N/A",
    "NaN",
    "NULL",
    "",
    "Unknown",
    "unknown",
    "missing",
    "Missing",
]


def load_dataset(file_path):
    """Read a CSV file into a DataFrame, or report the problem and return None.

    Values listed in ``MISSING_VALUE_INDICATORS`` are treated as missing.

    Args:
        file_path: Location of the CSV file, passed straight to ``pd.read_csv``.

    Returns:
        The parsed DataFrame, or ``None`` when reading fails or the resulting
        frame is empty. In both failure cases an ``Error: ...`` line is printed.
    """
    try:
        frame = pd.read_csv(file_path, na_values=MISSING_VALUE_INDICATORS)
    except Exception as exc:
        print(f"Error: {exc}")
        return None

    # An empty frame is reported the same way as a read failure.
    if frame.empty:
        print("Error: Dataset is empty")
        return None

    return frame

def query_openai(prompt):
    """Send a single-turn prompt to the OpenAI chat API and return the reply text.

    Args:
        prompt: Text sent as the only ``user`` message of the conversation.

    Returns:
        The message content of the first returned choice, or ``""`` when the
        API returns no text content for that choice.
    """
    import os

    # The notebook defines OPENAI_API_KEY but never hands it to the client.
    # Use it only as a fallback so an explicitly configured key (module
    # attribute or OPENAI_API_KEY environment variable) keeps precedence.
    if getattr(openai, "api_key", None) is None and not os.environ.get("OPENAI_API_KEY"):
        openai.api_key = OPENAI_API_KEY

    response = openai.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": prompt}],
    )

    # ``content`` is optional in the response schema; normalise to a string so
    # callers can concatenate and render it without None checks.
    content = response.choices[0].message.content
    return content if content is not None else ""

class PDF(FPDF):
    """Report document with a centered title header, page-number footer and titled sections."""

    # ASCII stand-ins for punctuation that commonly appears in generated text
    # but cannot be encoded in Latin-1.
    _ASCII_FALLBACKS = str.maketrans({
        "\u2018": "'",
        "\u2019": "'",
        "\u201c": '"',
        "\u201d": '"',
        "\u2013": "-",
        "\u2014": "-",
        "\u2022": "-",
        "\u2026": "...",
    })

    @classmethod
    def _latin1_safe(cls, text):
        """Return *text* as a string containing only Latin-1 encodable characters.

        ``None`` becomes ``""``. Known typographic punctuation is mapped to an
        ASCII equivalent; any other non-Latin-1 character is replaced by ``?``.
        """
        if text is None:
            return ""
        cleaned = str(text).translate(cls._ASCII_FALLBACKS)
        return cleaned.encode("latin-1", "replace").decode("latin-1")

    def header(self):
        """Draw the report title centered at the top of every page."""
        self.set_font("Arial", "B", 14)
        self.cell(0, 10, "Data Science Workflow Report", ln=True, align="C")
        self.ln(5)

    def footer(self):
        """Draw the centered page number 15 units above the bottom edge."""
        self.set_y(-15)
        self.set_font("Arial", "I", 10)
        self.cell(0, 10, f"Page {self.page_no()}", align="C")

    def add_section(self, title, content):
        """Append a bold section title followed by its wrapped body text.

        Args:
            title: Section heading, rendered on a single line.
            content: Section body, rendered with automatic line wrapping.
        """
        # The built-in Arial font only covers Latin-1; unsanitised text with
        # other characters makes writing the PDF fail with an encoding error.
        self.set_font("Arial", "B", 12)
        self.cell(0, 10, self._latin1_safe(title), ln=True)
        self.set_font("Arial", "", 12)
        self.multi_cell(0, 8, self._latin1_safe(content))
        self.ln(2)

def _ask_rating(step):
    """Prompt until the user enters a whole number from 1 to 5 for *step* and return it."""
    question = f"\nRate your understanding of '{step}' from 1 (low) to 5 (high): "
    while True:
        answer = input(question).strip()
        try:
            rating = int(answer)
        except ValueError:
            print("Please enter a whole number from 1 to 5.")
            continue
        if 1 <= rating <= 5:
            return rating
        print("Please enter a whole number from 1 to 5.")


def _detail_level_for(rating):
    """Map a 1-5 self-rating to the level of detail requested from the model."""
    if rating <= 2:
        return "extremely detailed, comprehensive, beginner-friendly, including reasoning and methods"
    if rating <= 4:
        return "moderately detailed"
    return "concise"


def main():
    """Run the interactive workflow and write the results to ``Report.pdf``.

    Loads ``Automobile_data.csv``, asks the model for a workflow overview,
    then for each workflow step asks the user to rate their understanding and
    requests an explanation whose level of detail depends on that rating.
    The overview and all step explanations are saved as PDF sections.

    Returns early (without writing a report) when the dataset cannot be loaded.
    """
    file_path = "Automobile_data.csv"
    df = load_dataset(file_path)

    if df is None:
        return

    workflow_steps = [
        "Data Cleaning & Preparation",
        "Exploratory Data Analysis (EDA)",
        "Machine Learning Algorithm Selection",
        "Model Optimization & Feature Engineering",
        "Deployment & Real-World Considerations",
    ]

    columns = df.columns.tolist()
    detailed_outputs = {}
    # Running transcript of earlier answers, fed back into each prompt so the
    # model is told what has already been covered.
    completed_tasks = ""

    overview_prompt = """
    Given a dataset with columns: {columns}, simply list the key steps involved in the data science workflow without explanations.
    """.format(columns=columns)

    overview = query_openai(overview_prompt)
    print("Workflow Overview:")
    print(overview)

    for step in workflow_steps:
        # Invalid or out-of-range answers are re-prompted instead of crashing.
        rating = _ask_rating(step)
        detail_level = _detail_level_for(rating)

        step_prompt = f"""
        Dataset columns: {columns}.
        Tasks previously covered: {completed_tasks if completed_tasks else 'None'}.

        Provide an {detail_level} explanation specifically for the '{step}' step, clearly detailing:
        - Exactly what needs to be done at this step (tasks specific to this dataset).
        - Provide reasoning why each task is important.
        - Explicit methods and instructions on how to perform each task.
        Do not repeat or cover tasks that have already been explained in previous steps.
        Avoid redundancy or unrelated discussions.
        """

        response = query_openai(step_prompt)
        detailed_outputs[step] = response
        completed_tasks += f"{step}: {response}\n\n"
        print(f"\n{step} details generated.")

    pdf = PDF()
    pdf.add_page()

    pdf.add_section("Workflow Overview", overview)

    for step, content in detailed_outputs.items():
        pdf.add_section(step, content)

    pdf.output("Report.pdf")
    print("Report saved as 'Report.pdf'")

# Entry point: run main() when this code is executed directly rather than imported.
if __name__ == "__main__":
    main()


Workflow Overview:
1. Data collection
2. Data cleaning
3. Data exploration
4. Feature engineering
5. Model selection
6. Model training
7. Model evaluation
8. Model optimization
9. Predictions
10. Deployment

Rate your understanding of 'Data Cleaning & Preparation' from 1 (low) to 5 (high): 5

Data Cleaning & Preparation details generated.

Rate your understanding of 'Exploratory Data Analysis (EDA)' from 1 (low) to 5 (high): 5

Exploratory Data Analysis (EDA) details generated.

Rate your understanding of 'Machine Learning Algorithm Selection' from 1 (low) to 5 (high): 5

Machine Learning Algorithm Selection details generated.

Rate your understanding of 'Model Optimization & Feature Engineering' from 1 (low) to 5 (high): 5

Model Optimization & Feature Engineering details generated.

Rate your understanding of 'Deployment & Real-World Considerations' from 1 (low) to 5 (high): 5

Deployment & Real-World Considerations details generated.
Report saved as 'Detailed_Adaptive_Data_Science_W