In [159]:
from typing import Optional, Dict, List, Union
from crewai import Agent, Task, Crew
from crewai_tools import BaseTool
from langchain_cohere import ChatCohere
from langchain_ollama import ChatOllama
from youtube_transcript_api import YouTubeTranscriptApi

In [160]:
import os
import requests
from IPython.display import Markdown,display

In [162]:
# LLM backend handed to the agents in this notebook.
# Active backend: Ollama. The model name and the server URL are read from the
# 'llm-model' and 'llm-model-api-url' environment variables; a missing
# variable raises KeyError here.
llm = ChatOllama(model=os.environ["llm-model"], base_url=os.environ["llm-model-api-url"])

# Alternative backend (Cohere API), kept disabled:
# llm = ChatCohere()

## Custom Retrieval Tools

In [163]:
class YoutubeSearchTool(BaseTool):
    """Tool that searches YouTube for captioned videos and returns their transcripts.

    The search endpoint and API key are read from the ``YOUTUBE_API_URL`` and
    ``YOUTUBE_API_KEY`` environment variables; a missing variable raises
    ``KeyError`` when the tool runs.
    """

    name: str = "YoutubeV3API"
    description: str = "YouTube API for Real-Time Video Caption Searching"

    def _first_text(self, value):
        """Pull the search string out of whatever the agent passed in.

        Agents do not always pass a bare string; the input may arrive wrapped
        in a dict, possibly nested. A top-level string value wins; nested dicts
        are searched only when no top-level string exists.

        Returns:
            The first string found, the value itself when it is not a dict,
            or ``None`` when a dict contains no string at all.
        """
        if not isinstance(value, dict):
            return value
        for item in value.values():
            if isinstance(item, str):
                return item
        for item in value.values():
            if isinstance(item, dict):
                found = self._first_text(item)
                if isinstance(found, str):
                    return found
        return None

    def _run(self, query: Union[Dict, str]) -> List[str]:
        """Search for videos matching ``query`` and collect their transcripts.

        Args:
            query: The search text, or a dict wrapping it.

        Returns:
            One string per video whose transcript could be fetched (the
            transcript parts joined with spaces). An empty list when no search
            text is found, the search request fails or times out, the endpoint
            answers with a non-200 status, or the response is not valid JSON.
        """
        # Local import keeps this class self-contained; the package is already
        # a dependency of this notebook.
        from youtube_transcript_api import CouldNotRetrieveTranscript

        search_text = self._first_text(query)
        if search_text is None:
            return []

        try:
            response = requests.get(
                url=os.environ["YOUTUBE_API_URL"],
                params={
                    "q": search_text,
                    "type": "video",
                    "videoCaption": "closedCaption",
                    "key": os.environ["YOUTUBE_API_KEY"],
                },
                # Without a timeout a stalled connection would hang the agent.
                timeout=30,
            )
            if response.status_code != 200:
                return []
            payload = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or malformed body: same "no results" outcome as
            # a non-200 status.
            return []

        items = payload.get("items", []) if isinstance(payload, dict) else []
        results = []
        for item in items:
            video_id = (item.get("id") or {}).get("videoId")
            if not video_id:
                continue
            try:
                content = YouTubeTranscriptApi.get_transcript(video_id)
            except (CouldNotRetrieveTranscript, requests.RequestException):
                # One video without a retrievable transcript must not discard
                # the transcripts of the others.
                continue
            results.append(" ".join(part["text"] for part in content))
        return results

In [164]:
class AresRealTimeSearch(BaseTool):
    """Tool that sends a search query to the Ares API and returns its ``data`` payload.

    The endpoint and API key are read from the ``ARES_API_URL`` and
    ``ARES_API_KEY`` environment variables; a missing variable raises
    ``KeyError`` when the tool runs.
    """

    name: str = "Ares"
    description: str = "API for Real-Time Internet Search"

    def _first_text(self, value):
        """Pull the search string out of whatever the agent passed in.

        Agents do not always pass a bare string; the input may arrive wrapped
        in a dict, possibly nested. A top-level string value wins; nested dicts
        are searched only when no top-level string exists.

        Returns:
            The first string found, the value itself when it is not a dict,
            or ``None`` when a dict contains no string at all.
        """
        if not isinstance(value, dict):
            return value
        for item in value.values():
            if isinstance(item, str):
                return item
        for item in value.values():
            if isinstance(item, dict):
                found = self._first_text(item)
                if isinstance(found, str):
                    return found
        return None

    def _run(self, query: Union[Dict, str]) -> dict:
        """POST ``query`` to the Ares endpoint and return the response's ``data`` field.

        Args:
            query: The search text, or a dict wrapping it.

        Returns:
            The value stored under ``"data"`` in the JSON response. An empty
            dict when no search text is found, the request fails or times out,
            the endpoint answers with a non-200 status, the body is not valid
            JSON, or the response has no ``"data"`` key.
        """
        search_text = self._first_text(query)
        if search_text is None:
            return {}

        headers = {
            "x-api-key": os.environ["ARES_API_KEY"],
            "content-type": "application/json",
        }
        try:
            response = requests.post(
                os.environ["ARES_API_URL"],
                json={"query": [search_text]},
                headers=headers,
                # Without a timeout a stalled connection would hang the agent.
                timeout=30,
            )
            if response.status_code != 200:
                return {}
            body = response.json()
        except (requests.RequestException, ValueError):
            # Network failure or malformed body: same "no results" outcome as
            # a non-200 status.
            return {}

        if not isinstance(body, dict):
            return {}
        return body.get("data", {})
        


## Agents

In [165]:
# Research agent: the only agent given tools (YouTube transcript search and
# Ares web search). `{topic}` in the goal is a placeholder for the user query.
researcher = Agent(
    role="Content Researcher",
    goal=(
        "Fetch real-time information about the below topic to help compose "
        "a helpful response to the actual user query:\n{topic}"
    ),
    backstory=(
        "Given a user query, your task is to fetch actual facts and information "
        "for online resources to compose an insightful knowledge bank. "
        "This will be used by other agents to compose a response for the user query. "
        "Convert the user query into a string-based search keywords, "
        "and use the appropriate tools to fetch relevant information."
    ),
    llm=llm,
    tools=[YoutubeSearchTool(), AresRealTimeSearch()],
    allow_delegation=False,
    verbose=True,
)

In [166]:
# Planning agent: has no tools; it is asked to turn the query and the gathered
# material into an outline for the writer.
content_planner = Agent(
    role="Content Planner",
    goal=(
        "Given a user query and relevant information from the internet, "
        "compose a general structure and outline for a helpful response"
    ),
    backstory=(
        "You are working as a content planner to help users with general purpose queries. "
        "You will be provided with real-time information about the user request "
        "and you are supposed to generate a table of contents "
        "for the writer and editor to compose a helpful response"
    ),
    llm=llm,
    verbose=True,
    allow_delegation=False,
)

In [167]:
# Writing agent: has no tools; it is asked to compose the final response from
# the gathered material and the outline.
content_writer = Agent(
    # Role spelling fixed (was "Content Wrtier"); the role string is part of
    # the agent's prompt and appears in the run logs.
    role="Content Writer",
    goal=(
        "Given relevant information and an outline, "
        "you are required to compose a helpful response for the user query"
    ),
    backstory=(
        "You are the final content writer. "
        "You will be provided all relevant information and outline for the response. "
        "You are required to use all required inputs to compose a helpful response "
        "in a summarized way that answers the user query "
        "and provides him with an effective solution"
    ),
    llm=llm,
    allow_delegation=False,
    verbose=True,
)

## Tasks

In [168]:
# Research task, assigned to the researcher agent. `{topic}` is a placeholder
# for the user query supplied when the crew is kicked off.
research_content = Task(
    description=(
        # Prompt typo fixed: "form the internet" -> "from the internet".
        "Given the user query: {topic}\n Convert the user query into relevant search keywords "
        "and use the provided tools to search relevant content from the internet "
        "and fetch all possible resources to help answer the user query"
    ),
    expected_output="A list of resources and relevant content that can be useful to answer the user query",
    agent=researcher,
)

In [169]:
# Planning task, assigned to the content planner agent.
plan_content = Task(
    description=(
        # The prompt refers to "the user query", so the query is now actually
        # interpolated via `{topic}` (same placeholder the research task uses).
        # Also fixes the typos "relvant" and "table of content".
        "Given the user query: {topic}\n"
        "and the relevant content gathered for it, "
        "curate a table of contents for the response"
    ),
    expected_output="A comprehensive plan that can be used to curate a well written response",
    agent=content_planner,
)

In [170]:
# Writing task, assigned to the content writer agent.
write_response = Task(
    description=(
        # The writer is asked to answer the user's query, so the query is now
        # interpolated via `{topic}` instead of being left implicit.
        "Given the user query: {topic}\n"
        "Write a detailed and helpful response when provided relevant content "
        "and a well-planned table of contents"
    ),
    expected_output="A well written response that can help user with his query",
    agent=content_writer,
    # The prompt expects both the researched content and the outline, so both
    # upstream tasks are named explicitly as context rather than relying on
    # the crew's default context passing.
    context=[research_content, plan_content],
)

## Pipeline

In [171]:
# Assemble the crew. Tasks are listed in the order research -> plan -> write;
# no `process` argument is given, so CrewAI's default process applies.
pipeline = Crew(
    tasks=[research_content, plan_content, write_response],
    agents=[researcher, content_planner, content_writer],
    verbose=True,
)



## Inference

In [172]:
# Values for the `{topic}` placeholder used in the agent and task prompts.
inputs = dict(topic="Prepare a intro slide on Regression techniques.")

In [173]:
# Run the crew. The `topic` entry of `inputs` is substituted for `{topic}` in
# the prompts (visible in the "Starting Task" log line printed below).
# This calls the LLM and the external search tools, so it needs the
# environment variables read earlier in the notebook and network access.
result = pipeline.kickoff(inputs=inputs)

[1m[95m [2024-09-04 18:43:27][DEBUG]: == Working Agent: Content Researcher[00m
[1m[95m [2024-09-04 18:43:27][INFO]: == Starting Task: Given the user query: Prepare a intro slide on Regression techniques.
 Convert the user query into relevant search keywords and use the provided tools to search relevant content form the internet and fetch all possible resources to help answer the user query[00m


[1m> Entering new CrewAgentExecutor chain...[0m
[32;1m[1;3mThought: I need to find relevant resources about regression techniques for creating an intro slide presentation.  I'll use YoutubeV3API to find video content related to regression and Ares to search for information on the topic from the web. 

Action: YoutubeV3API
Action Input: {'query': {'title': 'Regression Techniques Intro Slide'}}[0m[95m 

['- WELCOME TO INTRODUCTION \nTO REGRESSION ANALYSIS. THE GOALS OF THIS VIDEO ARE TO \nDEFINE REGRESSION ANALYSIS AND ALSO TO DETERMINE HOW TO \nMEASURE HOW WELL A MODEL FITS GIVEN DAT

In [174]:
# Render the text in `result.raw` as Markdown rather than printing it as plain text.
display(Markdown(result.raw))

**Introduction to Regression Analysis**

Regression analysis is a statistical tool used to understand how variables are related to one another. It helps us predict future values based on existing data by using mathematical equations and models that capture the underlying relationships between variables.  The goal of regression analysis is to model the relationship between independent (predictor) variables and a dependent variable (outcome). 

**Understanding Statistical Significance**

A key aspect of regression analysis involves understanding statistical significance, which helps us determine if observed effects are real or simply due to random chance. Here's how we approach this:

* **P-values:**  The p-value represents the probability of observing results as extreme as those obtained from our data, assuming there is no actual relationship between variables. A low p-value (typically below 0.05) indicates that the observed effect is likely statistically significant and suggests that our model accurately reflects a real relationship.
* **t-statistic:**  The t-statistic measures how many standard deviations away from zero an estimated regression coefficient lies. It helps us assess the magnitude of the effect size and determine its significance based on the sample data.  
* **Confidence Intervals:** These intervals offer a range within which we believe the true population value likely falls. The confidence level (e.g., 95%) reflects our probability that the true value lies within this interval. 

**Regression Model Output Explained**

Understanding regression model output is crucial for interpreting results:
   * **Estimated Regression Equation:** This equation expresses the relationship between the dependent variable and one or more independent variables, typically using a linear form like y = b0 + b1x1 + b2x2 +... . 
   * **R² Value (Coefficient of Determination):**  This value represents the percentage of variance in the dependent variable that is explained by the independent variables. A higher R² suggests better predictive power and model fit.  
   * **Standard Errors:** These values provide an estimate of the uncertainty associated with each estimated coefficient, reflecting the reliability of the coefficients' estimates. 
   * **F-statistic:**  This statistic assesses if there is a statistically significant relationship between independent variables and the dependent variable. A high F-statistic (typically larger than 1) suggests strong evidence for a significant relationship.

**Hypothesis Testing: A Step-by-Step Guide**
To test hypotheses related to regression models, we employ the following steps:

1. **Formulate Null and Alternative Hypotheses:** 
   * Null Hypothesis (H0): There is no linear relationship between the independent variable(s) and the dependent variable (no effect).
   * Alternative Hypothesis (Ha): There is a linear relationship between the independent variables and the dependent variable (a significant effect).

2. **Calculate the t-statistic:**  This measures how many standard deviations away from zero our estimated coefficients lie. 

3. **Calculate p-value:** This represents the probability of observing the data under the assumption that there is no true relationship between the independent and dependent variables. A low p-value (typically below 0.05) suggests strong evidence against the null hypothesis. 
 
4. **Compare p-values to Significance Level:**  We compare our calculated p-value with a predetermined significance level (e.g., 0.05). If the p-value is less than the significance level, we reject the null hypothesis in favor of the alternative hypothesis.

**Example: Midterm Elections and Unemployment Rate**
Let's apply this to an example of midterm elections and unemployment rate:  We may hypothesize that a higher unemployment rate leads to a decrease in voter turnout for midterm elections. 

* **Hypotheses:**
    * **Null Hypothesis (H0):** There is no relationship between unemployment rates and voter turnout for midterm elections.
    * **Alternative Hypothesis (Ha):** There is a negative correlation between unemployment rate and voter turnout in midterm elections.  (i.e., Higher unemployment leads to lower voter turnout).

* **Calculation:** A regression analysis model can estimate the relationship, incorporating factors such as income, education levels, political party affiliations, and geographic location. 
* **Hypothesis Testing:** The p-value from the regression analysis helps determine the significance of this relationship. For example, if the p-value is low (e.g., <0.05), we would reject the null hypothesis, suggesting a statistically significant relationship between unemployment and voter turnout in the midterm elections.


**Interpreting Regression Results: A Deeper Look at Significance**

The estimated coefficients represent the change in the dependent variable for each unit increase in an independent variable, holding all other variables constant. We can interpret these effects based on their significance (p-value). 
   * **Significance Level:**  For example, if the p-value is less than 0.05, we would reject the null hypothesis and say that the relationship is statistically significant. 
   * **Confidence Interval:** This helps in understanding the range of values for the estimated coefficient with a certain level of confidence (e.g., 95%).

**Assumptions of Regression Analysis**
Regression analysis relies on various assumptions for its validity:
   * **Linearity:** The relationship between variables should be linear, where changes in one variable lead to proportional changes in another. This assumption ensures the model can accurately reflect the relationship and makes it possible to calculate coefficients. 
   * **Independence of observations:** Each observation in the data set should be independent of others (no systematic influence) on the dependent variable.  This is a key assumption for the statistical validity of the analysis. 
   * **Normality of residuals:** The distribution of the residuals (errors between actual and predicted values) should be approximately normal. This helps ensure that the model assumptions hold, as normality impacts confidence intervals and hypothesis testing.


**Practical Applications of Regression Analysis**

Beyond studying elections:

* **Predicting Customer Behavior:**  Regressions can predict customer behavior based on demographics or past purchases to improve marketing strategies.
   
* **Forecasting Market Trends:**  Regression analysis helps forecast market trends by analyzing historical data to identify patterns and relationships between variables, such as supply and demand. 



**Conclusion: Summary of Key Concepts**

Regression analysis is a valuable tool for understanding how variables are related and making accurate predictions based on existing data. This powerful method involves hypothesis testing, interpretation of model output, and statistical assumptions that must be considered to ensure the accuracy of the results.


 **References:**  (Insert relevant references here if required)