In [2]:
# installing dependencies
!pip install langchain-google-genai langchain_community

Collecting langchain_community
  Downloading langchain_community-0.3.19-py3-none-any.whl.metadata (2.4 kB)
Collecting langchain-core<0.4.0,>=0.3.37 (from langchain-google-genai)
  Downloading langchain_core-0.3.41-py3-none-any.whl.metadata (5.9 kB)
Collecting langchain<1.0.0,>=0.3.20 (from langchain_community)
  Downloading langchain-0.3.20-py3-none-any.whl.metadata (7.7 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain_community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain_community)
  Downloading pydantic_settings-2.8.1-py3-none-any.whl.metadata (3.5 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain_community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain_community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclass

In [24]:
# Import necessary modules with error handling
try:
    from langchain_google_genai import ChatGoogleGenerativeAI, GoogleGenerativeAIEmbeddings
    from langchain_community.document_loaders import WebBaseLoader
    from langchain.chains import StuffDocumentsChain
    from langchain.chains.llm import LLMChain
    from langchain.prompts import PromptTemplate
    import google.generativeai as genai
    import os
    from google.colab import userdata
    import git
    import glob
except ModuleNotFoundError as e:
    print(f"Module not found: {e}")
    # Install missing modules if necessary
    !pip install langchain-google-genai langchain-community

# Configure API key and initialize model
if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = userdata.get('api_key')

genai.configure(api_key=os.environ['GOOGLE_API_KEY'])

# Function to initialize model
def initialize_model():
    """Build the chat model used by the rest of the notebook.

    Tries the Gemini chat model first. If constructing it raises ValueError,
    the error is printed and a fallback model is attempted instead.

    Returns:
        The constructed chat model, or None when the fallback module cannot
        be imported.
    """
    try:
        return ChatGoogleGenerativeAI(model="gemini-1.5-pro")
    except ValueError as init_error:
        print(f"Error: {init_error}")
        print("Switching to alternative model.")
        # The fallback below is a placeholder: both the module name and the
        # model name still have to be replaced with real ones.
        try:
            from langchain_alternative_module import ChatAlternativeModel  # Placeholder name
            fallback_llm = ChatAlternativeModel(model="alternative-model")  # Specify the correct model name if needed
        except ModuleNotFoundError:
            print("Alternative model module not found.")
            return None
        return fallback_llm

# Initialize the model
# `llm` is a module-level global read by the summarization functions below.
# It is None when the primary model raised ValueError and the fallback module
# could not be imported.
llm = initialize_model()

In [34]:
# Clone the repository (or use a local directory)
# REPO_URL: remote Git repository that gets cloned and analyzed.
REPO_URL = "https://github.com/Hossain-Shah/Robi_Datathon_0100_pandas.git"
# LOCAL_REPO_PATH: clone destination; if it already exists the clone is skipped.
# The /content/drive/MyDrive prefix presumably requires Google Drive to be
# mounted beforehand — TODO confirm; no mount call is visible in this section.
# NOTE(review): analyze_repository writes its summary under "Colab_Notebooks"
# (with an "s") while this path uses "Colab_Notebook" — confirm which is intended.
LOCAL_REPO_PATH = "/content/drive/MyDrive/Colab_Notebook/Robi_Datathon_0100_pandas"

def clone_repo(repo_url, local_path):
    """Clone ``repo_url`` into ``local_path`` unless that path already exists.

    Only the existence of ``local_path`` is checked; its contents are not
    inspected. A status message is printed in either case.
    """
    if os.path.exists(local_path):
        print("Repository already cloned.")
        return

    print(f"Cloning repository from {repo_url}...")
    git.Repo.clone_from(repo_url, local_path)

def read_code_files(repo_path, extensions=(".py", ".ipynb", ".js", ".java", ".cpp", ".ts")):
    """Read every code file under ``repo_path`` into memory.

    Args:
        repo_path: Root directory that is searched recursively.
        extensions: Iterable of file-name suffixes (including the dot) to
            collect. The default is an immutable tuple so it cannot be
            modified between calls.

    Returns:
        A dict mapping each matched file path to its text content, in sorted
        path order. Files are decoded as UTF-8 and undecodable bytes are
        dropped.
    """
    matched_paths = set()
    for ext in extensions:
        matched_paths.update(glob.glob(f"{repo_path}/**/*{ext}", recursive=True))

    code_contents = {}
    # Sorted iteration keeps the result order stable between runs.
    for path in sorted(matched_paths):
        # A directory can also end in a code extension (e.g. "pkg.py/");
        # opening it would raise, so only regular files are read.
        if not os.path.isfile(path):
            continue
        with open(path, "r", encoding="utf-8", errors="ignore") as f:
            code_contents[path] = f.read()
    return code_contents

def summarize_code(file_path, code):
    """Summarize one source file with the module-level ``llm``.

    Args:
        file_path: Path of the file; interpolated into the prompt for context.
        code: Source text to summarize (callers may pass a truncated excerpt).

    Returns:
        Whatever ``LLMChain.run`` returns for the filled-in prompt.
    """
    prompt_template = PromptTemplate.from_template("Summarize the following code from {file_path}:\n\n{code}\n\nSummary:")
    # The chain fills in the template itself, so neither a pre-formatted prompt
    # string nor a StuffDocumentsChain wrapper is needed.
    llm_chain = LLMChain(llm=llm, prompt=prompt_template)
    return llm_chain.run({"file_path": file_path, "code": code})

def analyze_repository():
    """Clone the repository, summarize each code file, and save an overview.

    Steps:
        1. Clone REPO_URL into LOCAL_REPO_PATH (skipped if the path exists).
        2. Read all code files and summarize the first 2000 characters of each.
        3. Ask the model for a repository overview built from those summaries.
        4. Print the overview and write it to a text file.

    Returns:
        None. When no code files are found, a message is printed and no model
        call or file write takes place.
    """
    clone_repo(REPO_URL, LOCAL_REPO_PATH)
    code_files = read_code_files(LOCAL_REPO_PATH)

    if not code_files:
        # Nothing to summarize: do not spend a model call on an empty prompt.
        print("No code files found in the repository.")
        return

    repo_summary = []
    for file, code in code_files.items():
        print(f"Analyzing {file}...")
        summary = summarize_code(file, code[:2000])  # Limit input size
        repo_summary.append(f"📄 **{file}**:\n{summary}\n")

    # Generate final repository summary
    final_summary_template = PromptTemplate.from_template("Provide an overview of the repository structure based on these file summaries:\n\n{repo_summaries}")
    llm_chain = LLMChain(llm=llm, prompt=final_summary_template)
    final_summary = llm_chain.run({"repo_summaries": "\n".join(repo_summary)})

    # Print and save the summary
    print("\n📝 Repository Summary:\n", final_summary)
    output_path = "/content/drive/MyDrive/Colab_Notebooks/repo_summary.txt"
    # Create the target folder if needed so the finished summary is not lost
    # to a FileNotFoundError after all model calls have already run.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    # Explicit UTF-8: the summary may contain non-ASCII characters.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(final_summary)

# Run the analysis
# In a notebook cell `__name__` is "__main__", so the analysis runs every time
# this cell is executed.
if __name__ == "__main__":
    analyze_repository()

Repository already cloned.
Analyzing /content/drive/MyDrive/Colab_Notebook/Robi_Datathon_0100_pandas/shahnawaz/utils/Robi_Datathon_problems_solution.ipynb...

📝 Repository Summary:
 The repository appears to be structured for a data science competition (likely the "Robi Datathon"). It's using Google Colab and focuses on pandas for data manipulation. Here's a breakdown of the implied structure:

```
Colab_Notebook/
└── Robi_Datathon_0100_pandas/
    └── shahnawaz/
        └── utils/
            └── Robi_Datathon_problems_solution.ipynb
        └── <Likely location for data files: purchase.csv, boxes.csv, problem1.csv>
        └── <Likely location for output files: submission_1.csv>
```

**Key aspects of the structure:**

* **`Colab_Notebook/`**: The root directory suggests this is based in Google Colab.
* **`Robi_Datathon_0100_pandas/`**:  This directory likely contains all the work related to the Robi Datathon, specifically using pandas. The "0100