In [13]:
# Step 1: Clone the GitHub Repository
!git clone https://github.com/S-ee94/My-Projects.git

Cloning into 'My-Projects'...
remote: Enumerating objects: 49, done.[K
remote: Counting objects: 100% (49/49), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 49 (delta 17), reused 0 (delta 0), pack-reused 0 (from 0)[K
Receiving objects: 100% (49/49), 44.57 MiB | 18.30 MiB/s, done.
Resolving deltas: 100% (17/17), done.


In [14]:
# Step 2: Set up the environment and libraries
import os
import ast
import re
import nbformat  # For handling Jupyter Notebooks

In [15]:
# Load Python and Notebook files from the repository
def load_code_files(directory):
    code_files = []
    for subdir, dirs, files in os.walk(directory):
        for file in files:
            if file.endswith(".py") or file.endswith(".ipynb"):  # Looking for both Python and notebook files
                file_path = os.path.join(subdir, file)
                code_files.append(file_path)
    return code_files

In [16]:
# Extract code from .ipynb files
def extract_code_from_notebook(file_path):
    with open(file_path, "r", encoding="utf-8") as f:
        notebook_content = nbformat.read(f, as_version=4)

    code = ""
    for cell in notebook_content['cells']:
        if cell['cell_type'] == 'code':  # Extract only code cells
            code += cell['source'] + "\n"

    print(f"Extracted code from {file_path}:\n{code}\n")  # Debugging statement to inspect extracted code
    return code

In [17]:
# Parse and Summarize Functions using AST
def summarize_function(function_node):
    # Get function name
    func_name = function_node.name

    # Get function arguments
    args = [arg.arg for arg in function_node.args.args]

    # Get docstring if available
    docstring = ast.get_docstring(function_node)

    # Create a summary
    summary = f"Function `{func_name}` takes arguments {args}. "
    if docstring:
        summary += f"Docstring: {docstring}"
    else:
        summary += "No docstring available."

    return summary, ast.dump(function_node)

In [18]:
def summarize_functions_in_code(file_content):
    try:
        tree = ast.parse(file_content)
    except SyntaxError as e:
        print(f"Syntax error when parsing code: {e}")
        return {}

    function_summaries = {}

    # Iterate through the AST nodes
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            summary, ast_tree = summarize_function(node)
            function_summaries[node.name] = {"summary": summary, "ast": ast_tree}

    return function_summaries

In [19]:
# Develop Search Mechanism
def search_data_type(file_content, data_type):
    # Search for the data type using regex
    matches = re.findall(r'\b' + re.escape(data_type) + r'\b', file_content)
    return matches

def search_function_usages(file_content, function_name):
    # Search for function usage using regex
    matches = re.findall(r'\b' + re.escape(function_name) + r'\b', file_content)
    return matches

In [20]:
# Apply the Functions to the Repository
def summarize_and_search(directory, search_element, search_type="data_type"):
    code_files = load_code_files(directory)

    if not code_files:
        print(f"No Python or Notebook files found in {directory}")
        return {}, {}

    all_summaries = {}
    search_results = {}

    for file_path in code_files:
        print(f"Processing file: {file_path}")  # Debugging print statement

        if file_path.endswith(".py"):
            with open(file_path, "r") as file:
                file_content = file.read()
        elif file_path.endswith(".ipynb"):
            file_content = extract_code_from_notebook(file_path)

        file_summaries = summarize_functions_in_code(file_content)
        all_summaries[file_path] = file_summaries

        if search_type == "data_type":
            search_results[file_path] = search_data_type(file_content, search_element)
        elif search_type == "function":
            search_results[file_path] = search_function_usages(file_content, search_element)

    return all_summaries, search_results

In [21]:
# Step 3: Run the summarization and search process
repository_path = '/content/My-Projects'  # Path to the cloned repository

In [22]:
# Check if repository is cloned correctly by listing files
!ls $repository_path

 Amazon_SalesData_Analysis.ipynb	     Movie_Recommender_System.ipynb
 bot.py					     Object_Detection.ipynb
'Data-Preprocessing code.py'		    'PPT Video.mp4'
'Deployment code.py'			    'Project - Analysis of Amazon Sales Data.pptx'
'Final Presentation Template Video.mp4'     'Project - Foreign Direct Investment Analysis.pptx'
 Foreign_Direct_Investment_Analytics.ipynb   SeemaBS_CV.pdf
 Manual_Augmentation_new_dataset.ipynb	     Steel_Rods_Counting_using_yolov8x_Client_Data.ipynb
 Miscarriage_PPT.pptx


In [23]:
# Run the summarization and search process
search_for = "int"  # Example: Data type to search for
summaries, results = summarize_and_search(repository_path, search_for, search_type="data_type")

Processing file: /content/My-Projects/Amazon_SalesData_Analysis.ipynb
Extracted code from /content/My-Projects/Amazon_SalesData_Analysis.ipynb:
# Importing the necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the data
data = pd.read_csv('/content/Amazon Sales data.csv')
data
# Data Cleaning
data.dropna(inplace=True)  # Example step
# Function to parse dates
def parse_dates(date):
    for fmt in ('%m/%d/%Y', '%d-%m-%Y'):
        try:
            return pd.to_datetime(date, format=fmt)
        except ValueError:
            continue
    raise ValueError(f"No valid date format found for {date}")
# Data tranformation
data['Order Date'] = data['Order Date'].apply(parse_dates)
data['Month'] = data['Order Date'].dt.month
data['Year'] = data['Order Date'].dt.year
# Calculate monthly sales
monthly_sales = data.groupby('Month')['Total Revenue'].sum().reset_index()

# Plot monthly sales trend
plt.figure(figsize=(10, 6))
sns.l

In [24]:
# Step 4: Print Summaries and Search Results
if summaries:
    for file_path, summary in summaries.items():
        print(f"File: {file_path}")
        for func_name, func_data in summary.items():
            print(f"\n{func_name}:")
            print(f"Summary: {func_data['summary']}")
            print(f"AST: {func_data['ast']}")
else:
    print("No function summaries were generated.")

if results:
    for file_path, matches in results.items():
        print(f"\nSearch Results in {file_path}: {matches}")
else:
    print("No search results found.")

File: /content/My-Projects/Amazon_SalesData_Analysis.ipynb

parse_dates:
Summary: Function `parse_dates` takes arguments ['date']. No docstring available.
AST: FunctionDef(name='parse_dates', args=arguments(posonlyargs=[], args=[arg(arg='date')], kwonlyargs=[], kw_defaults=[], defaults=[]), body=[For(target=Name(id='fmt', ctx=Store()), iter=Tuple(elts=[Constant(value='%m/%d/%Y'), Constant(value='%d-%m-%Y')], ctx=Load()), body=[Try(body=[Return(value=Call(func=Attribute(value=Name(id='pd', ctx=Load()), attr='to_datetime', ctx=Load()), args=[Name(id='date', ctx=Load())], keywords=[keyword(arg='format', value=Name(id='fmt', ctx=Load()))]))], handlers=[ExceptHandler(type=Name(id='ValueError', ctx=Load()), body=[Continue()])], orelse=[], finalbody=[])], orelse=[]), Raise(exc=Call(func=Name(id='ValueError', ctx=Load()), args=[JoinedStr(values=[Constant(value='No valid date format found for '), FormattedValue(value=Name(id='date', ctx=Load()), conversion=-1)])], keywords=[]))], decorator_list