In [1]:
import os
import difflib
import pandas as pd
from pathlib import Path

# Path to the workspace folder
workspace_path = Path("/data/chats/pvp2n/workspace/SASS")

# Function to check if two files are identical
def files_are_identical(file1, file2):
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        return f1.read() == f2.read()

# Get all files in the repository
files = list(workspace_path.glob("**/*"))

# Filter only the specified files
specified_files = [
    "prd_sistema_propostas_comerciais.md",
    "propostas_comerciais_class_diagram (1).mermaid",
    "propostas_comerciais_class_diagram.mermaid",
    "propostas_comerciais_sequence_diagram.mermaid",
    "propostas_comerciais_system_design.md"
]

# Check for duplicates in the specified files
duplicates = []
for i, file1_name in enumerate(specified_files):
    file1_path = workspace_path / file1_name
    if file1_path.exists():
        for j, file2_name in enumerate(specified_files[i+1:], i+1):
            file2_path = workspace_path / file2_name
            if file2_path.exists() and files_are_identical(file1_path, file2_path):
                duplicates.append((file1_name, file2_name))

# Analyze repository structure
def analyze_folder_structure(path, prefix=""):
    result = []
    for item in sorted(path.iterdir()):
        if item.is_dir():
            if item.name != ".git":  # Skip git directory
                result.append(f"{prefix}{item.name}/")
                result.extend(analyze_folder_structure(item, prefix + "  "))
        else:
            result.append(f"{prefix}{item.name}")
    return result

# Get folder structure
folder_structure = analyze_folder_structure(workspace_path)

# Create a dataframe to store file analysis
file_analysis = []
for file_name in specified_files:
    file_path = workspace_path / file_name
    if file_path.exists():
        file_size = file_path.stat().st_size
        file_analysis.append({
            "File Name": file_name,
            "File Size (bytes)": file_size,
            "Description": get_file_description(file_name),
            "Suggested Name": get_suggested_name(file_name)
        })

# Function to get file description
def get_file_description(file_name):
    if "class_diagram" in file_name:
        return "UML Class diagram defining the system's classes, attributes, methods and relationships"
    elif "sequence_diagram" in file_name:
        return "UML Sequence diagram showing interaction between system components"
    elif "prd" in file_name:
        return "Product Requirements Document detailing the system specifications"
    elif "system_design" in file_name:
        return "System architecture and design document"
    else:
        return "Unknown file type"

# Function to suggest better file names
def get_suggested_name(file_name):
    if file_name == "propostas_comerciais_class_diagram.mermaid" or file_name == "propostas_comerciais_class_diagram (1).mermaid":
        return "backend_class_diagram.mermaid"
    elif file_name == "propostas_comerciais_sequence_diagram.mermaid":
        return "system_interaction_diagram.mermaid"
    elif file_name == "prd_sistema_propostas_comerciais.md":
        return "product_requirements.md"
    elif file_name == "propostas_comerciais_system_design.md":
        return "system_architecture.md"
    elif file_name == "react_template":
        return "frontend"
    else:
        return file_name

# Create recommendations for repository organization
recommendations = []
if duplicates:
    for file1, file2 in duplicates:
        recommendations.append(f"Remove duplicate file: {file2} (identical to {file1})")

# Suggest renaming files for better organization
name_changes = []
for file in file_analysis:
    if file["File Name"] != file["Suggested Name"]:
        name_changes.append(f"Rename '{file['File Name']}' to '{file['Suggested Name']}'")

# Suggest renaming react_template folder to frontend
if "react_template/" in folder_structure:
    recommendations.append("Rename 'react_template' directory to 'frontend' for clarity")

# Print analysis results
print("Repository Structure:")
for item in folder_structure:
    print(item)

print("\nDuplicate Files:")
for file1, file2 in duplicates:
    print(f"- {file1} and {file2} are identical")

print("\nFile Analysis:")
for file in file_analysis:
    print(f"- {file['File Name']} ({file['File Size (bytes)']} bytes): {file['Description']}")

print("\nRecommendations:")
for rec in recommendations:
    print(f"- {rec}")

for change in name_changes:
    print(f"- {change}")

# Prepare summary for the repository analysis
print("\nRepository Analysis Summary:")
print("1. The repository contains documentation files (PRD, system design) and diagrams (class, sequence)")
print("2. There is a React template that serves as the frontend codebase")
print("3. The class diagram is duplicated and should be consolidated")
print("4. File naming convention could be improved for better organization and clarity")
print("5. The repository structure could be reorganized to separate documentation and code")

# Execute rename operations for the specified files (commented out for safety)
# for file_data in file_analysis:
#     old_path = workspace_path / file_data["File Name"]
#     new_path = workspace_path / file_data["Suggested Name"]
#     if old_path.exists() and file_data["File Name"] != file_data["Suggested Name"]:
#         os.rename(old_path, new_path)
#         print(f"Renamed: {file_data['File Name']} -> {file_data['Suggested Name']}")

# Execute removal of duplicate files (commented out for safety)
# for file1, file2 in duplicates:
#     duplicate_path = workspace_path / file2
#     if duplicate_path.exists():
#         os.remove(duplicate_path)
#         print(f"Removed duplicate: {file2}")

NameError: name 'get_file_description' is not defined

In [2]:
import os
import difflib
import pandas as pd
from pathlib import Path

# Path to the workspace folder
workspace_path = Path("/data/chats/pvp2n/workspace/SASS")

# Function to check if two files are identical
def files_are_identical(file1, file2):
    with open(file1, 'r') as f1, open(file2, 'r') as f2:
        return f1.read() == f2.read()

# Get all files in the repository
files = list(workspace_path.glob("**/*"))

# Filter only the specified files
specified_files = [
    "prd_sistema_propostas_comerciais.md",
    "propostas_comerciais_class_diagram (1).mermaid",
    "propostas_comerciais_class_diagram.mermaid",
    "propostas_comerciais_sequence_diagram.mermaid",
    "propostas_comerciais_system_design.md"
]

# Check for duplicates in the specified files
duplicates = []
for i, file1_name in enumerate(specified_files):
    file1_path = workspace_path / file1_name
    if file1_path.exists():
        for j, file2_name in enumerate(specified_files[i+1:], i+1):
            file2_path = workspace_path / file2_name
            if file2_path.exists() and files_are_identical(file1_path, file2_path):
                duplicates.append((file1_name, file2_name))

# Analyze repository structure
def analyze_folder_structure(path, prefix=""):
    result = []
    for item in sorted(path.iterdir()):
        if item.is_dir():
            if item.name != ".git":  # Skip git directory
                result.append(f"{prefix}{item.name}/")
                result.extend(analyze_folder_structure(item, prefix + "  "))
        else:
            result.append(f"{prefix}{item.name}")
    return result

# Function to get file description
def get_file_description(file_name):
    if "class_diagram" in file_name:
        return "UML Class diagram defining the system's classes, attributes, methods and relationships"
    elif "sequence_diagram" in file_name:
        return "UML Sequence diagram showing interaction between system components"
    elif "prd" in file_name:
        return "Product Requirements Document detailing the system specifications"
    elif "system_design" in file_name:
        return "System architecture and design document"
    else:
        return "Unknown file type"

# Function to suggest better file names
def get_suggested_name(file_name):
    if file_name == "propostas_comerciais_class_diagram.mermaid" or file_name == "propostas_comerciais_class_diagram (1).mermaid":
        return "backend_class_diagram.mermaid"
    elif file_name == "propostas_comerciais_sequence_diagram.mermaid":
        return "system_interaction_diagram.mermaid"
    elif file_name == "prd_sistema_propostas_comerciais.md":
        return "product_requirements.md"
    elif file_name == "propostas_comerciais_system_design.md":
        return "system_architecture.md"
    elif file_name == "react_template":
        return "frontend"
    else:
        return file_name

# Get folder structure
folder_structure = analyze_folder_structure(workspace_path)

# Create a dataframe to store file analysis
file_analysis = []
for file_name in specified_files:
    file_path = workspace_path / file_name
    if file_path.exists():
        file_size = file_path.stat().st_size
        file_analysis.append({
            "File Name": file_name,
            "File Size (bytes)": file_size,
            "Description": get_file_description(file_name),
            "Suggested Name": get_suggested_name(file_name)
        })

# Create recommendations for repository organization
recommendations = []
if duplicates:
    for file1, file2 in duplicates:
        recommendations.append(f"Remove duplicate file: {file2} (identical to {file1})")

# Suggest renaming files for better organization
name_changes = []
for file in file_analysis:
    if file["File Name"] != file["Suggested Name"]:
        name_changes.append(f"Rename '{file['File Name']}' to '{file['Suggested Name']}'")

# Suggest renaming react_template folder to frontend
if "react_template/" in folder_structure:
    recommendations.append("Rename 'react_template' directory to 'frontend' for clarity")

# Print analysis results
print("Repository Structure:")
for item in folder_structure:
    print(item)

print("\nDuplicate Files:")
for file1, file2 in duplicates:
    print(f"- {file1} and {file2} are identical")

print("\nFile Analysis:")
for file in file_analysis:
    print(f"- {file['File Name']} ({file['File Size (bytes)']} bytes): {file['Description']}")

print("\nRecommendations:")
for rec in recommendations:
    print(f"- {rec}")

for change in name_changes:
    print(f"- {change}")

# Prepare summary for the repository analysis
print("\nRepository Analysis Summary:")
print("1. The repository contains documentation files (PRD, system design) and diagrams (class, sequence)")
print("2. There is a React template that serves as the frontend codebase")
print("3. The class diagram is duplicated and should be consolidated")
print("4. File naming convention could be improved for better organization and clarity")
print("5. The repository structure could be reorganized to separate documentation and code")

# Execute rename operations for the specified files (commented out for safety)
# for file_data in file_analysis:
#     old_path = workspace_path / file_data["File Name"]
#     new_path = workspace_path / file_data["Suggested Name"]
#     if old_path.exists() and file_data["File Name"] != file_data["Suggested Name"]:
#         os.rename(old_path, new_path)
#         print(f"Renamed: {file_data['File Name']} -> {file_data['Suggested Name']}")

# Execute removal of duplicate files (commented out for safety)
# for file1, file2 in duplicates:
#     duplicate_path = workspace_path / file2
#     if duplicate_path.exists():
#         os.remove(duplicate_path)
#         print(f"Removed duplicate: {file2}")

Repository Structure:
.gitattributes
LICENSE
README.md
prd_sistema_propostas_comerciais.md
propostas_comerciais_class_diagram (1).mermaid
propostas_comerciais_class_diagram.mermaid
propostas_comerciais_sequence_diagram.mermaid
propostas_comerciais_system_design.md
react_template/
  .gitignore
  README.md
  eslint.config.js
  index.html
  package.json
  pnpm-lock.yaml
  postcss.config.js
  public/
    assets/
      images/
        .gitkeep
    data/
      example.json
  src/
    App.jsx
    api/
      apiClient.js
      authService.js
      customerService.js
      proposalService.js
      templateService.js
    components/
      customers/
        CustomerCard.jsx
        CustomerForm.jsx
      layout/
        Layout.jsx
        Navbar.jsx
        Sidebar.jsx
      proposals/
        ProposalCard.jsx
        ProposalForm.jsx
      templates/
        TemplateCard.jsx
      ui/
        Alert.jsx
        Badge.jsx
        Button.jsx
        Card.jsx
        Input.jsx
        Table.jsx
   