# In [None]:
import os
import subprocess
import pandas as pd
import re

# Numeric rank for each complexity grade letter: 'A' (simplest) maps to 1
# and 'F' (most complex) maps to 6.
complexity_score_map = {
    grade: rank for rank, grade in enumerate('ABCDEF', start=1)
}

# Function to extract the LOC (total lines of code) value from Radon raw output
def extract_loc_from_output(raw_output):
    """Return the LOC figure found in the text printed by `radon raw`.

    Args:
        raw_output: Decoded text output of `radon raw`.

    Returns:
        The integer that follows the first standalone ``LOC:`` label, or 0
        when the text contains no such label.
    """
    # The leading \b stops the pattern from matching the tail of the
    # "LLOC:" and "SLOC:" labels, which also end in "LOC:".
    loc_match = re.search(r"\bLOC:\s+(\d+)", raw_output)
    return int(loc_match.group(1)) if loc_match else 0

# Function to turn the complexity grades in Radon CC output into one average score
def extract_cc_from_output(cc_output):
    """Average the complexity grades found in `radon cc` output.

    Every "- <letter>" occurrence (letter in A..F) is looked up in
    complexity_score_map and the resulting numbers are averaged.

    Args:
        cc_output: Decoded text output of `radon cc`.

    Returns:
        The mean of the mapped scores, or 0 when no grade is found.
    """
    grades = re.findall(r"- ([A-F])", cc_output)
    if not grades:
        return 0

    scores = [complexity_score_map[letter] for letter in grades]
    return sum(scores) / len(scores)

# Function to make sure a GitHub repository is present under base_dir
def clone_repo(github_link, project_name, base_dir):
    """Clone github_link into base_dir/project_name unless that path exists.

    Args:
        github_link: Repository URL handed to `git clone`.
        project_name: Name of the sub-directory to clone into.
        base_dir: Directory that holds all cloned repositories.

    Returns:
        The path of the project directory, or None when `git clone` exits
        with a non-zero status.
    """
    target = os.path.join(base_dir, project_name)

    # Anything already at the target path counts as a finished clone.
    if os.path.exists(target):
        print(f"Repository {project_name} already exists in {base_dir}. Skipping clone.")
        return target

    try:
        subprocess.run(['git', 'clone', github_link, target], check=True)
    except subprocess.CalledProcessError:
        print(f"Failed to clone {github_link}")
        return None

    print(f"Cloned {github_link} into {target}")
    return target

# Function to collect LOC and complexity figures for a cloned repository (no Pylint analysis)
def analyze_repo(repo_dir):
    """Run Radon over repo_dir and return its LOC total and average CC score.

    `radon raw` is run once per .py file found under repo_dir and the LOC
    values are summed; `radon cc -s` is run once on the whole directory.

    Args:
        repo_dir: Path of the repository to analyze.

    Returns:
        A (total_loc, average_cc) tuple, or (None, None) when a Radon
        command exits with a non-zero status.
    """
    try:
        loc_sum = 0
        for folder, _subdirs, filenames in os.walk(repo_dir):
            for name in filenames:
                if not name.endswith('.py'):
                    continue
                raw_bytes = subprocess.check_output(
                    ['radon', 'raw', os.path.join(folder, name)]
                )
                loc_sum += extract_loc_from_output(raw_bytes.decode('utf-8'))

        # Complexity is measured in a single pass over the directory.
        cc_bytes = subprocess.check_output(['radon', 'cc', repo_dir, '-s'])
        return loc_sum, extract_cc_from_output(cc_bytes.decode('utf-8'))

    except subprocess.CalledProcessError as e:
        print(f"Analysis failed for {repo_dir}: {e}")
        return None, None

# Read the list of projects from the Excel file
projects_df = pd.read_excel('project-list.xlsx')  # Use read_excel to read the Excel file

# Define the base directory where the repositories will be cloned
base_dir = 'cloned_repos'
os.makedirs(base_dir, exist_ok=True)

# Single source of truth for the output path, so the final message cannot
# drift away from the file that is actually written
output_csv = 'LOCandCC.csv'
result_columns = ['project_name', 'github_link', 'lines_of_code', 'cyclomatic_complexity']

# Prepare a list to store the results for each project
results = []

# Loop over each project and process it
for index, row in projects_df.iterrows():
    github_link = row['repo_url']  # Adjust based on your Excel file's column name

    # An empty spreadsheet cell is read as NaN (a float), which has no string
    # methods; skip such rows instead of crashing the whole run.
    if not isinstance(github_link, str) or not github_link.strip():
        print(f"Skipping row {index}: missing repository URL")
        continue

    # Use the last path segment of the URL as the project name. Trailing
    # slashes are dropped first, and only a trailing '.git' is removed so
    # that names such as 'user.github.io' are left intact.
    project_name = github_link.strip().rstrip('/').split('/')[-1]
    if project_name.endswith('.git'):
        project_name = project_name[:-len('.git')]

    # An empty name would make the clone target equal to base_dir itself.
    if not project_name:
        print(f"Skipping row {index}: cannot derive a project name from {github_link!r}")
        continue

    # Clone the repository
    repo_dir = clone_repo(github_link, project_name, base_dir)

    if repo_dir:
        # Analyze the repository (LOC and CC only)
        loc, cc = analyze_repo(repo_dir)

        if loc is not None and cc is not None:
            # Store the results for this project
            results.append({
                'project_name': project_name,
                'github_link': github_link,
                'lines_of_code': loc,  # Total LOC for the entire project
                'cyclomatic_complexity': cc
            })

# Save the results to a CSV file; explicit columns keep the header row even
# when no project could be analyzed
results_df = pd.DataFrame(results, columns=result_columns)
results_df.to_csv(output_csv, index=False)

print(f"Analysis complete for all projects. Results saved to '{output_csv}'.")
