**SOFTWARE DOCUMENTATION USING CODE SUMMARISATION**

In [1]:
!pip install datasets
!pip install transformers

Collecting datasets
  Downloading datasets-2.19.0-py3-none-any.whl (542 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m542.0/542.0 kB[0m [31m5.7 MB/s[0m eta [36m0:00:00[0m
Collecting dill<0.3.9,>=0.3.0 (from datasets)
  Downloading dill-0.3.8-py3-none-any.whl (116 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m116.3/116.3 kB[0m [31m7.3 MB/s[0m eta [36m0:00:00[0m
Collecting xxhash (from datasets)
  Downloading xxhash-3.4.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.1/194.1 kB[0m [31m6.7 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting multiprocess (from datasets)
  Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.8/134.8 kB[0m [31m7.5 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub>=0.21.2 (from datasets)
  Downloading huggingface_hub-0.22.2-py3-none-any.

In [2]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

Mounted at /content/drive


In [10]:
from transformers import BartTokenizer, BartForConditionalGeneration
import os

# Input folder containing Python source files
fine_tuned_model_path =input("Enter the path of the fine-tuned model: ")
input_folder = input("Enter the path of the folder containing Python source files: ")

# Load fine-tuned BART tokenizer and model
bart_tokenizer = BartTokenizer.from_pretrained("facebook/bart-base")
fine_tuned_model = BartForConditionalGeneration.from_pretrained(fine_tuned_model_path)

# Load BART-large-cnn tokenizer and model
bart_large_cnn_tokenizer = BartTokenizer.from_pretrained("facebook/bart-large-cnn")
bart_large_cnn_model = BartForConditionalGeneration.from_pretrained("facebook/bart-large-cnn")

# Step 1: Preprocessing
def preprocess_folder(folder_path):
    source_files = []
    for filename in os.listdir(folder_path):
        if filename.endswith(".py"):
            with open(os.path.join(folder_path, filename), 'r', encoding='utf-8') as file:
                source_files.append(file.read())
    return source_files

# Step 2: Method Summarization using fine-tuned BART model
def summarize_method(method_text, tokenizer, model):
    input_ids = tokenizer.encode("summarize: " + method_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(input_ids, max_length=150, min_length=40, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Step 3: Source File Summarization using BART-large-cnn model
def summarize_source_file(method_summaries, tokenizer, model):
    input_text = " ".join(method_summaries)
    input_ids = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(input_ids, max_length=300, min_length=100, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary

# Step 4: Overall Project Summarization using BART-large-cnn model
def summarize_project(source_file_summaries, tokenizer, model):
    input_text = " ".join(source_file_summaries)
    input_ids = tokenizer.encode("summarize: " + input_text, return_tensors="pt", max_length=1024, truncation=True)
    summary_ids = model.generate(input_ids, max_length=500, min_length=200, length_penalty=2.0, num_beams=4, early_stopping=True)
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return summary


# Step 1: Preprocessing
source_files = preprocess_folder(input_folder)

# Step 2: Method Summarization using fine-tuned BART model
method_summaries = [summarize_method(method, bart_tokenizer, fine_tuned_model) for method in source_files]

# Step 3: Source File Summarization using BART-large-cnn model
source_file_summary = summarize_source_file(method_summaries, bart_large_cnn_tokenizer, bart_large_cnn_model)

# Step 4: Overall Project Summarization using BART-large-cnn model
project_summary = summarize_project([source_file_summary], bart_large_cnn_tokenizer, bart_large_cnn_model)


import re

def simplify_code_summary(summary):
    # Split the summary into lines
    lines = summary.split("\n")
    updated_lines = []

    # Process each line individually
    unique_lines = set()
    for line in lines:
        # Remove consecutive repeating words
        line = re.sub(r'\b(\w+)(?:\W+\1\b)+', r'\1', line)

        # Remove words identified by dot (.)
        line = re.sub(r'\b\w+\.\w+\b', '', line)

        # Remove specified words from the line
        line = re.sub(r'\b(return|def|summary)\b', '', line)

        # Remove function calls with arguments
        line = re.sub(r'\b\w+\s*\([^()]*\)\s*', '', line)

        # Introduce new line whenever a period (.) is encountered
        line = line.replace('.', '.\n')

        # Remove hashes (#)
        line = line.replace('#', '')

        # Add line to updated lines if it's not a repetition
        if line not in unique_lines:
            updated_lines.append(line.strip())
            unique_lines.add(line)

    # Join the updated lines with new lines
    updated_summary = "\n".join(updated_lines)

    return updated_summary.strip()

print("PROJECT SUMMARY")
print(simplify_code_summary(project_summary))

Enter the path of the fine-tuned model: /content/drive/MyDrive/finetuned_model
Enter the path of the folder containing Python source files: /content/drive/MyDrive/chess_game
PROJECT SUMMARY
The game engine scales chess pieces to fit the square size of the board.
 The game state is displayed as a black and white image of the chess board.
 It is used to show the state of the game as well as the current state of each piece.
 It also shows the current position of the player and their position on the board at the start and end of a game.
 It can also be used to display the position of different pieces at different times in the game, such as when a move is made or a new move is played.
 For more information on how to play chess in the UK, visit .
 or go to .
com.
 In the .
 and Canada, go to  or call the National Chess Association on 1-800-273-8255 or visit a local branch of the NCA.
 In Europe, the NCCA is based at the University of Edinburgh.
