In [9]:
"""
This code reads in data from a CSV file containing commit information for a given repository. 
It calculates the number of lines of code added or removed in each commit, 
and uses that information to compute the size of the repository at the time of each commit. 
The repo size is calculated by dividing the total number of lines of code by the number of files in the repository. 
The resulting CSV file includes columns for the repository name, repository owner, commit ID,
committer name, commit date, code size in lines of code (LOC), 
and repository size in lines of code per file (LOC/file). Code size is measured in lines of code; repo size is measured in lines of code per file.

Script Author: Jesus Cantu 
"""

import csv

file_name = '/Users/jesuscantu/Desktop/Workspace/MiningSoftwareRepositories/Raw_Data/Commits/swift-argument-parser_commit_data.csv' # path to commit data 
repo_owner = 'apple' # 'repo_owner'
repo_name = 'swift-argument-parser' # 'repo_name'

# Header row for the output CSV file
header_row = ['repo_name', 'repo_owner', 'commit_id', 'committer_name', 'date', 'code_size_LOC', 'repo_size_LOC']


def _compute_sizes(num_inserts, num_deletes, net_lines, num_files):
    """Return ``(code_size_LOC, repo_size_LOC)`` for a single commit.

    ``code_size_LOC`` is ``num_inserts - num_deletes + net_lines``.
    ``repo_size_LOC`` is that value divided by ``num_files`` and rounded to
    two decimals, or ``0`` when the commit touched no files.

    Both values are computed from this commit's columns only; nothing is
    accumulated across commits.
    """
    # NOTE(review): if `net_lines` already equals num_inserts - num_deletes,
    # this counts the net change twice -- confirm the column's meaning.
    code_size = num_inserts - num_deletes + net_lines
    if num_files > 0:
        repo_size = round(code_size / num_files, 2)
    else:
        # Avoid dividing by zero for commits that list no files.
        repo_size = 0
    return code_size, repo_size


def read_commit_rows(commit_csv_path, name, owner):
    """Read the commit CSV and build one output row per commit.

    Args:
        commit_csv_path: Path to a CSV with at least the columns
            ``commit_id``, ``committer_name``, ``author_date``,
            ``num_inserts``, ``num_deletes``, ``net_lines`` and ``num_files``.
        name: Repository name written into every row.
        owner: Repository owner written into every row.

    Returns:
        A list of rows, each ordered like ``header_row``.

    Raises:
        KeyError: If a required column is missing.
        ValueError: If a numeric column does not hold an integer.
    """
    result = []
    # newline='' lets the csv module handle line endings itself, so quoted
    # fields containing line breaks are preserved; the explicit encoding
    # keeps non-ASCII committer names independent of the platform locale.
    with open(commit_csv_path, 'r', newline='', encoding='utf-8') as csv_file:
        for row in csv.DictReader(csv_file):
            code_size_LOC, repo_size_LOC = _compute_sizes(
                int(row['num_inserts']),
                int(row['num_deletes']),
                int(row['net_lines']),
                int(row['num_files']),
            )
            result.append([
                name,
                owner,
                row['commit_id'],
                row['committer_name'],
                row['author_date'],
                code_size_LOC,
                repo_size_LOC,
            ])
    return result


def write_rows(output_path, header, data_rows):
    """Write ``header`` followed by ``data_rows`` to ``output_path`` as CSV."""
    with open(output_path, mode='w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(header)
        writer.writerows(data_rows)


# True both when run as a script and when executed in a notebook cell, so the
# processing still happens there; importing the module no longer touches disk.
if __name__ == "__main__":
    print(f"Processing repository {repo_name}...")

    rows = read_commit_rows(file_name, repo_name, repo_owner)

    # Write the results to a new CSV file
    filename = f"{repo_name}-code_size_data.csv"
    output_file = f"/Users/jesuscantu/Desktop/Workspace/MiningSoftwareRepositories/Raw_Data/Commits/Code_Size/{filename}"
    write_rows(output_file, header_row, rows)

    print(f"Finished processing repository {repo_name}.")


Processing repository swift-argument-parser...
Finished processing repository swift-argument-parser.
