---

---

# Load version in files and extract data from their commits

---

---

## File packages

### pip installed packages

In [1]:
import os.path
import git as git
import pandas as pd

from configparser import ConfigParser

### Local packages


In [2]:
import src.versions as my_versions
import src.understand as my_understand

---

## Constants loading


In [3]:
# Load the notebook configuration. ConfigParser.read() silently skips files it
# cannot open and returns the list of files it actually parsed, so an empty
# result means config.ini was not found (or not readable) from the working
# directory. Fail here with a clear message instead of a later KeyError.
config: ConfigParser = ConfigParser()
if not config.read("config.ini"):
    raise FileNotFoundError(
        "config.ini could not be read from the current working directory: "
        + os.path.abspath("config.ini")
    )

# Directory and repository names are taken from config.ini.
data_directory: str = config["GENERAL"]["DataDirectory"]
hive_git_directory: str = config["GIT"]["HiveGitDirectory"]
hive_git_repo_Name: str = config["GIT"]["HiveGitRepoName"]

# Path of the local clone: <data dir>/<git dir>/<repo name>.
hive_git_path: str = os.path.join(data_directory, hive_git_directory, hive_git_repo_Name)

# GitPython handle on the local clone; used below to read the release tags.
repo: git.Repo = git.Repo(hive_git_path)

---

## Extracting git tags of minor and major versions

Alpha, beta, and patch releases are excluded; only major and minor release tags are kept.

In [4]:
# Collect the release tags of the repository via the project helper
# (selection rules live in src.versions), then display them.
tags = my_versions.get_versions_tags(repo)
tags

[<git.TagReference "refs/tags/rel/release-2.1.0">,
 <git.TagReference "refs/tags/rel/release-2.2.0">,
 <git.TagReference "refs/tags/rel/release-2.3.0">,
 <git.TagReference "refs/tags/rel/release-3.0.0">,
 <git.TagReference "refs/tags/rel/release-3.1.0">,
 <git.TagReference "refs/tags/rel/release-4.0.0">,
 <git.TagReference "refs/tags/release-2.0.0">]

In [5]:
# Map each version string (the text after the last "-" in the tag name,
# e.g. "2.0.0") to the commit its tag points at, following the order returned
# by my_versions.order_versions. Keys are plain strings, not (int, int, int)
# tuples, so the annotation is dict[str, git.Commit].
filtered_versions: dict[str, git.Commit] = {}
for tag in my_versions.order_versions(tags):
    filtered_versions[tag.name.split("-")[-1]] = tag.commit
filtered_versions

{'2.0.0': <git.Commit "7f9f1fcb8697fb33f0edc2c391930a3728d247d7">,
 '2.1.0': <git.Commit "9265bc24d75ac945bde9ce1a0999fddd8f2aae29">,
 '2.2.0': <git.Commit "da840b0f8fa99cab9f004810cd22abc207493cae">,
 '2.3.0': <git.Commit "6f4c35c9e904d226451c465effdc5bfd31d395a0">,
 '3.0.0': <git.Commit "ce61711a5fa54ab34fc74d86d521ecaeea6b072a">,
 '3.1.0': <git.Commit "bcc7df95824831a8d2f1524e4048dfc23ab98c19">,
 '4.0.0': <git.Commit "183f8cb41d3dbed961ffd27999876468ff06690c">}

---

---

# Data processing using understand from scitools


---

---

## Constants loading

In [6]:
# Settings needed to locate the Understand project on disk (read from config.ini).
hive_git_directory: str = config["GIT"]["HiveGitDirectory"]
data_directory: str = config["GENERAL"]["DataDirectory"]
understand_project_name: str = config["UNDERSTAND"]["UnderstandProjectName"]

# Project location: <data dir>/<git dir>/<understand project name>.
understand_project_path: str = os.path.join(
    data_directory,
    hive_git_directory,
    understand_project_name,
)

# Only ask the helper to create the project when nothing exists at that path yet.
if not os.path.exists(understand_project_path):
    my_understand.und_create_command()

---

## Purging potential previous data

In [7]:
# Purge the Understand database so the run starts from a clean state
# (the recorded output shows the helper invoking `und purge -db ...`).
my_understand.und_purge_command()

Running command : 
     und purge -db data\hive_data\hive.und
Database purged.



---

## Analyzing project if requested in config

In [8]:
# Display the version -> commit mapping that is passed to the metrics step below.
filtered_versions

{'2.0.0': <git.Commit "7f9f1fcb8697fb33f0edc2c391930a3728d247d7">,
 '2.1.0': <git.Commit "9265bc24d75ac945bde9ce1a0999fddd8f2aae29">,
 '2.2.0': <git.Commit "da840b0f8fa99cab9f004810cd22abc207493cae">,
 '2.3.0': <git.Commit "6f4c35c9e904d226451c465effdc5bfd31d395a0">,
 '3.0.0': <git.Commit "ce61711a5fa54ab34fc74d86d521ecaeea6b072a">,
 '3.1.0': <git.Commit "bcc7df95824831a8d2f1524e4048dfc23ab98c19">,
 '4.0.0': <git.Commit "183f8cb41d3dbed961ffd27999876468ff06690c">}

If set to **yes** in config.ini, the project will be analyzed

<p style="color:red; font-size:20px;">Analyzing will take a long time</p>

If the analysis finishes in under 5 minutes, it is not working: check the config.ini file.

In [9]:
# Run the metrics analysis for the selected versions. Whether it actually runs
# is decided by config.ini; in the recorded run it was skipped by configuration.
my_understand.metrics(filtered_versions)

Metrics analysis is skipped as per configuration.


In [10]:
# Label every per-version metrics file. Per the recorded output, it reads
# <version>_metrics.csv from the metrics output directory, reports the number
# of bugs found, and writes <version>_labeled_metrics.csv.
my_understand.label_all_metrics()

Creating output directory: data\labeled_metrics_output
2.0.0_metrics.csv
Processing metrics file: data\metrics_output\2.0.0_metrics.csv
Number of bugs found in version 2.0.0: 1161
Labeled metrics saved to: data\labeled_metrics_output\2.0.0_labeled_metrics.csv
2.1.0_metrics.csv
Processing metrics file: data\metrics_output\2.1.0_metrics.csv
Number of bugs found in version 2.1.0: 1004
Labeled metrics saved to: data\labeled_metrics_output\2.1.0_labeled_metrics.csv
2.2.0_metrics.csv
Processing metrics file: data\metrics_output\2.2.0_metrics.csv
Number of bugs found in version 2.2.0: 722
Labeled metrics saved to: data\labeled_metrics_output\2.2.0_labeled_metrics.csv
2.3.0_metrics.csv
Processing metrics file: data\metrics_output\2.3.0_metrics.csv
Number of bugs found in version 2.3.0: 56
Labeled metrics saved to: data\labeled_metrics_output\2.3.0_labeled_metrics.csv
3.0.0_metrics.csv
Processing metrics file: data\metrics_output\3.0.0_metrics.csv
Number of bugs found in version 3.0.0: 5691
Lab

In [11]:
# Enrich every per-version metrics file. Per the recorded output, it reads
# <version>_metrics.csv and writes <version>_enrichi_metrics.csv to the
# enriched metrics output directory.
my_understand.enrich_metrics()

Creating output directory: data\enriched_metrics_output
Processing enrichment metrics file: data\metrics_output\2.0.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\2.0.0_enrichi_metrics.csv
Processing enrichment metrics file: data\metrics_output\2.1.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\2.1.0_enrichi_metrics.csv
Processing enrichment metrics file: data\metrics_output\2.2.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\2.2.0_enrichi_metrics.csv
Processing enrichment metrics file: data\metrics_output\2.3.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\2.3.0_enrichi_metrics.csv
Processing enrichment metrics file: data\metrics_output\3.0.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\3.0.0_enrichi_metrics.csv
Processing enrichment metrics file: data\metrics_output\3.1.0_metrics.csv
Enriched metrics saved to: data\enriched_metrics_output\3.1.0_enrichi_metrics.csv
Processi

In [12]:
# Version strings in the insertion order of filtered_versions; iterating a
# dict yields its keys.
versions = list(filtered_versions)
# Produce the final per-version static metrics files.
my_understand.merge_all_metrics(versions)

Creating output directory: data\merged_metrics_output
Final file generated for version 2.0.0: data\merged_metrics_output\2.0.0_static_metrics.csv
Final file generated for version 2.1.0: data\merged_metrics_output\2.1.0_static_metrics.csv
Final file generated for version 2.2.0: data\merged_metrics_output\2.2.0_static_metrics.csv
Final file generated for version 2.3.0: data\merged_metrics_output\2.3.0_static_metrics.csv
Final file generated for version 3.0.0: data\merged_metrics_output\3.0.0_static_metrics.csv
Final file generated for version 3.1.0: data\merged_metrics_output\3.1.0_static_metrics.csv
Final file generated for version 4.0.0: data\merged_metrics_output\4.0.0_static_metrics.csv
