# **Case Study: Evaluating the Quality of MIT App Inventor Projects**




---



In [31]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import scipy.cluster.hierarchy as hc
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.cluster import AgglomerativeClustering

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

%matplotlib inline

from mpl_toolkits.mplot3d import Axes3D
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

In [32]:
# Mount Google Drive into the Colab runtime at /content/drive so the dataset
# stored there can be read through an ordinary file path.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [33]:
# Path of the semicolon-separated dataset inside the mounted Drive; it is
# loaded into `df` by the read_csv cell that follows.
file = "/content/drive/My Drive/appinventor_analysis/dataset.csv"

**1) Statistical data extracted from the dataset:**


---



In [34]:
# Load the dataset into a DataFrame; the CSV uses ';' as its field separator.
df = pd.read_csv(file, sep=';')

In [35]:
# Summary statistics (count, mean, std, min, quartiles, max) of the numeric columns.
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0


**2) Calculating Technical Debt from the projects:**


---



In [36]:
# Factor multiplied by NumberOfTotalBlocks to estimate a project's invested time.
# NOTE(review): the unit and the origin of 0.554 are not stated in the notebook — confirm.
timePerBlock = 0.554

In [37]:
# Running technical-debt total, one slot per project (row of df), starting at zero.
# Sized from the frame itself rather than a hard-coded row count, so the list
# always matches the dataset that was actually loaded.
technicalDebt = [0] * len(df)

**Code Smells & Bugs Level 1**

---



**M25**: NameOfVariablesErrors

In [38]:
# M25: every variable-naming error adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one error
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 2

for index, namingErrors in df['NameOfVariablesErrors'].items():
  if namingErrors > 1:
    technicalDebt[index] += namingErrors * remediationEffort


**M26**: NameOfFunctionsErrors

In [39]:
# M26: every function-naming error adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one error
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 2

for index, functionNamingErrors in df['NameOfFunctionsErrors'].items():
  if functionNamingErrors > 1:
    technicalDebt[index] += functionNamingErrors * remediationEffort

**M45**: PercentageComments

In [40]:
# M45: only the share of comments above the allowed maximum is charged as debt,
# at `remediationEffort` per percentage point.
# NOTE(review): PercentageComments reaches 100.0 in the describe() output, so it
# appears to be on a 0-100 scale; a threshold of 0.3 then means 0.3 %, not 30 % — confirm.
maximumPercentageOfComments = 0.3
remediationEffort = 2

for index, commentPercentage in df['PercentageComments'].items():
  if commentPercentage > maximumPercentageOfComments:
    percentageOfCommentsToDelete = commentPercentage - maximumPercentageOfComments
    technicalDebt[index] += percentageOfCommentsToDelete * remediationEffort

**Code Smells & Bugs Level 2**

---



**M23**: NumberOfDefinedFunctionalBlocksNotUsed

In [41]:
# M23: every defined-but-unused functional block adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one such block
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 5

for index, unusedFunctions in df['NumberOfDefinedFunctionalBlocksNotUsed'].items():
  if unusedFunctions > 1:
    technicalDebt[index] += unusedFunctions * remediationEffort

**M35**: VariablesNotUsed

In [42]:
# M35: the VariablesNotUsed value is charged at `remediationEffort` per unit.
# NOTE(review): because of the `> 1` guard, a value of exactly 1 adds no debt —
# confirm whether `> 0` was intended.
remediationEffort = 5

for index, unusedVariables in df['VariablesNotUsed'].items():
  if unusedVariables > 1:
    technicalDebt[index] += unusedVariables * remediationEffort

**M43**: NumberOfParametersErrors (dataset column: `NumberOfParamatersErrors`)

In [43]:
# M43: every parameter error adds `remediationEffort` to the project's debt.
# The column name is spelled "Paramaters" in the dataset and must be used as-is.
# NOTE(review): because of the `> 1` guard, a project with exactly one error
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 20

for index, parameterErrors in df['NumberOfParamatersErrors'].items():
  if parameterErrors > 1:
    technicalDebt[index] += parameterErrors * remediationEffort

**M48**: NumberOfVariablesNotInitialized

In [44]:
# M48: every uninitialized variable adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one occurrence
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 2

for index, uninitializedVariables in df['NumberOfVariablesNotInitialized'].items():
  if uninitializedVariables > 1:
    technicalDebt[index] += uninitializedVariables * remediationEffort

**M49**: NumberOfIndexOutOfRange

In [45]:
# M49: every index-out-of-range occurrence adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one occurrence
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 5

for index, outOfRangeAccesses in df['NumberOfIndexOutOfRange'].items():
  if outOfRangeAccesses > 1:
    technicalDebt[index] += outOfRangeAccesses * remediationEffort

**M50**: NumberOfVariablesInConditionNotUpdated

In [46]:
# M50: every condition variable that is never updated adds `remediationEffort`
# to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one occurrence
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 15

for index, staleConditionVariables in df['NumberOfVariablesInConditionNotUpdated'].items():
  if staleConditionVariables > 1:
    technicalDebt[index] += staleConditionVariables * remediationEffort

**M51**: NumberOfFunctionalBlocksWithoutReturn

In [47]:
# M51: every functional block without a return adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one such block
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 20

for index, functionsWithoutReturn in df['NumberOfFunctionalBlocksWithoutReturn'].items():
  if functionsWithoutReturn > 1:
    technicalDebt[index] += functionsWithoutReturn * remediationEffort

**Code Smells & Bugs Level 3**

---



**M42**: MagicNumbers

In [48]:
# M42: every magic number adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one magic
# number adds no debt — confirm whether `> 0` was intended.
remediationEffort = 5

for index, magicNumbers in df['MagicNumbers'].items():
  if magicNumbers > 1:
    technicalDebt[index] += magicNumbers * remediationEffort

**M44**: NumberOfIfBlocksTooManyNested

In [49]:
# M44: every over-nested if block adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one such block
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 10

for index, deeplyNestedIfs in df['NumberOfIfBlocksTooManyNested'].items():
  if deeplyNestedIfs > 1:
    technicalDebt[index] += deeplyNestedIfs * remediationEffort

**M46**: NumberOfDivisionByZero

In [50]:
# M46: every division-by-zero occurrence adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one occurrence
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 5

for index, divisionsByZero in df['NumberOfDivisionByZero'].items():
  if divisionsByZero > 1:
    technicalDebt[index] += divisionsByZero * remediationEffort

**M47**: NumberOfInfiniteLoops

In [51]:
# M47: every infinite loop adds `remediationEffort` to the project's debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one infinite
# loop adds no debt — confirm whether `> 0` was intended.
remediationEffort = 15

for index, infiniteLoops in df['NumberOfInfiniteLoops'].items():
  if infiniteLoops > 1:
    technicalDebt[index] += infiniteLoops * remediationEffort

**M52**: NumberOfWhileLoopsWithoutCondition

In [52]:
# M52: every while loop without a condition adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one such loop
# adds no debt — confirm whether `> 0` was intended.
remediationEffort = 15

for index, unconditionedWhileLoops in df['NumberOfWhileLoopsWithoutCondition'].items():
  if unconditionedWhileLoops > 1:
    technicalDebt[index] += unconditionedWhileLoops * remediationEffort

**Code Smells Level 4**


---



**M22**: PercentageDuplicateBlocks

In [53]:
# M22: only the share of duplicate blocks above the allowed maximum is charged
# as debt, at `remediationEffort` per percentage point.
# NOTE(review): PercentageDuplicateBlocks reaches 99.63 in the describe() output,
# so it appears to be on a 0-100 scale; a threshold of 0.2 then means 0.2 %,
# not 20 % — confirm.
maximumPercentageOfDuplicateBlocks = 0.2
remediationEffort = 15

for index, duplicatePercentage in df['PercentageDuplicateBlocks'].items():
  if duplicatePercentage > maximumPercentageOfDuplicateBlocks:
    percentageOfDuplicateBlocksToDelete = duplicatePercentage - maximumPercentageOfDuplicateBlocks
    technicalDebt[index] += percentageOfDuplicateBlocksToDelete * remediationEffort

**M24**: NumberOfFunctionsWithTooManyBlocks

In [54]:
# M24: every function with too many blocks adds `remediationEffort` to the debt.
# NOTE(review): because of the `> 1` guard, a project with exactly one such
# function adds no debt — confirm whether `> 0` was intended.
remediationEffort = 20

for index, oversizedFunctions in df['NumberOfFunctionsWithTooManyBlocks'].items():
  if oversizedFunctions > 1:
    technicalDebt[index] += oversizedFunctions * remediationEffort

**Adding the new column "TechnicalDebt" to the dataset:**


---



In [55]:
# Attach the accumulated per-project debt as a new column.
# Relies on `technicalDebt` holding exactly one entry per row of df, in row order.
df['TechnicalDebt'] = technicalDebt

In [56]:
# Summary statistics again, now including the new TechnicalDebt column.
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks,TechnicalDebt
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794,39.052007
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008,108.474377
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,34.86
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0,3917.0


**3) Calculating Maintainability from the projects:**


---



In [57]:
# Per-project result slots, one per row of df, filled in by the grading loop.
# Sized from the frame itself rather than a hard-coded row count, so the lists
# always match the dataset that was actually loaded.
maintainability = [0] * len(df)
investedTime = [0] * len(df)
technicalDebtRatio = [0] * len(df)

In [58]:
# For every project: estimate the invested time from its block count, divide the
# accumulated debt by it, and translate that ratio into a grade from 'A' to 'E'.
# Each entry is (grade, inclusive upper bound of the debt ratio); ratios above
# the last bound are graded 'E'.
gradeUpperBounds = (('A', 0.05), ('B', 0.1), ('C', 0.2), ('D', 0.5))

# Iterating the numpy arrays keeps numpy scalar arithmetic, so a zero
# denominator yields inf/nan instead of raising.
blockCounts = df['NumberOfTotalBlocks'].to_numpy()
projectDebts = df['TechnicalDebt'].to_numpy()

for index, totalBlocks, projectDebt in zip(df.index, blockCounts, projectDebts):
  projectInvestedTime = totalBlocks * timePerBlock
  projectTechnicalDebtRatio = projectDebt / projectInvestedTime

  investedTime[index] = projectInvestedTime
  technicalDebtRatio[index] = projectTechnicalDebtRatio

  for grade, upperBound in gradeUpperBounds:
    if projectTechnicalDebtRatio <= upperBound:
      maintainability[index] = grade
      break
  else:
    # A NaN ratio matches no comparison and leaves the slot at its initial value.
    if projectTechnicalDebtRatio > 0.5:
      maintainability[index] = 'E'

**Adding the new columns to the dataset:**


---



In [59]:
# Attach the three per-project result lists as new columns, in this order.
for columnName, columnValues in (
    ('Maintainability', maintainability),
    ('InvestedTime', investedTime),
    ('TechnicalDebtRatio', technicalDebtRatio),
):
  df[columnName] = columnValues

In [60]:
# Summary statistics again, now including InvestedTime and TechnicalDebtRatio
# (Maintainability holds letter grades and is not part of the numeric summary shown).
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks,TechnicalDebt,InvestedTime,TechnicalDebtRatio
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794,39.052007,71.25584,0.560644
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008,108.474377,76.86038,1.684801
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,39.334,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,47.09,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0,58.724,0.101008
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,34.86,77.56,0.555401
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0,3917.0,10526.0,35.402552


**Projects with Maintainability = A**


---



In [61]:
# Number of projects graded 'A' and their share of the whole dataset, in percent.
# The denominator comes from the frame itself rather than a hard-coded row count,
# so the percentage stays correct if the dataset changes.
projectsWithCategoryA = (df.Maintainability == 'A').sum()
percentageProjectsWithCategoryA = 100 * float(projectsWithCategoryA) / float(len(df))

In [62]:
# Report the count and the percentage of projects graded 'A'.
print(projectsWithCategoryA)
print(f"{percentageProjectsWithCategoryA}%")

99818
46.374347252420506%


**Projects with Maintainability = B**


---



In [63]:
# Number of projects graded 'B' and their share of the whole dataset, in percent.
# The denominator comes from the frame itself rather than a hard-coded row count,
# so the percentage stays correct if the dataset changes.
projectsWithCategoryB = (df.Maintainability == 'B').sum()
percentageProjectsWithCategoryB = 100 * float(projectsWithCategoryB) / float(len(df))

In [64]:
# Report the count and the percentage of projects graded 'B'.
print(projectsWithCategoryB)
print(f"{percentageProjectsWithCategoryB}%")

7666
3.561539462191745%


**Projects with Maintainability = C**


---



In [65]:
# Number of projects graded 'C' and their share of the whole dataset, in percent.
# The denominator comes from the frame itself rather than a hard-coded row count,
# so the percentage stays correct if the dataset changes.
projectsWithCategoryC = (df.Maintainability == 'C').sum()
percentageProjectsWithCategoryC = 100 * float(projectsWithCategoryC) / float(len(df))

In [66]:
# Report the count and the percentage of projects graded 'C'.
print(projectsWithCategoryC)
print(f"{percentageProjectsWithCategoryC}%")

10043
4.665867573544443%


**Projects with Maintainability = D**


---



In [67]:
# Number of projects graded 'D' and their share of the whole dataset, in percent.
# The denominator comes from the frame itself rather than a hard-coded row count,
# so the percentage stays correct if the dataset changes.
projectsWithCategoryD = (df.Maintainability == 'D').sum()
percentageProjectsWithCategoryD = 100 * float(projectsWithCategoryD) / float(len(df))

In [68]:
# Report the count and the percentage of projects graded 'D'.
print(projectsWithCategoryD)
print(f"{percentageProjectsWithCategoryD}%")

38258
17.774246901191205%


**Projects with Maintainability = E**


---



In [69]:
# Number of projects graded 'E' and their share of the whole dataset, in percent.
# The denominator comes from the frame itself rather than a hard-coded row count,
# so the percentage stays correct if the dataset changes.
projectsWithCategoryE = (df.Maintainability == 'E').sum()
percentageProjectsWithCategoryE = 100 * float(projectsWithCategoryE) / float(len(df))

In [70]:
# Report the count and the percentage of projects graded 'E'.
print(projectsWithCategoryE)
print(f"{percentageProjectsWithCategoryE}%")

59459
27.623998810652097%


**Summary table by Maintainability Category:**


---



In [71]:
# Columns whose mean is reported for each maintainability grade, in display order.
meanColumns = [
    'AppVersionCode',
    'NumberOfTotalBlocks',
    'TechnicalDebt',
    'InvestedTime',
    'PercentageDuplicateBlocks',
    'NumberOfDefinedFunctionalBlocksNotUsed',
    'NumberOfFunctionsWithTooManyBlocks',
    'NameOfVariablesErrors',
    'NameOfFunctionsErrors',
    'VariablesNotUsed',
    'MagicNumbers',
    'NumberOfParamatersErrors',
    'NumberOfIfBlocksTooManyNested',
    'PercentageComments',
    'NumberOfDivisionByZero',
    'NumberOfInfiniteLoops',
    'NumberOfVariablesNotInitialized',
    'NumberOfIndexOutOfRange',
    'NumberOfVariablesInConditionNotUpdated',
    'NumberOfFunctionalBlocksWithoutReturn',
    'NumberOfWhileLoopsWithoutCondition',
]

# One row per grade; every listed column is aggregated with its mean.
df.groupby('Maintainability').agg(dict.fromkeys(meanColumns, 'mean'))

Unnamed: 0_level_0,AppVersionCode,NumberOfTotalBlocks,TechnicalDebt,InvestedTime,PercentageDuplicateBlocks,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionsWithTooManyBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,VariablesNotUsed,MagicNumbers,NumberOfParamatersErrors,NumberOfIfBlocksTooManyNested,PercentageComments,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfVariablesInConditionNotUpdated,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition
Maintainability,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A,1.334298,114.232343,0.130136,63.284718,4.3e-05,0.039873,0.062384,0.090074,0.006943,0.066351,0.055421,0.038761,0.071911,0.037346,0.000471,0.000952,0.012292,0.002124,0.295147,0.000751,0.000611
B,1.105531,127.581007,5.039384,70.679878,0.000621,0.048135,0.106966,1.395513,0.07018,0.133185,0.085964,0.061571,0.101096,0.708242,0.000913,0.001565,0.086486,0.007044,0.425124,0.000913,0.000261
C,1.300309,155.683859,12.796549,86.248858,0.003137,0.185602,0.093697,1.265857,0.040924,0.473763,0.610176,0.050582,0.226725,0.787511,0.005576,0.003286,0.108334,0.056358,0.385343,0.001294,0.000996
D,1.24408,159.730697,30.998853,88.490806,0.010921,0.132992,0.224685,0.376287,0.044566,0.383005,1.13077,0.051231,0.619138,0.734146,0.003189,0.004104,0.062523,0.066783,1.14397,0.001542,0.000627
E,1.248558,128.32091,118.394499,71.089784,3.814129,0.17888,0.184699,0.330867,0.022755,0.514371,4.314267,0.066886,1.556131,0.564535,0.003128,0.014396,0.052288,0.094553,1.296322,0.004305,0.001732
