#**Study Case: Evaluating Quality from Projects of MIT App Inventor**




---



In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances_argmin_min
import scipy.cluster.hierarchy as hc
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.cluster import AgglomerativeClustering

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

%matplotlib inline

from mpl_toolkits.mplot3d import Axes3D
plt.rcParams['figure.figsize'] = (16, 9)
plt.style.use('ggplot')

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
file = "/content/drive/My Drive/appinventor_analysis/dataset.csv"

**1) Statidistic data extracted from dataset:**


---



In [4]:
df = pd.read_csv(file, sep=';')

In [5]:
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0


**2) Calculating Technical Debt from the projects:**


---



In [6]:
timePerBlock = 10

In [7]:
technicalDebt = [0] * 215244

**Code Smells & Bugs Level 1**

---



**M25**: NameOfVariableError

In [8]:
remediationEffort = 2

for index, row in df.iterrows():
  if(row.NameOfVariablesErrors > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NameOfVariablesErrors * remediationEffort)


**M26**: NameOfFunctionError

In [9]:
remediationEffort = 2

for index, row in df.iterrows():
  if(row.NameOfFunctionsErrors > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NameOfFunctionsErrors * remediationEffort)

**M45**: PercentageComments

In [10]:
maximumPercentageOfComments = 0.3
remediationEffort = 2

for index, row in df.iterrows():

  if(row.PercentageComments > maximumPercentageOfComments):
    percentageOfCommentsToDelete = row.PercentageComments - maximumPercentageOfComments
    technicalDebt[index] = technicalDebt[index] + (percentageOfCommentsToDelete * remediationEffort)

**Code Smells & Bugs Level 2**

---



**M23**: NumberOfDefinedFunctionalBlocksNotUsed

In [11]:
remediationEffort = 5

for index, row in df.iterrows():
  if(row.NumberOfDefinedFunctionalBlocksNotUsed > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfDefinedFunctionalBlocksNotUsed * remediationEffort)

**M35**: VariablesNotUsed

In [12]:
remediationEffort = 5

for index, row in df.iterrows():
  if(row.VariablesNotUsed > 1):
    technicalDebt[index] = technicalDebt[index] + (row.VariablesNotUsed * remediationEffort)

**M43**: NumberOfParametersErrors

In [13]:
remediationEffort = 20

for index, row in df.iterrows():
  if(row.NumberOfParamatersErrors > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfParamatersErrors * remediationEffort)

**M48**: NumberOfVariableNotInitialized

In [14]:
remediationEffort = 2

for index, row in df.iterrows():
  if(row.NumberOfVariablesNotInitialized > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfVariablesNotInitialized * remediationEffort)

**M49**: NumberOfIndexOutOfRange

In [15]:
remediationEffort = 5

for index, row in df.iterrows():
  if(row.NumberOfIndexOutOfRange > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfIndexOutOfRange * remediationEffort)

**M50**: NumberOfVariablesInConditionNotUpdated

In [16]:
remediationEffort = 15

for index, row in df.iterrows():
  if(row.NumberOfVariablesInConditionNotUpdated > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfVariablesInConditionNotUpdated * remediationEffort)

**M51**: NumberOfFunctionalBlocksWithoutReturn

In [17]:
remediationEffort = 20

for index, row in df.iterrows():
  if(row.NumberOfFunctionalBlocksWithoutReturn > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfFunctionalBlocksWithoutReturn * remediationEffort)

**Code Smells & Bugs Level 3**

---



**M42**: MagicNumbers

In [18]:
remediationEffort = 5

for index, row in df.iterrows():
  if(row.MagicNumbers > 1):
    technicalDebt[index] = technicalDebt[index] + (row.MagicNumbers * remediationEffort)

**M44**: NumberOfIfBlocksTooManyNested

In [19]:
remediationEffort = 10

for index, row in df.iterrows():
  if(row.NumberOfIfBlocksTooManyNested > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfIfBlocksTooManyNested * remediationEffort)

**M46**: NumberOfDivisionByZero

In [20]:
remediationEffort = 5

for index, row in df.iterrows():
  if(row.NumberOfDivisionByZero > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfDivisionByZero * remediationEffort)

**M47**: NumberOfInfiniteLoops

In [21]:
remediationEffort = 15

for index, row in df.iterrows():
  if(row.NumberOfInfiniteLoops > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfInfiniteLoops * remediationEffort)

**M52**: NumberOfWhileLoopsWithoutCondition

In [22]:
remediationEffort = 15

for index, row in df.iterrows():
  if(row.NumberOfWhileLoopsWithoutCondition > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfWhileLoopsWithoutCondition * remediationEffort)

**Code Smells Level 4**


---



**M22**: PercentageDuplicateBlocks

In [23]:
maximumPercentageOfDuplicateBlocks = 0.2
remediationEffort = 15

for index, row in df.iterrows():
  if(row.PercentageDuplicateBlocks > maximumPercentageOfDuplicateBlocks):
    percentageOfDuplicateBlocksToDelete = row.PercentageDuplicateBlocks - maximumPercentageOfDuplicateBlocks
    technicalDebt[index] = technicalDebt[index] + (percentageOfDuplicateBlocksToDelete * remediationEffort)

**M24**: NumberOfFunctionsWithTooManyBlocks

In [24]:
remediationEffort = 20

for index, row in df.iterrows():
  if(row.NumberOfFunctionsWithTooManyBlocks > 1):
    technicalDebt[index] = technicalDebt[index] + (row.NumberOfFunctionsWithTooManyBlocks * remediationEffort)

**Adding the new column "TechnicalDebt" to the dataset:**


---



In [25]:
df['TechnicalDebt'] = technicalDebt

In [26]:
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks,TechnicalDebt
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794,39.052007
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008,108.474377
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,34.86
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0,3917.0


**3) Calculating Maintainability from the projects:**


---



In [27]:
maintainability = [0] * 215244

In [28]:
for index, row in df.iterrows():
  investedTime = df.NumberOfTotalBlocks[index] * timePerBlock
  technicalDebtRatio = df.TechnicalDebt[index] / investedTime

  if technicalDebtRatio <= 0.05:
    maintainability[index] = 'A'
  elif technicalDebtRatio >= 0.06 and technicalDebtRatio <= 0.1:
    maintainability[index] = 'B'
  elif technicalDebtRatio >= 0.11 and technicalDebtRatio <= 0.2:
    maintainability[index] = 'C'
  elif technicalDebtRatio >= 0.21 and technicalDebtRatio <= 0.5:
    maintainability[index] = 'D'
  else:
    maintainability[index] = 'E'

**Adding the new column "Time" to the dataset:**


---



In [29]:
df['Maintainability'] = maintainability

In [30]:
df.describe()

Unnamed: 0,AppVersionCode,NumberOfScreens,NumberOfTotalBlocks,NumberOfDefinedFunctionalBlocks,NumberOfUsedFunctionalBlocks,NumberOfEventsBlocks,NumberOfConditionalBlocks,NumberOfLoopsBlocks,NumberOfGlobalVariables,NumberOfLocalVariables,NumberOfUserInterfaceBlocks,NumberOfLayoutBlocks,NumberOfMediaBlocks,NumberOfDrawingAnimationBlocks,NumberOfMapsBlocks,NumberOfChartsBlocks,NumberOfSensorsBlocks,NumberOfSocialBlocks,NumberOfStorageBlocks,NumberOfConnectivityBlocks,NumberOfLegoBlocks,NumberOfExperimentalBlocks,NumberOfScreensErrors,PercentageDuplicateBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,NumberOfParamatersErrors,VariablesNotUsed,PercentageComments,MagicNumbers,NumberOfFunctionsWithTooManyBlocks,NumberOfIfBlocksTooManyNested,MinCiclomaticComplexity,MaxCiclomaticComplexity,AvgCiclomaticComplexity,MinCognitiveComplexity,MaxCognitiveComplexity,AvgCognitiveComplexity,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition,NumberOfVariablesInConditionNotUpdated,NumberOfCamelCaseVariables,NumberOfCamelCaseDefinedFunctionalBlocks,NumberOfPascalCaseVariables,NumberOfPascalCaseDefinedFunctionalBlocks,NumberOfSnakeCaseVariables,NumberOfSnakeCaseDefinedFunctionalBlocks,NumberOfNoneCaseVariables,NumberOfNoneCaseDefinedFunctionalBlocks,TechnicalDebt
count,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0,215244.0
mean,1.284844,1.1533,128.620649,1.447999,9.941778,8.266837,3.946633,0.26699,3.463316,0.30812,54.41413,2.488878,5.53595,27.232053,0.171387,0.0,5.503447,0.533511,4.923742,5.066018,0.143052,0.313932,0.000794,1.055744,0.308817,0.021836,0.050111,0.267784,0.365723,1.449992,0.128069,0.58744,0.625202,0.917856,0.724913,0.812845,1.490564,1.048406,0.001942,0.005357,0.039393,0.041855,0.101917,0.001905,0.000929,0.731421,0.522932,0.38402,0.315869,0.288812,0.586818,0.208373,2.345817,0.566794,39.052007
std,8.437006,0.756459,138.737149,2.617231,13.532433,10.069302,5.811005,0.980861,4.889437,1.11695,128.152058,16.835835,25.883707,58.002135,8.13711,0.0,14.792882,6.055018,23.418029,19.975531,2.90455,4.74796,0.028175,6.495454,1.118109,0.259472,0.263312,1.35107,1.796877,4.77612,0.490103,1.985079,0.826539,1.267157,0.930928,1.818769,3.008281,2.058346,0.077691,0.171663,0.501245,0.788914,0.546901,0.198268,0.056611,1.338948,1.540991,1.187401,1.223953,1.184697,2.231482,0.929029,3.649413,1.550008,108.474377
min,0.0,1.0,71.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,1.0,1.0,85.0,0.0,3.0,4.0,1.0,0.0,1.0,0.0,15.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
50%,1.0,1.0,106.0,0.0,7.0,6.0,3.0,0.0,2.0,0.0,34.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,6.0
75%,1.0,1.0,140.0,2.0,13.0,10.0,5.0,0.0,5.0,0.0,65.0,0.0,3.0,38.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,2.0,1.14,1.0,2.0,1.2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,34.86
max,999.0,30.0,19000.0,241.0,915.0,1235.0,755.0,74.0,298.0,82.0,10398.0,2594.0,2022.0,5980.0,3051.0,0.0,2638.0,610.0,2151.0,2296.0,193.0,605.0,1.0,99.63,89.0,56.0,17.0,100.0,100.0,413.0,46.0,144.0,20.0,20.0,20.0,71.0,71.0,71.0,12.0,44.0,81.0,119.0,89.0,89.0,20.0,163.0,118.0,140.0,86.0,94.0,139.0,72.0,298.0,241.0,3917.0


**Projects with Maintainability = A**


---



In [31]:
projectsWithCategoryA = (df.Maintainability == 'A').sum()
percentageProjectsWithCategoryA = 100 * float(projectsWithCategoryA)/float(215244)

In [32]:
print(projectsWithCategoryA)
print(str(percentageProjectsWithCategoryA) + "%")

187159
86.9520172455446%


**Projects with Maintainability = B**


---



In [33]:
projectsWithCategoryB = (df.Maintainability == 'B').sum()
percentageProjectsWithCategoryB = 100 * float(projectsWithCategoryB)/float(215244)

In [34]:
print(projectsWithCategoryB)
print(str(percentageProjectsWithCategoryB) + "%")

13027
6.052201222798312%


**Projects with Maintainability = C**


---



In [35]:
projectsWithCategoryC = (df.Maintainability == 'C').sum()
percentageProjectsWithCategoryC = 100 * float(projectsWithCategoryC)/float(215244)

In [36]:
print(projectsWithCategoryC)
print(str(percentageProjectsWithCategoryC) + "%")

3262
1.5154893980784598%


**Projects with Maintainability = D**


---



In [37]:
projectsWithCategoryD = (df.Maintainability == 'D').sum()
percentageProjectsWithCategoryD = 100 * float(projectsWithCategoryD)/float(215244)

In [38]:
print(projectsWithCategoryD)
print(str(percentageProjectsWithCategoryD) + "%")

3163
1.4694950846481203%


**Projects with Maintainability = E**


---



In [39]:
projectsWithCategoryE = (df.Maintainability == 'E').sum()
percentageProjectsWithCategoryE = 100 * float(projectsWithCategoryE)/float(215244)

In [40]:
print(projectsWithCategoryE)
print(str(percentageProjectsWithCategoryE) + "%")

8633
4.010797048930516%


**Graphic by Maintainability Category:**


---



In [41]:
df.groupby('Maintainability').agg({'AppVersionCode': 'mean', 'NumberOfTotalBlocks': 'mean', 'TechnicalDebt': 'mean', 'PercentageDuplicateBlocks': 'mean', 'NumberOfDefinedFunctionalBlocksNotUsed': 'mean', 'NumberOfFunctionsWithTooManyBlocks' : 'mean', 'NameOfVariablesErrors' : 'mean', 'NameOfFunctionsErrors' : 'mean', 'VariablesNotUsed' : 'mean', 'MagicNumbers' : 'mean', 'NumberOfParamatersErrors' : 'mean', 'NumberOfIfBlocksTooManyNested' : 'mean', 'PercentageComments' : 'mean', 'NumberOfDivisionByZero' : 'mean', 'NumberOfInfiniteLoops' : 'mean', 'NumberOfVariablesNotInitialized' : 'mean', 'NumberOfIndexOutOfRange' : 'mean', 'NumberOfVariablesInConditionNotUpdated' : 'mean', 'NumberOfFunctionalBlocksWithoutReturn' : 'mean', 'NumberOfWhileLoopsWithoutCondition' : 'mean'})

Unnamed: 0_level_0,AppVersionCode,NumberOfTotalBlocks,TechnicalDebt,PercentageDuplicateBlocks,NumberOfDefinedFunctionalBlocksNotUsed,NumberOfFunctionsWithTooManyBlocks,NameOfVariablesErrors,NameOfFunctionsErrors,VariablesNotUsed,MagicNumbers,NumberOfParamatersErrors,NumberOfIfBlocksTooManyNested,PercentageComments,NumberOfDivisionByZero,NumberOfInfiniteLoops,NumberOfVariablesNotInitialized,NumberOfIndexOutOfRange,NumberOfVariablesInConditionNotUpdated,NumberOfFunctionalBlocksWithoutReturn,NumberOfWhileLoopsWithoutCondition
Maintainability,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
A,1.30519,128.440091,15.144561,0.008449,0.081086,0.122607,0.308054,0.02142,0.210655,0.640247,0.043573,0.341859,0.323342,0.001929,0.003318,0.036461,0.034949,0.68297,0.001106,0.00063
B,1.057342,131.095724,99.650706,0.609793,0.217241,0.174484,0.355339,0.029017,0.592232,7.841867,0.081677,2.769402,0.785349,0.001766,0.017272,0.049129,0.082751,1.134797,0.00238,0.000998
C,1.168608,135.783262,196.500383,5.971652,0.24954,0.145616,0.295218,0.021766,1.076947,11.998467,0.091968,2.524831,0.652774,0.005518,0.031882,0.091355,0.121398,0.974555,0.003066,0.003679
D,1.06007,129.361998,417.944698,25.289624,0.254189,0.084414,0.207082,0.027822,0.552956,4.222574,0.01644,1.018653,0.349649,0.001897,0.021182,0.085046,0.0822,0.559911,0.008852,0.006639
E,1.313333,125.822194,267.598313,13.697093,0.267925,0.185799,0.297579,0.017839,0.60651,4.358045,0.140739,1.728947,0.548756,0.001158,0.015754,0.051894,0.085023,1.144098,0.015522,0.00417
