import math
import os

import numpy as np
import pandas as pd
from scipy.stats import chi2_contingency as chi2

print("Finished")


# Row stored for a statement whose coverage cannot be related to the test
# outcome: chi-square 0, M 0, proportion 1, suspiciousness 0.
_NEUTRAL_ROW = (0.0, 0.0, 1.0, 0.0)


def checkDifference(s):
    """Return True when at least two neighbouring elements of *s* differ.

    Parameters
    ----------
    s : iterable
        Any iterable of values (list, NumPy array, ``pd.Categorical`` ...).
        It is consumed positionally, so a Series with a non-default index
        works as well.

    Returns
    -------
    bool
        ``False`` for an empty or single-element input and for an input
        whose elements are all equal; ``True`` otherwise.
    """
    values = list(s)
    return any(prev != cur for prev, cur in zip(values, values[1:]))


def _crosstab_counts(hits, outcome):
    """Count tests per (coverage, outcome) pair as [[uncov/pass, uncov/fail], [cov/pass, cov/fail]]."""
    # Entries that are neither 0 nor 1 match no cell and are ignored.
    return [
        [int(np.sum((hits == covered) & (outcome == failed))) for failed in (0, 1)]
        for covered in (0, 1)
    ]


def _score_statement(hits, outcome, verbose):
    """Return [chi-square, M, proportion, suspicious] for one statement column."""
    if not checkDifference(hits):
        # The statement is covered by every test or by none.
        return list(_NEUTRAL_ROW)

    (unc_pass, unc_fail), (cov_pass, cov_fail) = _crosstab_counts(hits, outcome)
    total_pass = unc_pass + cov_pass
    total_fail = unc_fail + cov_fail
    total_unc = unc_pass + unc_fail
    total_cov = cov_pass + cov_fail
    n_tests = total_pass + total_fail

    if verbose:
        print(pd.DataFrame(
            [[unc_pass, unc_fail, total_unc],
             [cov_pass, cov_fail, total_cov],
             [total_pass, total_fail, n_tests]],
            index=['uncovered', 'covered', 'coltotal'],
            columns=['passed', 'failed', 'rowtotal'],
        ))

    # An empty row or column makes an expected frequency zero, for which
    # chi2_contingency raises ValueError, and leaves the ratios undefined.
    if 0 in (total_pass, total_fail, total_unc, total_cov):
        return list(_NEUTRAL_ROW)

    # Pearson chi-square on the 2x2 table without Yates' correction.  This
    # is the same statistic the table *with* margins yields (the margin
    # cells contribute zero), but with a meaningful dof.
    stats = float(chi2([[unc_pass, unc_fail], [cov_pass, cov_fail]],
                       correction=False)[0])
    m = stats / n_tests

    # proportion = (cov_fail / total_fail) / (cov_pass / total_pass).
    # total_cov > 0, so cov_pass == 0 implies cov_fail > 0: the ratio is +inf.
    if cov_pass:
        proportion = (cov_fail / total_fail) / (cov_pass / total_pass)
    else:
        proportion = math.inf

    # Decide the sign with exact integer cross-products instead of
    # comparing a float against 1.
    lhs = cov_fail * total_pass
    rhs = cov_pass * total_fail
    if lhs == rhs:
        final = 0.0
    elif lhs < rhs:
        final = -m
    else:
        final = m
    return [stats, m, proportion, final]


def findStatis(coverage, verbose=True):
    """Score every statement column of *coverage* against the test outcome.

    Columns at positions ``1 .. n_columns - 2`` are treated as statements;
    position 0 and the last position are skipped (presumably a test id and
    the ``'r'`` outcome column -- verify against the CSV layout).  In a
    statement column ``1`` means "covered" and ``0`` "uncovered"; in
    ``coverage['r']`` ``1`` means "failed" and ``0`` "passed".

    Parameters
    ----------
    coverage : pandas.DataFrame
        One row per test.  Must contain a column named ``'r'``.
    verbose : bool, optional
        When true (the default) print the column count and the
        contingency table of every statement that is not constant.

    Returns
    -------
    pandas.DataFrame
        Indexed by statement position with the columns ``'chi-square'``,
        ``'M'`` (chi-square divided by the number of tests),
        ``'proportion'`` (share of failed tests covering the statement
        divided by the share of passed tests covering it; ``inf`` when no
        passed test covers it) and ``'suspicious'`` (``M`` if
        proportion > 1, ``-M`` if < 1, ``0`` if equal).  Statements with a
        constant coverage column, and all statements when every test
        passed or every test failed, get the neutral row ``0, 0, 1, 0``.

    Raises
    ------
    KeyError
        If there is at least one statement column but no ``'r'`` column.
    """
    n_columns = coverage.shape[1]
    if verbose:
        print(n_columns)

    statement_positions = range(1, n_columns - 1)
    result = pd.DataFrame(
        np.nan,
        index=statement_positions,
        columns=['chi-square', 'M', 'proportion', 'suspicious'],
    )
    if len(statement_positions) == 0:
        return result

    # The outcome column is the same for every statement: read it once.
    outcome = np.asarray(coverage['r'])
    for position in statement_positions:
        hits = np.asarray(coverage.iloc[:, position])
        result.loc[position] = np.array(
            _score_statement(hits, outcome, verbose), dtype=float
        )
    return result


def sort(result):
    """Rank statements by descending ``'suspicious'`` value and print the ranking.

    Parameters
    ----------
    result : pandas.DataFrame
        Frame with a ``'suspicious'`` column, e.g. the output of
        ``findStatis``.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        Sorted copy of *result* with an added integer ``'rank'`` column
        running from 1 to ``len(result)``.
    """
    # A stable sort keeps tied statements in their original order, so the
    # ranking is reproducible from run to run.
    rank_result = result.sort_values(
        by=['suspicious'], ascending=False, kind='mergesort'
    )
    rank_result['rank'] = np.arange(1, len(rank_result.index) + 1)
    print(rank_result)
    return rank_result
1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 5 1 6\n", + "covered 14 0 14\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 5 1 6\n", + "covered 14 0 14\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 15 1 16\n", + "covered 4 0 4\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 9 1 10\n", + "covered 10 0 10\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 9 1 10\n", + "covered 10 0 10\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 12 1 13\n", + "covered 7 0 7\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed 
rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 14 1 15\n", + "covered 5 0 5\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 15 1 16\n", + "covered 4 0 4\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 15 1 16\n", + "covered 4 0 4\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 18 1 19\n", + "covered 1 0 1\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 16 1 17\n", + "covered 3 0 3\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 17 1 18\n", + "covered 2 0 2\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 0 1 1\n", + "covered 19 0 19\n", + "coltotal 19 1 20\n", + " passed failed rowtotal\n", + "uncovered 19 0 19\n", + "covered 0 1 1\n", + "coltotal 19 1 20\n", + "============== Suspicious levels ====================\n", + " chi-square M proportion 
suspicious\n", + "1 0.000000 0.000000 1.000000 0.000000\n", + "2 0.000000 0.000000 1.000000 0.000000\n", + "3 0.000000 0.000000 1.000000 0.000000\n", + "4 2.456140 0.122807 3.800000 0.122807\n", + "5 3.157895 0.157895 4.750000 0.157895\n", + "6 0.000000 0.000000 1.000000 0.000000\n", + "7 3.157895 0.157895 4.750000 0.157895\n", + "8 0.116959 0.005848 0.000000 -0.005848\n", + "9 5.964912 0.298246 9.500000 0.298246\n", + "10 20.000000 1.000000 inf 1.000000\n", + "11 5.964912 0.298246 9.500000 0.298246\n", + "12 0.116959 0.005848 0.000000 -0.005848\n", + "13 0.185759 0.009288 0.000000 -0.009288\n", + "14 0.055402 0.002770 0.000000 -0.002770\n", + "15 0.055402 0.002770 0.000000 -0.002770\n", + "16 0.055402 0.002770 0.000000 -0.002770\n", + "17 0.000000 0.000000 1.000000 0.000000\n", + "18 0.055402 0.002770 0.000000 -0.002770\n", + "19 0.055402 0.002770 0.000000 -0.002770\n", + "20 0.055402 0.002770 0.000000 -0.002770\n", + "21 0.000000 0.000000 1.000000 0.000000\n", + "22 0.000000 0.000000 1.000000 0.000000\n", + "23 0.000000 0.000000 1.000000 0.000000\n", + "24 0.000000 0.000000 1.000000 0.000000\n", + "25 0.000000 0.000000 1.000000 0.000000\n", + "26 0.000000 0.000000 1.000000 0.000000\n", + "27 0.000000 0.000000 1.000000 0.000000\n", + "28 0.000000 0.000000 1.000000 0.000000\n", + "29 0.000000 0.000000 1.000000 0.000000\n", + "30 0.000000 0.000000 1.000000 0.000000\n", + ".. ... ... ... 
...\n", + "61 0.185759 0.009288 0.000000 -0.009288\n", + "62 0.185759 0.009288 0.000000 -0.009288\n", + "63 0.055402 0.002770 0.000000 -0.002770\n", + "64 0.185759 0.009288 0.000000 -0.009288\n", + "65 0.116959 0.005848 0.000000 -0.005848\n", + "66 0.116959 0.005848 0.000000 -0.005848\n", + "67 0.116959 0.005848 0.000000 -0.005848\n", + "68 0.000000 0.000000 1.000000 0.000000\n", + "69 0.000000 0.000000 1.000000 0.000000\n", + "70 0.000000 0.000000 1.000000 0.000000\n", + "71 0.000000 0.000000 1.000000 0.000000\n", + "72 0.000000 0.000000 1.000000 0.000000\n", + "73 0.000000 0.000000 1.000000 0.000000\n", + "74 0.000000 0.000000 1.000000 0.000000\n", + "75 0.000000 0.000000 1.000000 0.000000\n", + "76 0.000000 0.000000 1.000000 0.000000\n", + "77 0.000000 0.000000 1.000000 0.000000\n", + "78 0.000000 0.000000 1.000000 0.000000\n", + "79 0.000000 0.000000 1.000000 0.000000\n", + "80 0.000000 0.000000 1.000000 0.000000\n", + "81 0.000000 0.000000 1.000000 0.000000\n", + "82 0.000000 0.000000 1.000000 0.000000\n", + "83 20.000000 1.000000 0.000000 -1.000000\n", + "84 0.000000 0.000000 1.000000 0.000000\n", + "85 20.000000 1.000000 inf 1.000000\n", + "86 0.000000 0.000000 1.000000 0.000000\n", + "87 0.000000 0.000000 1.000000 0.000000\n", + "88 0.000000 0.000000 1.000000 0.000000\n", + "89 0.000000 0.000000 1.000000 0.000000\n", + "90 0.000000 0.000000 1.000000 0.000000\n", + "\n", + "[90 rows x 4 columns]\n", + "=================== Ranking ==========================\n", + " chi-square M proportion suspicious rank\n", + "10 20.000000 1.000000 inf 1.000000 1\n", + "85 20.000000 1.000000 inf 1.000000 2\n", + "11 5.964912 0.298246 9.500000 0.298246 3\n", + "9 5.964912 0.298246 9.500000 0.298246 4\n", + "5 3.157895 0.157895 4.750000 0.157895 5\n", + "7 3.157895 0.157895 4.750000 0.157895 6\n", + "4 2.456140 0.122807 3.800000 0.122807 7\n", + "69 0.000000 0.000000 1.000000 0.000000 8\n", + "74 0.000000 0.000000 1.000000 0.000000 9\n", + "73 0.000000 0.000000 1.000000 
0.000000 10\n", + "72 0.000000 0.000000 1.000000 0.000000 11\n", + "71 0.000000 0.000000 1.000000 0.000000 12\n", + "70 0.000000 0.000000 1.000000 0.000000 13\n", + "1 0.000000 0.000000 1.000000 0.000000 14\n", + "68 0.000000 0.000000 1.000000 0.000000 15\n", + "76 0.000000 0.000000 1.000000 0.000000 16\n", + "60 0.000000 0.000000 1.000000 0.000000 17\n", + "2 0.000000 0.000000 1.000000 0.000000 18\n", + "45 0.000000 0.000000 1.000000 0.000000 19\n", + "75 0.000000 0.000000 1.000000 0.000000 20\n", + "80 0.000000 0.000000 1.000000 0.000000 21\n", + "77 0.000000 0.000000 1.000000 0.000000 22\n", + "78 0.000000 0.000000 1.000000 0.000000 23\n", + "79 0.000000 0.000000 1.000000 0.000000 24\n", + "34 0.000000 0.000000 1.000000 0.000000 25\n", + "81 0.000000 0.000000 1.000000 0.000000 26\n", + "82 0.000000 0.000000 1.000000 0.000000 27\n", + "84 0.000000 0.000000 1.000000 0.000000 28\n", + "86 0.000000 0.000000 1.000000 0.000000 29\n", + "87 0.000000 0.000000 1.000000 0.000000 30\n", + ".. ... ... ... ... 
if __name__ == "__main__":
    # Driver: switch to the (machine-specific) project folder, load the
    # coverage matrix, then print the per-statement tables, the
    # suspiciousness scores and the final ranking in that order.
    data_dir = 'E:\\1819SEM1\\SoftwareEngineering\\Debugger'
    os.chdir(data_dir)

    coverage = pd.read_csv('coverage_bug1.csv')
    print(coverage.index)

    print("============ Cross tab for each statement =============")
    result = findStatis(coverage)

    print("============== Suspicious levels ====================")
    print(result)

    print("=================== Ranking ==========================")
    sort(result)