-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathAprioriAssignment(UE173085)
1 lines (1 loc) · 6.67 KB
/
AprioriAssignment(UE173085)
1
{"nbformat":4,"nbformat_minor":0,"metadata":{"kernelspec":{"display_name":"Python 3","language":"python","name":"python3"},"language_info":{"codemirror_mode":{"name":"ipython","version":3},"file_extension":".py","mimetype":"text/x-python","name":"python","nbconvert_exporter":"python","pygments_lexer":"ipython3","version":"3.6.7"},"colab":{"name":"AprioriAssignment(UE173087)","provenance":[{"file_id":"1EOJY9HNR4u4jPdPDH4XKyl30woFhEML0","timestamp":1586952134494}]}},"cells":[{"cell_type":"markdown","metadata":{"id":"LGFOZnuBtc1V","colab_type":"text"},"source":["#### This notebook is to understand the Apriori Algorithm"]},{"cell_type":"code","metadata":{"id":"RcgxIwZotc1X","colab_type":"code","colab":{}},"source":["from collections import Counter,OrderedDict"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"SyKCBLQstc1c","colab_type":"code","colab":{}},"source":["# This Function is used to generate all the subset of any set\n","def genSubsets(l):\n"," powerSetSize = 2 ** len(l)\n"," powerSet = []\n"," for i in range(1,powerSetSize):\n"," tempEle = []\n"," for j in range(len(l)):\n"," binFlagInd = i & (1 << j)\n"," if binFlagInd:\n"," tempEle.append(l[j])\n"," powerSet.append(tempEle)\n"," return powerSet"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"3YS5rENItc1h","colab_type":"code","colab":{}},"source":["# Apriori Algorithm Implementation\n","def genCandidate(Fk1): #Fk1 indicates F(k-1), it is a list of lists\n"," Ck = []\n"," k1 = len(Fk1[0])\n","\n"," # COMBINE STEP\n"," for i in range(len(Fk1)-1):\n"," for j in range(i+1,len(Fk1)):\n"," f1,f2 = Fk1[i],Fk1[j]\n","\n"," if f1[:len(f1)-1] == f2[:len(f2)-1] and f1[-1] < f2[-1]:\n"," tempC = f1 + [f2[-1]]\n","\n"," # PRUNING STEP\n"," subset = genSubsets(tempC)\n"," appendSts = True\n"," for item in subset:\n"," if len(item) == k1 and item not in Fk1:\n"," appendSts = False\n"," if appendSts:\n"," Ck.append(tempC) \n"," return Ck"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"uDLx2ikutc1l","colab_type":"code","colab":{}},"source":["\n"," \n","def initPass(txList): # list of transactions, most possibly a dict\n"," allTx = [item for tx in txList for item in tx]\n"," allTx.sort()\n"," cntr = OrderedDict()\n"," for tx in allTx:\n"," cntr[tx] = cntr.get(tx,0) + 1\n","\n"," return cntr\n","\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"APaXQEnHtc1q","colab_type":"code","colab":{}},"source":["def searchInT(t,candid):\n"," found = True\n"," for eachCandid in candid:\n"," if eachCandid not in t:\n"," found = False\n","\n"," return found\n"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"WdFtOjVYtc1u","colab_type":"code","colab":{}},"source":["def apriori(T,minSup):\n"," finalSet = []\n"," c1 = initPass(T)\n"," f = [[item] for item in c1.keys() if c1[item]/len(T) >= minSup] # f1\n"," \n"," for item in f:\n"," finalSet.append(item)\n","\n"," while len(f) != 0:\n"," Ck = genCandidate(f)\n"," # print(\"Ck\")\n"," # print(Ck)\n"," freqDict = {}\n"," for t in T:\n"," for candidate in Ck:\n"," if searchInT(t,candidate):\n"," freqDict[tuple(candidate)] = freqDict.get(tuple(candidate),0) + 1\n"," # print(\"freqDict\")\n"," # print(freqDict)\n"," f = []\n"," for c in freqDict.keys():\n"," if freqDict[c]/len(T)>= minSup:\n"," f.append(list(c))\n","\n"," # print(\"f\")\n"," # print(f)\n"," if len(f) != 0:\n"," f = sorted(f,key=lambda x : (len(x),*x))\n"," for item in f:\n"," finalSet.append(item)\n"," # print(finalSet)\n"," return finalSet"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"JzMczYXftc10","colab_type":"code","outputId":"6e2bbaa0-5b9f-40a2-9535-6069c58a7e3a","executionInfo":{"status":"ok","timestamp":1586952869485,"user_tz":-330,"elapsed":4234,"user":{"displayName":"Sakshi Chandel","photoUrl":"","userId":"07060358668764426856"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["T = [\n"," ['1', '3', '4'],\n"," ['2', '3', '5'],\n"," ['1', '2', '3', '5'],\n"," ['1','2','5']\n"," ] \n","\n","print(apriori(T,0.5))"],"execution_count":8,"outputs":[{"output_type":"stream","text":["[['1'], ['2'], ['3'], ['5'], ['1', '2'], ['1', '3'], ['1', '5'], ['2', '3'], ['2', '5'], ['3', '5'], ['1', '2', '5'], ['2', '3', '5']]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"Q-7oN939tc17","colab_type":"code","colab":{}},"source":["T2=[['Bread', 'Milk'], \n"," ['Bread', 'Diapers', 'Beer', 'Eggs'], \n"," ['Milk', 'Diapers', 'Beer', 'Coke'], \n"," ['Bread', 'Milk', 'Diapers', 'Beer'], \n"," ['Bread', 'Milk', 'Diapers', 'Coke']]\n","F=apriori(T2,0.5)"],"execution_count":0,"outputs":[]},{"cell_type":"code","metadata":{"id":"C6jdj_Autc2D","colab_type":"code","outputId":"d0d84ac4-d982-4dcb-c9fd-c11a780b8089","executionInfo":{"status":"ok","timestamp":1586952869489,"user_tz":-330,"elapsed":3358,"user":{"displayName":"Sakshi Chandel","photoUrl":"","userId":"07060358668764426856"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["print(F)"],"execution_count":10,"outputs":[{"output_type":"stream","text":["[['Beer'], ['Bread'], ['Diapers'], ['Milk'], ['Beer', 'Diapers'], ['Bread', 'Diapers'], ['Bread', 'Milk'], ['Diapers', 'Milk']]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"32QlAGXNtc2I","colab_type":"code","outputId":"feda9b0e-3a79-4a8b-97a1-aca3d8ad2236","executionInfo":{"status":"ok","timestamp":1586954900159,"user_tz":-330,"elapsed":2674,"user":{"displayName":"Sakshi Chandel","photoUrl":"","userId":"07060358668764426856"}},"colab":{"base_uri":"https://localhost:8080/","height":34}},"source":["T3 = [\n"," [1, 3, 4], \n"," [2, 3, 5], \n"," [1, 2, 3, 5], \n"," [2, 5], \n"," [2, 3, 5], \n"," [1, 2, 3, 4, 5],\n"," [1, 3, 4], \n"," [2, 3, 5], \n"," [1, 2, 3, 5], \n"," [1, 3, 4], \n"," [2, 4, 5]\n"," ] \n","\n","print(apriori(T3,0.5))"],"execution_count":13,"outputs":[{"output_type":"stream","text":["[[1], [2], [3], [5], [1, 3], [2, 3], [2, 5], [3, 5], [2, 3, 5]]\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"w2I4fO343z5E","colab_type":"code","colab":{}},"source":[""],"execution_count":0,"outputs":[]}]}