# Getting Data from the user: 

In [None]:
#@title Upload Historical Data:

#@markdown ---

from google.colab import files

uploaded = files.upload()

file_name = list(uploaded.keys())[0]

Saving Student_Performance_Data.xlsx to Student_Performance_Data.xlsx


In [None]:
#@title Select the Semester and Paper(s): { run: "auto" }

#@markdown ### Select Semester:

Semester = 'Sem_6' #@param ["Sem_1", "Sem_2", "Sem_3", "Sem_4", "Sem_5", "Sem_6", "Sem_7", "Sem_8"]
key = Semester

#@markdown ### Enter Marks Threshold:

marks_thresh = 75 #@param {type:"slider", min:0, max:100, step:1}

#@markdown ### Enter Minimum Support:
minsup = 0.2 #@param {type:"number"}

#@markdown ### Enter Minimum Confidence:
minconf = 0.48 #@param {type:"number"}

# Code:

## Importing Libraries:

In [None]:
import pandas as pd
import numpy as np
from scipy import sparse
from sklearn.metrics.pairwise import cosine_similarity

## Preparing the Data

In [None]:
df = pd.read_excel(file_name, sheet_name='Sheet1')
df.head()

Unnamed: 0,Student_ID,Semster_Name,Paper_ID,Paper_Name,Marks
0,SID20131143,Sem_1,SEMI0012995,Paper 1,44
1,SID20131143,Sem_1,SEMI0015183,Paper 2,74
2,SID20131143,Sem_1,SEMI0018371,Paper 3,80
3,SID20131143,Sem_1,SEMI0015910,Paper 4,44
4,SID20131143,Sem_1,SEMI0016208,Paper 5,95


In [None]:
df_dict_sem = {}
for name in df['Semster_Name'].unique():
    df_dict_sem[name] = df[df['Semster_Name'] == name].reset_index(drop=True)
df_dict_sem

{'Sem_1':         Student_ID Semster_Name     Paper_ID Paper_Name  Marks
 0      SID20131143        Sem_1  SEMI0012995    Paper 1     44
 1      SID20131143        Sem_1  SEMI0015183    Paper 2     74
 2      SID20131143        Sem_1  SEMI0018371    Paper 3     80
 3      SID20131143        Sem_1  SEMI0015910    Paper 4     44
 4      SID20131143        Sem_1  SEMI0016208    Paper 5     95
 ...            ...          ...          ...        ...    ...
 26197  SID20189989        Sem_1  SEMI0018371    Paper 3     70
 26198  SID20189989        Sem_1  SEMI0015910    Paper 4     41
 26199  SID20189989        Sem_1  SEMI0016208    Paper 5     80
 26200  SID20189989        Sem_1  SEMI0017431    Paper 6     49
 26201  SID20189989        Sem_1  SEMI0014130    Paper 7     75
 
 [26202 rows x 5 columns],
 'Sem_2':         Student_ID Semster_Name     Paper_ID Paper_Name  Marks
 0      SID20131143        Sem_2  SEMI0024747    Paper 1     92
 1      SID20131143        Sem_2  SEMI0025909    Paper 2 

In [None]:
df_sem = df_dict_sem[key]
df_sem.head()

Unnamed: 0,Student_ID,Semster_Name,Paper_ID,Paper_Name,Marks
0,SID20131143,Sem_6,SEMI0068923,Paper 1,98
1,SID20131143,Sem_6,SEMI0067259,Paper 2,84
2,SID20131143,Sem_6,SEMI0065623,Paper 3,46
3,SID20131143,Sem_6,SEMI0064181,Paper 4,76
4,SID20131143,Sem_6,SEMI0064427,Paper 5,54


In [None]:
#Applying Marks Threshold
df_sem_marks_thresh = df_sem[df_sem['Marks'] > marks_thresh].reset_index(drop=True)
df_sem_marks_thresh.head()

Unnamed: 0,Student_ID,Semster_Name,Paper_ID,Paper_Name,Marks
0,SID20131143,Sem_6,SEMI0068923,Paper 1,98
1,SID20131143,Sem_6,SEMI0067259,Paper 2,84
2,SID20131143,Sem_6,SEMI0064181,Paper 4,76
3,SID20131151,Sem_6,SEMI0065623,Paper 3,87
4,SID20131151,Sem_6,SEMI0064181,Paper 4,92


In [None]:
#getting only the sudent ID and the paper name
df_sem_marks_thresh_ID_Paper = df_sem_marks_thresh[['Student_ID', 'Paper_Name']]
df_sem_marks_thresh_ID_Paper

Unnamed: 0,Student_ID,Paper_Name
0,SID20131143,Paper 1
1,SID20131143,Paper 2
2,SID20131143,Paper 4
3,SID20131151,Paper 3
4,SID20131151,Paper 4
...,...,...
10580,SID20189982,Paper 2
10581,SID20189982,Paper 3
10582,SID20189989,Paper 5
10583,SID20189989,Paper 6


## Installing fpgrowth_py:

In [None]:
pip install fpgrowth_py

Collecting fpgrowth_py
  Downloading fpgrowth_py-1.0.0-py3-none-any.whl (5.6 kB)
Installing collected packages: fpgrowth-py
Successfully installed fpgrowth-py-1.0.0


## Converting data and executing:

In [None]:
from fpgrowth_py import fpgrowth

In [None]:
df_sem_encoded = df_sem_marks_thresh_ID_Paper.groupby(['Student_ID'], axis=0)['Paper_Name'].transform(lambda x : ','.join(x))
df_sem_encoded.head()

0    Paper 1,Paper 2,Paper 4
1    Paper 1,Paper 2,Paper 4
2    Paper 1,Paper 2,Paper 4
3            Paper 3,Paper 4
4            Paper 3,Paper 4
Name: Paper_Name, dtype: object

In [None]:
sem_itemlist = [value.split(',') for value in df_sem_encoded.values.tolist()]
sem_itemlist

[['Paper 1', 'Paper 2', 'Paper 4'],
 ['Paper 1', 'Paper 2', 'Paper 4'],
 ['Paper 1', 'Paper 2', 'Paper 4'],
 ['Paper 3', 'Paper 4'],
 ['Paper 3', 'Paper 4'],
 ['Paper 7'],
 ['Paper 4', 'Paper 5', 'Paper 6', 'Paper 7'],
 ['Paper 4', 'Paper 5', 'Paper 6', 'Paper 7'],
 ['Paper 4', 'Paper 5', 'Paper 6', 'Paper 7'],
 ['Paper 4', 'Paper 5', 'Paper 6', 'Paper 7'],
 ['Paper 1', 'Paper 3', 'Paper 7'],
 ['Paper 1', 'Paper 3', 'Paper 7'],
 ['Paper 1', 'Paper 3', 'Paper 7'],
 ['Paper 5'],
 ['Paper 1'],
 ['Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 1', 'Paper 2', 'Paper 5', 'Paper 6'],
 ['Paper 1', 'Paper 2', 'Paper 5', 'Paper 6'],
 ['Paper 1', 'Paper 2', 'Paper 5', 'Paper 6'],
 ['Paper 1', 'Paper 2', 'Paper 5', 'Paper 6'],
 ['Paper 1', 'Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 1', 'Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 1', 'Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 1', 'Paper 2', 'Paper 4', 'Paper 5'],
 ['Paper 3', 'Paper 

In [None]:
freqItemSet, rules = fpgrowth(sem_itemlist, minsup, minconf)

In [None]:
rules

[[{'Paper 4'}, {'Paper 7'}, 0.48778044871794873],
 [{'Paper 3'}, {'Paper 5'}, 0.48050662972491587],
 [{'Paper 3'}, {'Paper 1'}, 0.48129823867009697],
 [{'Paper 3'}, {'Paper 7'}, 0.48664159905006926],
 [{'Paper 6'}, {'Paper 5'}, 0.4842166796570538],
 [{'Paper 7'}, {'Paper 6'}, 0.4815378759040731],
 [{'Paper 6'}, {'Paper 7'}, 0.4929851909586906],
 [{'Paper 5'}, {'Paper 7'}, 0.4855212355212355]]

In [None]:
freqItemSet

[{'Paper 4'},
 {'Paper 4', 'Paper 6'},
 {'Paper 1', 'Paper 4'},
 {'Paper 4', 'Paper 5'},
 {'Paper 2', 'Paper 4'},
 {'Paper 3', 'Paper 4'},
 {'Paper 4', 'Paper 7'},
 {'Paper 3'},
 {'Paper 2', 'Paper 3'},
 {'Paper 3', 'Paper 6'},
 {'Paper 3', 'Paper 5'},
 {'Paper 1', 'Paper 3'},
 {'Paper 3', 'Paper 7'},
 {'Paper 2'},
 {'Paper 2', 'Paper 6'},
 {'Paper 1', 'Paper 2'},
 {'Paper 2', 'Paper 5'},
 {'Paper 2', 'Paper 7'},
 {'Paper 6'},
 {'Paper 1', 'Paper 6'},
 {'Paper 5', 'Paper 6'},
 {'Paper 6', 'Paper 7'},
 {'Paper 1'},
 {'Paper 1', 'Paper 5'},
 {'Paper 1', 'Paper 7'},
 {'Paper 5'},
 {'Paper 5', 'Paper 7'},
 {'Paper 7'}]

# Final Result:

In [None]:
for item in freqItemSet:
    if len(item) > 1:
        print(item)

{'Paper 6', 'Paper 4'}
{'Paper 1', 'Paper 4'}
{'Paper 4', 'Paper 5'}
{'Paper 4', 'Paper 2'}
{'Paper 3', 'Paper 4'}
{'Paper 4', 'Paper 7'}
{'Paper 3', 'Paper 2'}
{'Paper 3', 'Paper 6'}
{'Paper 3', 'Paper 5'}
{'Paper 1', 'Paper 3'}
{'Paper 3', 'Paper 7'}
{'Paper 6', 'Paper 2'}
{'Paper 1', 'Paper 2'}
{'Paper 5', 'Paper 2'}
{'Paper 7', 'Paper 2'}
{'Paper 1', 'Paper 6'}
{'Paper 6', 'Paper 5'}
{'Paper 6', 'Paper 7'}
{'Paper 1', 'Paper 5'}
{'Paper 1', 'Paper 7'}
{'Paper 7', 'Paper 5'}
