In [129]:
import pandas as pd
from copy import deepcopy
from sklearn import preprocessing
import numpy as np
import pymongo
import pickle
import certifi
from sklearn.metrics import accuracy_score


In [130]:
def load_saved_model_from_db(model_name, client, db, dbconnection):
    json_data = {}

    myclient = pymongo.MongoClient(client, tlsCAFile=certifi.where())

    mydb = myclient[db]
    mycon = mydb[dbconnection]
    data = mycon.find({"name": model_name})

    for i in data:
        json_data = i
        CV_accuracy = i['CV_accuracy']

    pickled_model = json_data[model_name]

    return (pickle.loads(pickled_model), CV_accuracy)


In [131]:
data = pd.read_csv('./CleanData_V2.csv', encoding='unicode_escape')
data.head()

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Hobby_Interest,Budget (in millions),Fathers_Occupation,Mothers_Occupation,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Year_of_Admission,Campus,University
0,male,Punjabi,Peshawar,Urdu,Nothing,4,Doctor,House Wife,CS,BSCS,990,800,2019,Peshawar,FAST NUCES
1,male,Pashtun,Kohat,Pashto,Reading,5,Engrr,House Wife,Pre-Engrr,BSEE,900,924,2020,Islamabad,COMSATS
2,male,Punjabi,Islamabad,Punjabi,Gaming,12,Army,Doctor,Pre-Engrr,BSME,930,950,2019,Lahore,LUMS
3,female,Punjabi,Islamabad,Urdu,Nothing,4,Engrr,House Wife,Pre-Med,Medical,890,900,2019,Peshawar,KMC
4,female,Other,Lahore,English,Sports,10,Doctor,Teacher,Pre-Engrr,BSEE,900,950,2019,Lahore,LUMS


In [132]:
data.shape

(200, 15)

In [133]:
data.drop(columns=['Hobby_Interest',
          'Fathers_Occupation', 'Mothers_Occupation', 'Year_of_Admission'],  inplace=True)

data.rename(columns={'Budget (in millions)' : 'Budget'}, inplace=True)
data.head()

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Campus,University
0,male,Punjabi,Peshawar,Urdu,4,CS,BSCS,990,800,Peshawar,FAST NUCES
1,male,Pashtun,Kohat,Pashto,5,Pre-Engrr,BSEE,900,924,Islamabad,COMSATS
2,male,Punjabi,Islamabad,Punjabi,12,Pre-Engrr,BSME,930,950,Lahore,LUMS
3,female,Punjabi,Islamabad,Urdu,4,Pre-Med,Medical,890,900,Peshawar,KMC
4,female,Other,Lahore,English,10,Pre-Engrr,BSEE,900,950,Lahore,LUMS


### Splitting Data

In [134]:
X = data.iloc[:, 0:10]
y = data.iloc[:, 10]


In [135]:
X

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Campus
0,male,Punjabi,Peshawar,Urdu,4,CS,BSCS,990,800,Peshawar
1,male,Pashtun,Kohat,Pashto,5,Pre-Engrr,BSEE,900,924,Islamabad
2,male,Punjabi,Islamabad,Punjabi,12,Pre-Engrr,BSME,930,950,Lahore
3,female,Punjabi,Islamabad,Urdu,4,Pre-Med,Medical,890,900,Peshawar
4,female,Other,Lahore,English,10,Pre-Engrr,BSEE,900,950,Lahore
...,...,...,...,...,...,...,...,...,...,...
195,male,Punjabi,Karachi,Urdu,4,Pre-Engrr,BSCE,996,997,Karachi
196,male,Other,Karachi,Urdu,6,Pre-Engrr,BSCE,990,999,Karachi
197,male,Other,Karachi,Punjabi,7,Pre-Engrr,BSME,1010,998,Karachi
198,male,Other,Lahore,Urdu,7,Pre-Engrr,BSEE,1009,920,Islamabad


### Assuming CNIC Given is '13302-0464670-3'

In [136]:
CNIC = '13302-0464670-3'

## Assuming text extracted from image got the following information
study_group = 'Pre-Med'
matric_marks = 950  # Out of 1100
inter_marks = 980 #Out of 1100

In [137]:
client = pymongo.MongoClient(
    'mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', tlsCAFile=certifi.where())

db = client['FYP']
col = db['datas']
cursor = col.find({"CNIC": CNIC})
for doc in cursor:
    budget = int(doc['budget']/1000000)
    degree_program = doc['degree_program']
    preferred_location = doc['preferred_location']
    #Getting information from transcript later

col = db['users']
cursor = col.find({"CNIC": CNIC})
for doc in cursor:
    gender = doc['gender']
    race = doc['Race_ethnicity']
    preferred_language = doc['Preferred_language']
    homecity = doc['HomeCity']


In [138]:
user_data = {
    'Gender' : gender,
    'Race/Ethnicity': race,  # categorized
    'Home_City': homecity,
    'Preferred_Langauge': preferred_language,
    'Budget': budget,  # categorized
    'Study_Group':  study_group,
    'Degree_Program':  degree_program,
    'Matric_Marks':  matric_marks,  # categorized
    'Inter_Marks':  inter_marks,  # categorized
    'Campus':  preferred_location,
}

user_data = pd.DataFrame([user_data])
user_data.head()

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Campus
0,male,Other,Lahore,Urdu,5,Pre-Med,Medical,950,980,Islamabad


In [139]:
X = pd.concat([X, user_data])
X

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Campus
0,male,Punjabi,Peshawar,Urdu,4,CS,BSCS,990,800,Peshawar
1,male,Pashtun,Kohat,Pashto,5,Pre-Engrr,BSEE,900,924,Islamabad
2,male,Punjabi,Islamabad,Punjabi,12,Pre-Engrr,BSME,930,950,Lahore
3,female,Punjabi,Islamabad,Urdu,4,Pre-Med,Medical,890,900,Peshawar
4,female,Other,Lahore,English,10,Pre-Engrr,BSEE,900,950,Lahore
...,...,...,...,...,...,...,...,...,...,...
196,male,Other,Karachi,Urdu,6,Pre-Engrr,BSCE,990,999,Karachi
197,male,Other,Karachi,Punjabi,7,Pre-Engrr,BSME,1010,998,Karachi
198,male,Other,Lahore,Urdu,7,Pre-Engrr,BSEE,1009,920,Islamabad
199,male,Sindhi,Karachi,Sindhi,6,Pre-Engrr,BSEE,997,967,Karachi


### Catergorizing Data

In [140]:
# data['Budget'].value_counts()
min_budget = X['Budget'].min()
max_budget = X['Budget'].max()

X['bins_budget'] = pd.cut(x=X['Budget'], bins=[min_budget, 5, 9, max_budget],
                             labels=['Low', 'Medium', 'High', ])


In [141]:
min_matric = X['Matric_Marks'].min()
max_matric = X['Matric_Marks'].max()

X['Percentage_Matric'] = (X['Matric_Marks']/1100)*100
X['Percentage_Inter'] = (X['Inter_Marks']/1100)*100


X['Matric_Grade'] = pd.cut(x=X['Percentage_Matric'], bins=[
                              0, 49, 59, 69, 79, 89, 94, 100], labels=['F', 'E', 'D', 'C', 'B', 'A', 'A+'])
X['Inter_Grade'] = pd.cut(x=X['Percentage_Inter'], bins=[
                             0, 49, 59, 69, 79, 89, 94, 100], labels=['F', 'E', 'D', 'C', 'B', 'A', 'A+'])


In [142]:
X

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Marks,Inter_Marks,Campus,bins_budget,Percentage_Matric,Percentage_Inter,Matric_Grade,Inter_Grade
0,male,Punjabi,Peshawar,Urdu,4,CS,BSCS,990,800,Peshawar,Low,90.000000,72.727273,A,C
1,male,Pashtun,Kohat,Pashto,5,Pre-Engrr,BSEE,900,924,Islamabad,Low,81.818182,84.000000,B,B
2,male,Punjabi,Islamabad,Punjabi,12,Pre-Engrr,BSME,930,950,Lahore,High,84.545455,86.363636,B,B
3,female,Punjabi,Islamabad,Urdu,4,Pre-Med,Medical,890,900,Peshawar,Low,80.909091,81.818182,B,B
4,female,Other,Lahore,English,10,Pre-Engrr,BSEE,900,950,Lahore,High,81.818182,86.363636,B,B
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196,male,Other,Karachi,Urdu,6,Pre-Engrr,BSCE,990,999,Karachi,Medium,90.000000,90.818182,A,A
197,male,Other,Karachi,Punjabi,7,Pre-Engrr,BSME,1010,998,Karachi,Medium,91.818182,90.727273,A,A
198,male,Other,Lahore,Urdu,7,Pre-Engrr,BSEE,1009,920,Islamabad,Medium,91.727273,83.636364,A,B
199,male,Sindhi,Karachi,Sindhi,6,Pre-Engrr,BSEE,997,967,Karachi,Medium,90.636364,87.909091,A,B


In [143]:
X.drop(columns=['Budget', 'Matric_Marks', 'Inter_Marks',
          'Percentage_Matric', 'Percentage_Inter'], inplace=True)
          
X.rename(columns={'bins_budget': "Budget"}, inplace=True)

X = X[['Gender', 'Race/Ethnicity', 'Home_City', 'Preferred_Langauge', 'Budget', 'Study_Group', 'Degree_Program', 'Matric_Grade', 'Inter_Grade', 'Campus']]
X.tail()


Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Grade,Inter_Grade,Campus
196,male,Other,Karachi,Urdu,Medium,Pre-Engrr,BSCE,A,A,Karachi
197,male,Other,Karachi,Punjabi,Medium,Pre-Engrr,BSME,A,A,Karachi
198,male,Other,Lahore,Urdu,Medium,Pre-Engrr,BSEE,A,B,Islamabad
199,male,Sindhi,Karachi,Sindhi,Medium,Pre-Engrr,BSEE,A,B,Karachi
0,male,Other,Lahore,Urdu,Low,Pre-Med,Medical,B,A,Islamabad


In [144]:
le = preprocessing.LabelEncoder()

gender = le.fit_transform(list(X['Gender']))
race = le.fit_transform(list(X['Race/Ethnicity']))
home_city = le.fit_transform(list(X['Home_City']))
preferred_language = le.fit_transform(list(X['Preferred_Langauge']))
budget = le.fit_transform(list(X['Budget']))
study_group = le.fit_transform(list(X['Study_Group']))
degree_program = le.fit_transform(list(X['Degree_Program']))
matric_grade = le.fit_transform(list(X['Matric_Grade']))
inter_grade = le.fit_transform(list(X['Inter_Grade']))
campus = le.fit_transform(list(X['Campus']))

class_attribute = le.fit_transform(list(y))

X_prep = pd.DataFrame(list(zip(gender, race, home_city, preferred_language, budget, study_group,
                               degree_program, matric_grade, inter_grade, campus)), columns=['Gender', 'Race/Ethnicity', 'Home_City', 'Preferred_Langauge', 'Budget', 'Study_Group', 'Degree_Program', 'Matric_Grade', 'Inter_Grade', 'Campus'])

y_prep = pd.DataFrame(list(class_attribute), columns=['University'])


In [145]:
X_prep.tail()

Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Grade,Inter_Grade,Campus
196,1,4,3,5,2,2,1,0,0,1
197,1,4,3,3,2,2,5,0,0,1
198,1,4,5,5,2,2,3,0,2,0
199,1,7,3,4,2,2,3,0,2,1
200,1,4,5,5,1,4,7,1,0,0


In [146]:
user_data_prep = pd.DataFrame(X_prep.iloc[200])
user_data_prep = user_data_prep.transpose()
user_data_prep


Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Grade,Inter_Grade,Campus
200,1,4,5,5,1,4,7,1,0,0


In [147]:
# gender = le.inverse_transform(user_data_prep['Gender'])
# race = le.inverse_transform(user_data_prep['Race/Ethnicity'])
# home_city = le.inverse_transform(user_data_prep['Home_City'])
# preferred_language = le.inverse_transform(user_data_prep['Preferred_Langauge'])
# budget = le.inverse_transform(user_data_prep['Budget'])
# study_group = le.inverse_transform(user_data_prep['Study_Group'])
# degree_program = le.inverse_transform(user_data_prep['Degree_Program'])
# matric_grade = le.inverse_transform(user_data_prep['Matric_Grade'])
# inter_grade = le.inverse_transform(user_data_prep['Inter_Grade'])
# campus = le.inverse_transform(user_data_prep['Campus'])


# print(gender, race, home_city, preferred_language,
#       budget,
#       study_group,
#       degree_program,
#       matric_grade,
#       inter_grade,
#       campus)


### Removing last row (data from user) from X_data_prep

In [148]:
X_prep.drop([200], inplace=True)
X_prep.shape

(200, 10)

In [149]:
X_prep.tail()


Unnamed: 0,Gender,Race/Ethnicity,Home_City,Preferred_Langauge,Budget,Study_Group,Degree_Program,Matric_Grade,Inter_Grade,Campus
195,1,6,3,5,1,2,1,0,0,1
196,1,4,3,5,2,2,1,0,0,1
197,1,4,3,3,2,2,5,0,0,1
198,1,4,5,5,2,2,3,0,2,0
199,1,7,3,4,2,2,3,0,2,1


### Fetching model from db

In [150]:
models = np.array(['Random_Forest_Classification',
                   'Support Vector Machine',
                   'Decision Tree Classification',
                   'XGBoost',
                   'Bagging Classifier (Base_Estimator: DTC))',
                   'ADA Boost Classifier (Base_Estimator: DTC))'])



In [151]:
# Loading RFC Model & its corresponding accuracy from DB
rfc, rfc_accuracy = load_saved_model_from_db(
    model_name=models[0], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')


# # Loading SVM Model from DB
svc, svc_accuracy = load_saved_model_from_db(
    model_name=models[1], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')


# # Loading DTC Model from DB
dtc, dtc_accuracy = load_saved_model_from_db(
    model_name=models[2], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')


# # Loading XGB Model from DB
# xgb = load_saved_model_from_db(
    # model_name=models[3], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')


# # Loading BCLF Model from DB
bclf, bclf_accuracy = load_saved_model_from_db(
    model_name=models[4], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')


# # Loading ADAB Model from DB
adab, adab_accuracy = load_saved_model_from_db(
    model_name=models[5], client='mongodb+srv://nafay:password1234@mernapp.fnkr4nr.mongodb.net/', db='FYP', dbconnection='ML_models')



In [152]:
recommendation = []
recommendation.append((rfc.predict(user_data_prep)[0], rfc_accuracy))
recommendation.append([svc.predict(user_data_prep)[0], svc_accuracy])
recommendation.append([dtc.predict(user_data_prep)[0], dtc_accuracy])
# recommendation.append(xgb.predict(user_data_prep))
recommendation.append((bclf.predict(user_data_prep)[0], bclf_accuracy))
recommendation.append((adab.predict(user_data_prep)[0], adab_accuracy))
recommendation

[(0, 77.24358974358975),
 [0, 75.33333333333334],
 [1, 68.0],
 (0, 77.35042735042735),
 (1, 76.65598290598291)]

In [153]:
from statistics import mode
R = []
for i in recommendation:
    R.append(i[0])

print(R)
mode = mode(R)
mode

[0, 0, 1, 0, 1]


0

In [155]:
print(le.inverse_transform([R[2]]))

['COMSATS']


In [154]:
university = le.inverse_transform([mode])
print(university[0])

Aga Khan
