## MODEL

In [1]:
!pip install pyreadr sentence_transformers

^C


Collecting pyreadr
  Obtaining dependency information for pyreadr from https://files.pythonhosted.org/packages/7f/f4/1cfacf40d6943fae716b77a7376812b1b4bc049f077453f3008e2065a0ca/pyreadr-0.5.3-cp312-cp312-win_amd64.whl.metadata
  Downloading pyreadr-0.5.3-cp312-cp312-win_amd64.whl.metadata (1.4 kB)
Collecting sentence_transformers
  Obtaining dependency information for sentence_transformers from https://files.pythonhosted.org/packages/6d/70/2b5b76e98191ec3b8b0d1dde52d00ddcc3806799149a9ce987b0d2d31015/sentence_transformers-5.1.0-py3-none-any.whl.metadata
  Downloading sentence_transformers-5.1.0-py3-none-any.whl.metadata (16 kB)
Collecting pandas>=1.2.0 (from pyreadr)
  Obtaining dependency information for pandas>=1.2.0 from https://files.pythonhosted.org/packages/28/30/8114832daff7489f179971dbc1d854109b7f4365a546e3ea75b6516cea95/pandas-2.3.2-cp312-cp312-win_amd64.whl.metadata
  Downloading pandas-2.3.2-cp312-cp312-win_amd64.whl.metadata (19 kB)
Collecting transformers<5.0.0,>=4.41.0 (fr


[notice] A new release of pip is available: 23.2.1 -> 25.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [None]:
import numpy as np
import pandas as pd
import random
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
import joblib

# -------------------------------------------------
# 1. Define Districts, Streams, Government Univ., and Course Mappings
# -------------------------------------------------
# Everything in this section is hand-written lookup data; the synthetic
# dataset generated further down samples from these lists.

# All 25 Sri Lankan districts
districts = [
    "Colombo", "Gampaha", "Kalutara", "Kandy", "Matale", "Nuwara Eliya",
    "Galle", "Matara", "Hambantota", "Jaffna", "Kilinochchi", "Mannar",
    "Vavuniya", "Mullaitivu", "Batticaloa", "Ampara", "Trincomalee",
    "Kurunegala", "Puttalam", "Anuradhapura", "Polonnaruwa", "Badulla",
    "Monaragala", "Ratnapura", "Kegalle"
]

# A-Level streams; these strings are also the inner keys of
# gov_univ_course_mapping below, so the spelling must match exactly.
streams = ["Physical Science", "Biological Science", "Commerce", "Arts", "Technology"]

# List of government universities (7 universities); these strings are the
# outer keys of gov_univ_course_mapping and the values returned by
# assign_gov_university.
gov_universities = [
    "University of Colombo",
    "University of Peradeniya",
    "University of Moratuwa",
    "University of Sri Jayewardenepura",
    "University of Kelaniya",
    "University of Ruhuna",
    "University of Jaffna"
]

# Mapping for government universities: for each university, assign a list of courses per stream.
# Shape: {university name: {stream name: [course, course]}} -- every
# university/stream pair lists exactly two candidate courses.
gov_univ_course_mapping = {
    "University of Colombo": {
        "Physical Science": ["BSc in Software Engineering", "BSc in Mechanical Engineering"],
        "Biological Science": ["BSc in Computer Science", "BSc in Environmental Science"],
        "Commerce": ["BSc in Business Analytics", "BSc in Finance"],
        "Arts": ["BA in Economics", "BA in History"],
        "Technology": ["BSc in Information Technology", "BSc in Computer Networking"]
    },
    "University of Peradeniya": {
        "Physical Science": ["BSc in Electrical Engineering", "BSc in Civil Engineering"],
        "Biological Science": ["BSc in Biotechnology", "BSc in Life Sciences"],
        "Commerce": ["BSc in Accounting", "BSc in Management"],
        "Arts": ["BA in Literature", "BA in Political Science"],
        "Technology": ["BSc in Chemical Engineering", "BSc in Environmental Engineering"]
    },
    "University of Moratuwa": {
        "Physical Science": ["BSc in Computer Engineering", "BSc in Industrial Engineering"],
        "Biological Science": ["BSc in Bioengineering", "BSc in Medical Imaging"],
        "Commerce": ["BSc in Marketing", "BSc in MIS"],
        "Arts": ["BA in Architecture", "BA in Design"],
        "Technology": ["BSc in Information Technology", "BSc in Software Engineering"]
    },
    "University of Sri Jayewardenepura": {
        "Physical Science": ["BSc in Chemistry", "BSc in Physics"],
        "Biological Science": ["BSc in Biology", "BSc in Biochemistry"],
        "Commerce": ["BSc in Economics", "BSc in Business Studies"],
        "Arts": ["BA in Sociology", "BA in Psychology"],
        "Technology": ["BSc in Information Systems", "BSc in Computer Science"]
    },
    "University of Kelaniya": {
        "Physical Science": ["BSc in Physics", "BSc in Mathematics"],
        "Biological Science": ["BSc in Environmental Studies", "BSc in Agricultural Science"],
        "Commerce": ["BSc in Tourism Management", "BSc in Hospitality Management"],
        "Arts": ["BA in Fine Arts", "BA in Performing Arts"],
        "Technology": ["BSc in Software Engineering", "BSc in Information Technology"]
    },
    "University of Ruhuna": {
        "Physical Science": ["BSc in Marine Engineering", "BSc in Geoscience"],
        "Biological Science": ["BSc in Fisheries Science", "BSc in Environmental Management"],
        "Commerce": ["BSc in Business Administration", "BSc in Logistics"],
        "Arts": ["BA in Cultural Studies", "BA in Mass Communication"],
        "Technology": ["BSc in Computer Science", "BSc in Electronics Engineering"]
    },
    "University of Jaffna": {
        "Physical Science": ["BSc in Civil Engineering", "BSc in Architecture"],
        "Biological Science": ["BSc in Biotechnology", "BSc in Zoology"],
        "Commerce": ["BSc in Economics", "BSc in Accountancy"],
        "Arts": ["BA in Tamil Studies", "BA in History"],
        "Technology": ["BSc in Information Technology", "BSc in Software Engineering"]
    }
}

# Deterministic assignment of government university based on Z_score
def assign_gov_university(z):
    """Map a Z-score to a government university using fixed cutoffs.

    The cutoffs are checked from highest to lowest and each comparison is
    strict (``z > cutoff``), so a score exactly equal to a cutoff falls into
    the next lower band.

    Args:
        z: The Z-score to classify.

    Returns:
        The university name for the first cutoff that ``z`` exceeds, or
        "University of Jaffna" when it exceeds none of them.
    """
    # Ordered from the most to the least selective band.
    cutoff_table = (
        (2.7, "University of Moratuwa"),
        (2.3, "University of Colombo"),
        (1.9, "University of Peradeniya"),
        (1.5, "University of Sri Jayewardenepura"),
        (1.1, "University of Kelaniya"),
        (0.8, "University of Ruhuna"),
    )
    for cutoff, university in cutoff_table:
        if z > cutoff:
            return university
    return "University of Jaffna"

# -------------------------------------------------
# 2. Generate the Synthetic Dataset for Government Univ. (1000 Rows)
# -------------------------------------------------
num_rows = 1000
# Seed both generators: NumPy draws the Z-scores, while the stdlib `random`
# module picks the districts, streams and courses.
np.random.seed(42)
random.seed(42)

# Z-scores are sampled uniformly between 0.5 and 3.0, then rounded to 2 decimals.
z_scores = np.round(np.random.uniform(0.5, 3.0, num_rows), 2)
# NOTE: the three random.choice passes below (districts, streams, courses)
# consume the seeded `random` stream in this order; reordering them changes
# the generated dataset.
district_choices = [random.choice(districts) for _ in range(num_rows)]
stream_choices = [random.choice(streams) for _ in range(num_rows)]

# The university is a deterministic function of the Z-score alone; the course
# is then picked at random from that university's list for the row's stream.
assigned_gov_universities = [assign_gov_university(z) for z in z_scores]
assigned_gov_courses = [
    random.choice(gov_univ_course_mapping[uni][stream])
    for uni, stream in zip(assigned_gov_universities, stream_choices)
]

# Column-oriented data for the DataFrame; all five sequences have num_rows entries.
gov_data = {
    "Z_score": z_scores,
    "District": district_choices,
    "Stream": stream_choices,
    "Selected_University": assigned_gov_universities,
    "Course": assigned_gov_courses
}
gov_df = pd.DataFrame(gov_data)

# Save government synthetic dataset to CSV
# (written at this point, so the file holds only the five columns above).
gov_dataset_filename = "government_universities.csv"
gov_df.to_csv(gov_dataset_filename, index=False)
print(f"Government dataset saved to {gov_dataset_filename}")
print("Sample of Government Synthetic Dataset:")
print(gov_df.head(10))

# The classifier predicts university and course jointly, so both are fused
# into a single "University|Course" label.
gov_df["Target"] = gov_df["Selected_University"] + "|" + gov_df["Course"]

# One encoder per categorical column; the fitted encoders are persisted with
# the model so the same integer codes can be reproduced at inference time.
le_district, le_stream, le_target = LabelEncoder(), LabelEncoder(), LabelEncoder()
gov_df["District_Enc"] = le_district.fit_transform(gov_df["District"])
gov_df["Stream_Enc"] = le_stream.fit_transform(gov_df["Stream"])
gov_df["Target_Enc"] = le_target.fit_transform(gov_df["Target"])

feature_columns = ["Z_score", "District_Enc", "Stream_Enc"]
X = gov_df[feature_columns]
y = gov_df["Target_Enc"]

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

clf = RandomForestClassifier(n_estimators=200, random_state=42)
clf.fit(X_train, y_train)
accuracy = clf.score(X_test, y_test)
print(f"\nGovernment Model accuracy on test set: {accuracy:.2f}")

# Persist the classifier together with its three encoders in one .pkl file.
gov_model_filename = "university_selection_model.pkl"
model_bundle = {
    "model": clf,
    "le_district": le_district,
    "le_stream": le_stream,
    "le_target": le_target,
}
joblib.dump(model_bundle, gov_model_filename)
print(f"\nGovernment model saved to {gov_model_filename}")

# -------------------------------------------------
# 3. Example Inference Using the Saved Government Model
# -------------------------------------------------
# Reload the bundle from disk so this example exercises the saved artefact
# (model + encoders) instead of the in-memory objects it was built from.
# The file was written by this notebook just above; joblib files are
# pickle-based, so never load one from an untrusted source.
saved_bundle = joblib.load(gov_model_filename)
saved_model = saved_bundle["model"]
saved_le_district = saved_bundle["le_district"]
saved_le_stream = saved_bundle["le_stream"]
saved_le_target = saved_bundle["le_target"]

test_z = 2.3
test_district = "Kandy"
test_stream = "Physical Science"

# Column names and order must match the features the model was trained on.
X_new = pd.DataFrame({
    "Z_score": [test_z],
    "District_Enc": saved_le_district.transform([test_district]),
    "Stream_Enc": saved_le_stream.transform([test_stream])
})

pred_enc = saved_model.predict(X_new)[0]
pred_composite = saved_le_target.inverse_transform([pred_enc])[0]
# The label has the form "University|Course"; maxsplit=1 keeps the two-name
# unpacking valid even if a course name ever contains the separator.
gov_univ_pred, gov_course_pred = pred_composite.split("|", 1)
print(f"\n[Government] Predicted University: {gov_univ_pred}")
print(f"[Government] Predicted Course: {gov_course_pred}")




Government dataset saved to government_universities.csv
Sample of Government Synthetic Dataset:
   Z_score     District              Stream       Selected_University  \
0     1.44  Polonnaruwa  Biological Science    University of Kelaniya   
1     2.88        Kandy            Commerce    University of Moratuwa   
2     2.33      Colombo    Physical Science     University of Colombo   
3     2.00    Ratnapura          Technology  University of Peradeniya   
4     0.89   Hambantota  Biological Science      University of Ruhuna   
5     0.89       Matara            Commerce      University of Ruhuna   
6     0.65       Matara    Physical Science      University of Jaffna   
7     2.67       Matale    Physical Science     University of Colombo   
8     2.00    Ratnapura          Technology  University of Peradeniya   
9     2.27        Kandy            Commerce  University of Peradeniya   

                            Course  
0     BSc in Environmental Studies  
1                       BS

In [None]:
import pandas as pd

# Accumulates one dictionary per (university, degree) pair; each dictionary
# later becomes one row of the private-universities DataFrame.
private_rows = []


def _append_university_rows(university, courses):
    """Append one row per entry of ``courses`` to ``private_rows``.

    Args:
        university: Name stored in the "University" field of every row.
        courses: Iterable of dicts with "Degree", "Relevant_Field" and "Link" keys.
    """
    for entry in courses:
        private_rows.append({
            "University": university,
            "Degree": entry["Degree"],
            "Relevant_Field": entry["Relevant_Field"],
            "Link": entry["Link"]
        })


# NSBM Green University
nsbm_courses = [
    {"Degree": "BSc (Hons) in Software Engineering", "Relevant_Field": "software engineering, development, programming", "Link": "https://nsbm.lk/degree/software-engineering"},
    {"Degree": "BSc (Hons) in Data Science", "Relevant_Field": "data science, analytics, programming", "Link": "https://nsbm.lk/degree/data-science"},
    {"Degree": "BSc (Hons) in Computer Science", "Relevant_Field": "computer science, programming, software engineering", "Link": "https://nsbm.lk/degree/computer-science"},
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming, networking", "Link": "https://nsbm.lk/degree/information-technology"},
    {"Degree": "BSc (Hons) in Cyber Security", "Relevant_Field": "cyber security, networking, programming", "Link": "https://nsbm.lk/degree/cyber-security"}
]
_append_university_rows("NSBM Green University", nsbm_courses)

# SLIIT
sliit_courses = [
    {"Degree": "BSc (Hons) in Data Science", "Relevant_Field": "data science, analytics, programming", "Link": "https://sliit.lk/degree/data-science"},
    {"Degree": "BSc (Hons) in Software Engineering", "Relevant_Field": "software engineering, programming", "Link": "https://sliit.lk/degree/software-engineering"},
    {"Degree": "BSc (Hons) in Electrical and Electronic Engineering", "Relevant_Field": "electrical engineering, electronics, programming", "Link": "https://sliit.lk/degree/electrical-engineering"},
    {"Degree": "BSc (Hons) in Business Information Technology", "Relevant_Field": "business IT, commerce, programming", "Link": "https://sliit.lk/degree/business-it"},
    {"Degree": "BSc (Hons) in Computer Science", "Relevant_Field": "computer science, programming, technology", "Link": "https://sliit.lk/degree/computer-science"}
]
_append_university_rows("SLIIT", sliit_courses)

# ICBT Campus
icbt_courses = [
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming", "Link": "https://icbt.lk/degree/information-technology"},
    {"Degree": "BSc (Hons) in Computer Engineering", "Relevant_Field": "computer engineering, programming", "Link": "https://icbt.lk/degree/computer-engineering"},
    {"Degree": "BSc (Hons) in Data Science", "Relevant_Field": "data science, analytics, programming", "Link": "https://icbt.lk/degree/data-science"},
    {"Degree": "BSc (Hons) in Accounting & Finance", "Relevant_Field": "accounting, finance, commerce", "Link": "https://icbt.lk/degree/accounting-finance"},
    {"Degree": "BA (Hons) in Mass Communication", "Relevant_Field": "mass communication, media, arts", "Link": "https://icbt.lk/degree/mass-communication"}
]
_append_university_rows("ICBT Campus", icbt_courses)

# Horizon Campus
horizon_courses = [
    {"Degree": "BSc (Hons) in Business Information Systems", "Relevant_Field": "business systems, IT, management", "Link": "https://horizoncampus.edu.lk/degree/business-info-systems"},
    {"Degree": "BSc (Hons) in Civil Engineering", "Relevant_Field": "civil engineering, construction, physical science", "Link": "https://horizoncampus.edu.lk/degree/civil-engineering"},
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming, technology", "Link": "https://horizoncampus.edu.lk/degree/information-technology"},
    {"Degree": "BA (Hons) in Media Studies", "Relevant_Field": "media studies, communication, arts", "Link": "https://horizoncampus.edu.lk/degree/media-studies"}
]
_append_university_rows("Horizon Campus", horizon_courses)

# Universal College Lanka (UCL)
ucl_courses = [
    {"Degree": "BSc (Hons) in Computer Science", "Relevant_Field": "computer science, programming", "Link": "https://universalcollege.lk/degree/computer-science"},
    {"Degree": "BSc (Hons) in Electrical Engineering", "Relevant_Field": "electrical engineering, technology", "Link": "https://universalcollege.lk/degree/electrical-engineering"},
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming", "Link": "https://universalcollege.lk/degree/information-technology"},
    {"Degree": "BSc (Hons) in Business Analytics", "Relevant_Field": "business analytics, statistics, commerce", "Link": "https://universalcollege.lk/degree/business-analytics"}
]
_append_university_rows("Universal College Lanka (UCL)", ucl_courses)

# Sri Lanka Technological Campus (SLTC)
sltc_courses = [
    {"Degree": "BSc (Hons) in Cyber Security", "Relevant_Field": "cyber security, networking", "Link": "https://sltc.lk/degree/cyber-security"},
    {"Degree": "BSc (Hons) in Computer Science", "Relevant_Field": "computer science, programming", "Link": "https://sltc.lk/degree/computer-science"},
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming", "Link": "https://sltc.lk/degree/information-technology"},
    {"Degree": "BSc (Hons) in Data Science", "Relevant_Field": "data science, analytics", "Link": "https://sltc.lk/degree/data-science"}
]
_append_university_rows("Sri Lanka Technological Campus (SLTC)", sltc_courses)

# CINEC Campus
cinec_courses = [
    {"Degree": "BSc (Hons) in Multimedia Technology", "Relevant_Field": "multimedia, design, programming", "Link": "https://cinec.edu/degree/multimedia-tech"},
    {"Degree": "BSc (Hons) in Animation", "Relevant_Field": "animation, design, arts", "Link": "https://cinec.edu/degree/animation"},
    {"Degree": "BSc (Hons) in Graphic Design", "Relevant_Field": "graphic design, arts, design", "Link": "https://cinec.edu/degree/graphic-design"}
]
_append_university_rows("CINEC Campus", cinec_courses)

# AIC Campus
aic_courses = [
    {"Degree": "BSc (Hons) in AI and Data Science", "Relevant_Field": "artificial intelligence, data science, machine learning", "Link": "https://aic.lk/degree/ai-data-science"},
    {"Degree": "BSc (Hons) in Artificial Intelligence", "Relevant_Field": "artificial intelligence, machine learning", "Link": "https://aic.lk/degree/artificial-intelligence"},
    {"Degree": "BSc (Hons) in Machine Learning", "Relevant_Field": "machine learning, data science, AI", "Link": "https://aic.lk/degree/machine-learning"}
]
_append_university_rows("AIC Campus", aic_courses)

# Royal Institute of Colombo
ric_courses = [
    {"Degree": "BSc (Hons) in Computer Networking", "Relevant_Field": "computer networking, telecommunications", "Link": "https://ric.lk/degree/computer-networking"},
    {"Degree": "BSc (Hons) in Cyber Security", "Relevant_Field": "cyber security, networking", "Link": "https://ric.lk/degree/cyber-security"},
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, programming", "Link": "https://ric.lk/degree/information-technology"}
]
_append_university_rows("Royal Institute of Colombo", ric_courses)

# NIBM
nibm_courses = [
    {"Degree": "BSc (Hons) in Business Analytics", "Relevant_Field": "business analytics, statistics, data", "Link": "https://nibm.lk/degree/business-analytics"},
    {"Degree": "BSc (Hons) in Digital Marketing", "Relevant_Field": "digital marketing, commerce, media", "Link": "https://nibm.lk/degree/digital-marketing"},
    {"Degree": "BSc (Hons) in Management", "Relevant_Field": "management, business, administration", "Link": "https://nibm.lk/degree/management"}
]
_append_university_rows("NIBM", nibm_courses)

# IIT - Add 10 degree programs per stream.
# Define a helper function to add IIT courses.
def add_iit_courses(stream, courses):
    """Append one "IIT" row to ``private_rows`` for every entry in ``courses``.

    Args:
        stream: Stream label supplied by the caller. The function body never
            reads it, so it is not stored in the generated rows.
        courses: Iterable of dicts with "Degree", "Relevant_Field" and "Link" keys.
    """
    private_rows.extend(
        {
            "University": "IIT",
            "Degree": item["Degree"],
            "Relevant_Field": item["Relevant_Field"],
            "Link": item["Link"],
        }
        for item in courses
    )

# IIT courses for each stream
# Five lists of ten entries each, one list per A-Level stream. Every entry has
# the same "Degree" / "Relevant_Field" / "Link" keys as the other universities'
# course lists, and its "Relevant_Field" text ends with the stream name.

# Physical Science stream
iit_physical = [
    {"Degree": "BSc (Hons) in Computer Science", "Relevant_Field": "computer science, physical science", "Link": "https://iit.lk/degree/computer-science"},
    {"Degree": "BSc (Hons) in Data Analytics", "Relevant_Field": "data analytics, physical science", "Link": "https://iit.lk/degree/data-analytics"},
    {"Degree": "BSc (Hons) in Cyber Security", "Relevant_Field": "cyber security, physical science", "Link": "https://iit.lk/degree/cyber-security"},
    {"Degree": "BSc (Hons) in Electrical Engineering", "Relevant_Field": "electrical engineering, physical science", "Link": "https://iit.lk/degree/electrical-engineering"},
    {"Degree": "BSc (Hons) in Mechanical Engineering", "Relevant_Field": "mechanical engineering, physical science", "Link": "https://iit.lk/degree/mechanical-engineering"},
    {"Degree": "BSc (Hons) in Software Engineering", "Relevant_Field": "software engineering, physical science", "Link": "https://iit.lk/degree/software-engineering"},
    {"Degree": "BSc (Hons) in Robotics Engineering", "Relevant_Field": "robotics, physical science", "Link": "https://iit.lk/degree/robotics-engineering"},
    {"Degree": "BSc (Hons) in Artificial Intelligence", "Relevant_Field": "artificial intelligence, physical science", "Link": "https://iit.lk/degree/artificial-intelligence"},
    {"Degree": "BSc (Hons) in Digital Signal Processing", "Relevant_Field": "DSP, physical science", "Link": "https://iit.lk/degree/digital-signal-processing"},
    {"Degree": "BSc (Hons) in Quantum Computing", "Relevant_Field": "quantum computing, physical science", "Link": "https://iit.lk/degree/quantum-computing"}
]
# Biological Science stream
iit_biological = [
    {"Degree": "BSc (Hons) in Bioinformatics", "Relevant_Field": "bioinformatics, biological science", "Link": "https://iit.lk/degree/bioinformatics"},
    {"Degree": "BSc (Hons) in Biotechnology", "Relevant_Field": "biotechnology, biological science", "Link": "https://iit.lk/degree/biotechnology"},
    {"Degree": "BSc (Hons) in Molecular Biology", "Relevant_Field": "molecular biology, biological science", "Link": "https://iit.lk/degree/molecular-biology"},
    {"Degree": "BSc (Hons) in Genetic Engineering", "Relevant_Field": "genetic engineering, biological science", "Link": "https://iit.lk/degree/genetic-engineering"},
    {"Degree": "BSc (Hons) in Neuroscience", "Relevant_Field": "neuroscience, biological science", "Link": "https://iit.lk/degree/neuroscience"},
    {"Degree": "BSc (Hons) in Environmental Biology", "Relevant_Field": "environmental biology, biological science", "Link": "https://iit.lk/degree/environmental-biology"},
    {"Degree": "BSc (Hons) in Microbiology", "Relevant_Field": "microbiology, biological science", "Link": "https://iit.lk/degree/microbiology"},
    {"Degree": "BSc (Hons) in Marine Biology", "Relevant_Field": "marine biology, biological science", "Link": "https://iit.lk/degree/marine-biology"},
    {"Degree": "BSc (Hons) in Biomedical Engineering", "Relevant_Field": "biomedical engineering, biological science", "Link": "https://iit.lk/degree/biomedical-engineering"},
    {"Degree": "BSc (Hons) in Clinical Research", "Relevant_Field": "clinical research, biological science", "Link": "https://iit.lk/degree/clinical-research"}
]
# Commerce stream
iit_commerce = [
    {"Degree": "BSc (Hons) in Business Informatics", "Relevant_Field": "business informatics, commerce", "Link": "https://iit.lk/degree/business-informatics"},
    {"Degree": "BSc (Hons) in Financial Engineering", "Relevant_Field": "financial engineering, commerce", "Link": "https://iit.lk/degree/financial-engineering"},
    {"Degree": "BSc (Hons) in E-Commerce", "Relevant_Field": "e-commerce, commerce", "Link": "https://iit.lk/degree/e-commerce"},
    {"Degree": "BSc (Hons) in Marketing Analytics", "Relevant_Field": "marketing analytics, commerce", "Link": "https://iit.lk/degree/marketing-analytics"},
    {"Degree": "BSc (Hons) in International Business", "Relevant_Field": "international business, commerce", "Link": "https://iit.lk/degree/international-business"},
    {"Degree": "BSc (Hons) in Supply Chain Management", "Relevant_Field": "supply chain management, commerce", "Link": "https://iit.lk/degree/supply-chain-management"},
    {"Degree": "BSc (Hons) in Business Analytics", "Relevant_Field": "business analytics, commerce", "Link": "https://iit.lk/degree/business-analytics"},
    {"Degree": "BSc (Hons) in Digital Marketing", "Relevant_Field": "digital marketing, commerce", "Link": "https://iit.lk/degree/digital-marketing"},
    {"Degree": "BSc (Hons) in Corporate Finance", "Relevant_Field": "corporate finance, commerce", "Link": "https://iit.lk/degree/corporate-finance"},
    {"Degree": "BSc (Hons) in Entrepreneurial Studies", "Relevant_Field": "entrepreneurial studies, commerce", "Link": "https://iit.lk/degree/entrepreneurial-studies"}
]
# Arts stream (the only list whose degrees are BA rather than BSc)
iit_arts = [
    {"Degree": "BA (Hons) in Digital Media", "Relevant_Field": "digital media, arts", "Link": "https://iit.lk/degree/digital-media"},
    {"Degree": "BA (Hons) in Graphic Design", "Relevant_Field": "graphic design, arts", "Link": "https://iit.lk/degree/graphic-design"},
    {"Degree": "BA (Hons) in Film Studies", "Relevant_Field": "film studies, arts", "Link": "https://iit.lk/degree/film-studies"},
    {"Degree": "BA (Hons) in Animation", "Relevant_Field": "animation, arts", "Link": "https://iit.lk/degree/animation"},
    {"Degree": "BA (Hons) in Creative Writing", "Relevant_Field": "creative writing, arts", "Link": "https://iit.lk/degree/creative-writing"},
    {"Degree": "BA (Hons) in Cultural Studies", "Relevant_Field": "cultural studies, arts", "Link": "https://iit.lk/degree/cultural-studies"},
    {"Degree": "BA (Hons) in Music Production", "Relevant_Field": "music production, arts", "Link": "https://iit.lk/degree/music-production"},
    {"Degree": "BA (Hons) in Theatre Arts", "Relevant_Field": "theatre arts, arts", "Link": "https://iit.lk/degree/theatre-arts"},
    {"Degree": "BA (Hons) in Photography", "Relevant_Field": "photography, arts", "Link": "https://iit.lk/degree/photography"},
    {"Degree": "BA (Hons) in Interior Design", "Relevant_Field": "interior design, arts", "Link": "https://iit.lk/degree/interior-design"}
]
# Technology stream
iit_technology = [
    {"Degree": "BSc (Hons) in Information Technology", "Relevant_Field": "information technology, technology", "Link": "https://iit.lk/degree/information-technology"},
    {"Degree": "BSc (Hons) in Software Development", "Relevant_Field": "software development, technology", "Link": "https://iit.lk/degree/software-development"},
    {"Degree": "BSc (Hons) in Cloud Computing", "Relevant_Field": "cloud computing, technology", "Link": "https://iit.lk/degree/cloud-computing"},
    {"Degree": "BSc (Hons) in Cyber Forensics", "Relevant_Field": "cyber forensics, technology", "Link": "https://iit.lk/degree/cyber-forensics"},
    {"Degree": "BSc (Hons) in Data Science", "Relevant_Field": "data science, technology", "Link": "https://iit.lk/degree/data-science"},
    {"Degree": "BSc (Hons) in Mobile Application Development", "Relevant_Field": "mobile application development, technology", "Link": "https://iit.lk/degree/mobile-app-development"},
    {"Degree": "BSc (Hons) in Game Development", "Relevant_Field": "game development, technology", "Link": "https://iit.lk/degree/game-development"},
    {"Degree": "BSc (Hons) in Blockchain Technology", "Relevant_Field": "blockchain technology, technology", "Link": "https://iit.lk/degree/blockchain-technology"},
    {"Degree": "BSc (Hons) in Internet of Things", "Relevant_Field": "internet of things, technology", "Link": "https://iit.lk/degree/iot"},
    {"Degree": "BSc (Hons) in DevOps Engineering", "Relevant_Field": "devops engineering, technology", "Link": "https://iit.lk/degree/devops-engineering"}
]

# Register every IIT stream list; the order of this table fixes the order of
# the IIT rows in the exported CSV.
iit_course_groups = (
    ("Physical Science", iit_physical),
    ("Biological Science", iit_biological),
    ("Commerce", iit_commerce),
    ("Arts", iit_arts),
    ("Technology", iit_technology),
)
for stream_label, stream_courses in iit_course_groups:
    add_iit_courses(stream_label, stream_courses)

# Build the DataFrame once from the accumulated list of row dictionaries.
private_df = pd.DataFrame(private_rows)

# Write the dataset to CSV (without the index column) and preview it.
csv_filename = "private_universities.csv"
private_df.to_csv(csv_filename, index=False)
print(f"Private universities dataset saved to '{csv_filename}'")
print(private_df.head(15))


Private universities dataset saved to 'private_universities.csv'
               University                                             Degree  \
0   NSBM Green University                 BSc (Hons) in Software Engineering   
1   NSBM Green University                         BSc (Hons) in Data Science   
2   NSBM Green University                     BSc (Hons) in Computer Science   
3   NSBM Green University               BSc (Hons) in Information Technology   
4   NSBM Green University                       BSc (Hons) in Cyber Security   
5                   SLIIT                         BSc (Hons) in Data Science   
6                   SLIIT                 BSc (Hons) in Software Engineering   
7                   SLIIT  BSc (Hons) in Electrical and Electronic Engine...   
8                   SLIIT      BSc (Hons) in Business Information Technology   
9                   SLIIT                     BSc (Hons) in Computer Science   
10            ICBT Campus               BSc (Hons) in I

In [None]:
import pandas as pd
import numpy as np
import random
from sentence_transformers import SentenceTransformer, util
import joblib

# -------------------------------------------------
# 1. Load Data & Models
# -------------------------------------------------

# Government model bundle: one dict holding the classifier and its label encoders.
# SECURITY: joblib.load unpickles the file, and unpickling can execute arbitrary
# code, so only load an artefact that you produced or otherwise trust.
gov_model_data = joblib.load("university_selection_model.pkl")
gov_model, le_district, le_stream, le_target = (
    gov_model_data[key] for key in ("model", "le_district", "le_stream", "le_target")
)

# Career dataset used for skill matching; missing text becomes "" so every
# row can be embedded as a string.
career_df = pd.read_csv("Career Dataset.csv").assign(
    Career=lambda frame: frame["Career"].fillna("").astype(str),
    Skill=lambda frame: frame["Skill"].fillna("").astype(str),
)

# Private university catalogue
private_unis = pd.read_csv("private_universities.csv").assign(
    Relevant_Field=lambda frame: frame["Relevant_Field"].fillna("").astype(str),
)

# Government university catalogue
gov_unis_df = pd.read_csv("government_universities.csv").assign(
    Stream=lambda frame: frame["Stream"].fillna("").astype(str),
)

# Sentence-BERT encoder shared by the searches below
model = SentenceTransformer("paraphrase-mpnet-base-v2")

# -------------------------------------------------
# 2. Semantic Search for the User's Skill Input
# -------------------------------------------------
# Embed the "Skill" column once up front; user queries are compared against
# these vectors to find the best matching career.
skill_embeddings = model.encode(career_df["Skill"].tolist(), convert_to_tensor=True)

In [None]:
# `model` (created in the loading cell above) already holds the
# "paraphrase-mpnet-base-v2" encoder. Alias it instead of loading the same
# checkpoint a second time, which would double load time and memory and give
# no benefit: the query and the corpus must be embedded by the same model anyway.
model2 = model

In [None]:


# Read the free-text skill and embed it for the semantic searches below.
user_skill_input = input("Enter the skill area you are interested in (e.g., 'data analytics'): ").strip()
query_embedding = model2.encode(user_skill_input, convert_to_tensor=True)

# Rank the career dataset's skills against the query; only the best hit is reported.
top_k = 5
search_results = util.semantic_search(query_embedding, skill_embeddings, top_k=top_k)[0]

if search_results:
    # Best match is the first hit; look its row up once and read both fields from it.
    top_match = search_results[0]
    matched_row = career_df.iloc[top_match["corpus_id"]]
    matched_career, matched_skill_text = matched_row["Career"], matched_row["Skill"]
    matched_score = top_match["score"]
    print(f"\nTop matched skill from dataset: '{matched_skill_text}' (Score: {matched_score:.2f})")
    print(f"Corresponding Career: '{matched_career}'")
else:
    # No hits at all: fall back to the first row of the career dataset.
    print("No matching skills found in the dataset. Showing fallback career from the entire dataset.\n")
    fallback_career = career_df.iloc[0]
    matched_career, matched_skill_text = fallback_career["Career"], fallback_career["Skill"]
    print(f"Fallback Career: '{matched_career}' / Skill: '{matched_skill_text}'")

# -------------------------------------------------
# 3. Private University Recommendations (with fallback)
# -------------------------------------------------
print("\n----- PRIVATE UNIVERSITY RECOMMENDATIONS -----")

# (label shown to the user, column read from private_unis), in print order
_private_display_fields = (
    ("University", "University"),
    ("Degree", "Degree"),
    ("Relevant Field", "Relevant_Field"),
    ("Link", "Link"),
)

# 3.1 Encode every private degree's "Relevant_Field" text
private_fields = private_unis["Relevant_Field"].tolist()
private_field_embeddings = model.encode(private_fields, convert_to_tensor=True)

# 3.2 Rank the whole private catalogue against the query embedding
private_results = util.semantic_search(
    query_embedding, private_field_embeddings, top_k=len(private_unis)
)[0]

if private_results:
    print("Top Private University Recommendations:")
    # Only the three best-scoring degrees are shown.
    top_private = private_results[:3]
    for idx, result in enumerate(top_private, start=1):
        row_idx, score = result["corpus_id"], result["score"]
        row = private_unis.iloc[row_idx]
        print(f"\nRecommendation {idx}:")
        for _label, _column in _private_display_fields:
            print(f"  {_label}: {row[_column]}")
        print(f"  Similarity Score: {score:.2f}")
else:
    # The search returned nothing: show the first catalogue row instead.
    print(f"No semantic matches found in private universities for skill '{user_skill_input}'.")
    print("Providing fallback suggestion from entire private dataset:\n")
    fallback_row = private_unis.iloc[0]
    for _label, _column in _private_display_fields:
        print(f"  {_label}: {fallback_row[_column]}")

# -------------------------------------------------
# 4. Government University Recommendations
# -------------------------------------------------
# Optional section: runs only when the user opts in AND supplies a numeric Z-score.
gov_interest = input("\nAre you interested in government universities? (yes/no): ").lower().strip()

user_z = None  # stays None when the user opts out or the Z-score cannot be parsed
if gov_interest in ["yes", "y"]:
    try:
        user_z = float(input("Enter your Z-score: ").strip())
    except ValueError:
        # Skip the section instead of calling exit(): in a notebook that would
        # shut down the kernel and discard the loaded models.
        print("Invalid Z-score. Exiting government university search.")

if user_z is not None:
    # NOTE(review): district and stream are collected but not used by the search
    # below; presumably they were meant as inputs for `gov_model` — confirm.
    user_district = input("Enter your District (e.g., 'Kandy'): ").strip()
    al_stream = input("Enter your A-Level stream (e.g., 'Physical Science', 'Commerce', etc.): ").strip()

    # Fixed: the original literal was "\ n...", an invalid escape that printed a
    # backslash instead of starting a new line.
    print("\n----- GOVERNMENT UNIVERSITY RECOMMENDATIONS -----")

    # 4.1 Keep only programmes whose Z-score cutoff the user meets (cutoff <= user's score)
    filtered_gov = gov_unis_df[gov_unis_df["Z_score"] <= user_z].copy()
    if filtered_gov.empty:
        print(f"No government programs match your Z-score {user_z}.")
        # Fallback: show at least one program from the entire gov dataset
        fallback_row = gov_unis_df.iloc[0]
        print("\nHowever, here's one fallback suggestion (ignoring Z-score):")
        print(f"  University: {fallback_row['Selected_University']}")
        print(f"  Degree: {fallback_row['Course']}")
        print(f"  Z-Cutoff Required: {fallback_row['Z_score']}")
        print(f"  Relevant Field: {fallback_row['Stream']}")
    else:
        # 4.2 Semantic search on "Stream" for the filtered rows; corpus ids
        # returned by the search are positions within `filtered_gov`.
        gov_fields = filtered_gov["Stream"].tolist()
        gov_field_embeddings = model.encode(gov_fields, convert_to_tensor=True)
        gov_results = util.semantic_search(query_embedding, gov_field_embeddings, top_k=len(filtered_gov))[0]

        if not gov_results:
            print(f"\nNo government universities found matching your skill '{user_skill_input}' after Z-score filter.")
            # Fallback: pick at least one row from 'filtered_gov'
            fallback_gov = filtered_gov.iloc[0]
            print("\nHere is one fallback suggestion:")
            print(f"  University: {fallback_gov['Selected_University']}")
            print(f"  Course: {fallback_gov['Course']}")
            print(f"  Z-Cutoff Required: {fallback_gov['Z_score']}")
            print(f"  Relevant Field: {fallback_gov['Stream']}")
        else:
            # Show top 3. The heading now states the filter that was actually
            # applied (programme cutoff <= user's Z-score); it used to say ">=".
            print(f"\nTop Government University Recommendations for skill '{user_skill_input}' with Z-score cutoff <= {user_z}:")
            top_gov_sem = gov_results[:3]
            for idx, result in enumerate(top_gov_sem, start=1):
                row_idx = result["corpus_id"]
                score = result["score"]
                row = filtered_gov.iloc[row_idx]
                print(f"\nRecommendation {idx}:")
                print(f"  University: {row['Selected_University']}")
                print(f"  Course: {row['Course']}")
                print(f"  Z-Cutoff Required: {row['Z_score']}")
                print(f"  Relevant Field: {row['Stream']}")
                print(f"  Similarity Score: {score:.2f}")

Enter the skill area you are interested in (e.g., 'data analytics'): cyber security

Top matched skill from dataset: 'Cybersecures, Network Security' (Score: 0.90)
Corresponding Career: 'Security'

----- PRIVATE UNIVERSITY RECOMMENDATIONS -----
Top Private University Recommendations:

Recommendation 1:
  University: Sri Lanka Technological Campus (SLTC)
  Degree: BSc (Hons) in Cyber Security
  Relevant Field: cyber security, networking
  Link: https://sltc.lk/degree/cyber-security
  Similarity Score: 0.85

Recommendation 2:
  University: Royal Institute of Colombo
  Degree: BSc (Hons) in Cyber Security
  Relevant Field: cyber security, networking
  Link: https://ric.lk/degree/cyber-security
  Similarity Score: 0.85

Recommendation 3:
  University: NSBM Green University
  Degree: BSc (Hons) in Cyber Security
  Relevant Field: cyber security, networking, programming
  Link: https://nsbm.lk/degree/cyber-security
  Similarity Score: 0.80

Are you interested in government universities? (yes

## SERVER


In [None]:
!pip install pyngrok flask_cors

Collecting pyngrok
  Downloading pyngrok-7.2.3-py3-none-any.whl.metadata (8.7 kB)
Collecting flask_cors
  Downloading flask_cors-5.0.1-py3-none-any.whl.metadata (961 bytes)
Downloading pyngrok-7.2.3-py3-none-any.whl (23 kB)
Downloading flask_cors-5.0.1-py3-none-any.whl (11 kB)
Installing collected packages: pyngrok, flask_cors
Successfully installed flask_cors-5.0.1 pyngrok-7.2.3


In [None]:
import pandas as pd
import numpy as np
import random
from flask import Flask, request, jsonify
from pyngrok import ngrok
from flask_cors import CORS


# ===============================
# 2. Create Flask App
# ===============================
# Application object; the @app.route decorators below register handlers on it.
app = Flask(__name__)
# Enable cross-origin requests for every route of the app.
# NOTE(review): no origin restriction is passed, which presumably allows any
# site to call the API (flask-cors default) — confirm this is intended, since
# the app is also exposed publicly through an ngrok tunnel.
CORS(app)

@app.route('/')
def index():
    """Liveness endpoint: respond to ``GET /`` with a fixed plain-text message."""
    liveness_message = "Flask with pyngrok is running!"
    return liveness_message

def _gov_record(row, similarity=None):
    """Convert one government-programme row into a JSON-serialisable dict.

    Args:
        row: A row of ``gov_unis_df`` providing ``Selected_University``,
            ``Course``, ``Z_score`` and ``Stream``.
        similarity: Semantic-search score of the row, or ``None`` when the row
            is a fallback suggestion rather than a ranked match.

    Returns:
        dict with the programme fields plus either ``Similarity_Score`` (ranked
        match) or ``Fallback: True`` (fallback suggestion).
    """
    record = {
        "University": row["Selected_University"],
        "Degree": row["Course"],
        # Always a plain float so ranked and fallback records serialise alike.
        "Z_score_Program": float(row["Z_score"]),
        "Stream": row["Stream"],
    }
    if similarity is None:
        record["Fallback"] = True
    else:
        record["Similarity_Score"] = round(float(similarity), 2)
    return record


def _private_recommendations(query_embedding, limit=3):
    """Return up to ``limit`` private degrees ranked by similarity to the query."""
    # NOTE(review): the whole catalogue is re-embedded on every request; if
    # `private_unis` is static, these embeddings could be computed once at start-up.
    private_field_embeddings = model.encode(
        private_unis["Relevant_Field"].fillna("").tolist(), convert_to_tensor=True
    )
    private_results = util.semantic_search(
        query_embedding, private_field_embeddings, top_k=len(private_unis)
    )[0]

    if not private_results:
        # Nothing ranked: fall back to the first catalogue row with a zero score.
        fallback_row = private_unis.iloc[0]
        return [{
            "University": fallback_row["University"],
            "Degree": fallback_row["Degree"],
            "Relevant_Field": fallback_row["Relevant_Field"],
            "Link": fallback_row["Link"],
            "Similarity_Score": 0.0
        }]

    private_recs = []
    for res in private_results[:limit]:
        row_data = private_unis.iloc[res["corpus_id"]]
        private_recs.append({
            "University": row_data["University"],
            "Degree": row_data["Degree"],
            "Relevant_Field": row_data["Relevant_Field"],
            "Link": row_data["Link"],
            "Similarity_Score": round(float(res["score"]), 2)
        })
    return private_recs


def _government_recommendations(query_embedding, user_z, limit=3):
    """Return up to ``limit`` government programmes whose cutoff is <= ``user_z``.

    Falls back to a single record flagged ``Fallback`` when no programme passes
    the Z-score filter or the semantic search returns nothing.
    """
    filtered_gov = gov_unis_df[gov_unis_df["Z_score"] <= user_z]
    if filtered_gov.empty:
        return [_gov_record(gov_unis_df.iloc[0])]

    gov_field_embeddings = model.encode(filtered_gov["Stream"].tolist(), convert_to_tensor=True)
    gov_results = util.semantic_search(
        query_embedding, gov_field_embeddings, top_k=len(filtered_gov)
    )[0]
    if not gov_results:
        return [_gov_record(filtered_gov.iloc[0])]

    # corpus_id is a position within `filtered_gov`, hence .iloc on the filtered frame.
    return [
        _gov_record(filtered_gov.iloc[res["corpus_id"]], res["score"])
        for res in gov_results[:limit]
    ]


@app.route('/recommend', methods=['POST'])
def recommend():
    """Recommend a career and university programmes for a skill description.

    Expects a JSON object with:
        skill_input (str, required): free-text skill or interest.
        gov_interest (str | bool, optional): "yes"/"y"/true to include
            government programmes; anything else skips them.
        z_score (number | numeric str): required when gov_interest is set.

    Returns:
        200 with ``matched_skill_text``, ``matched_career``, ``matched_score``,
        ``private_universities`` and, when requested, ``government_universities``
        (which holds a single ``{"error": ...}`` entry if z_score is missing or
        not numeric); 400 with ``{"error": ...}`` when ``skill_input`` is
        missing, blank or not a string.
    """
    # silent=True makes a missing or malformed JSON body yield None, so the
    # client gets this handler's JSON 400 below instead of a framework error page.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        data = {}

    raw_skill = data.get("skill_input", "")
    user_skill_input = raw_skill.strip() if isinstance(raw_skill, str) else ""
    if not user_skill_input:
        return jsonify({"error": "No skill_input provided"}), 400

    # Perform semantic search on career_df
    query_embedding = model.encode(user_skill_input, convert_to_tensor=True)
    top_k = 5
    search_results = util.semantic_search(query_embedding, skill_embeddings, top_k=top_k)[0]

    if search_results:
        top_match = search_results[0]
        matched_row = career_df.iloc[top_match["corpus_id"]]
        matched_score = top_match["score"]
    else:
        # fallback to first row
        matched_row = career_df.iloc[0]
        matched_score = 0.0

    response = {
        "matched_skill_text": matched_row["Skill"],
        "matched_career": matched_row["Career"],
        "matched_score": round(float(matched_score), 2),
        "private_universities": _private_recommendations(query_embedding),
    }

    # str() tolerates JSON booleans and null, which previously raised on .lower().
    gov_interest = str(data.get("gov_interest", "no")).strip().lower()
    if gov_interest in ("yes", "y", "true"):
        raw_z = data.get("z_score")
        if raw_z is None:
            response["government_universities"] = [{"error": "No z_score provided."}]
        else:
            try:
                user_z = float(raw_z)
            except (TypeError, ValueError):
                # Report bad input the same way as a missing z_score instead of a 500.
                response["government_universities"] = [{"error": "Invalid z_score provided."}]
            else:
                response["government_universities"] = _government_recommendations(
                    query_embedding, user_z
                )

    return jsonify(response), 200



# 1) Provide the ngrok auth token without storing it in the notebook.
import os
from getpass import getpass

from pyngrok import ngrok

# SECURITY: a token was previously hard-coded in this cell. Treat it as leaked
# and revoke it in the ngrok dashboard. The token is now read from the
# NGROK_AUTHTOKEN environment variable, with a hidden interactive prompt as
# fallback, so it never ends up in the saved notebook.
ngrok_auth_token = os.environ.get("NGROK_AUTHTOKEN") or getpass("ngrok auth token: ")
ngrok.set_auth_token(ngrok_auth_token)

SERVER_PORT = 5000  # the tunnel and the Flask server must use the same port

# 2) Create the tunnel
public_url = ngrok.connect(SERVER_PORT)
print(" * Pyngrok URL:", public_url.public_url)
# 3) Run the app on the tunnelled port (blocks until the server is stopped)
app.run(port=SERVER_PORT)


 * Pyngrok URL: https://6868-104-197-3-197.ngrok-free.app
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:48:59] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:49:00] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:52:33] "OPTIONS /recommend HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:52:36] "POST /recommend HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:53:27] "OPTIONS /recommend HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [30/Mar/2025 16:53:35] "POST /recommend HTTP/1.1" 200 -
