In [1]:
!pip install -U -q "google-generativeai>=0.8.3"

In [4]:
import google.generativeai as genai


In [5]:
genai.configure(api_key="AIzaSyBp-RzlAZAjsaZAwFX4CyFFRFVsADUxjKc")


In [6]:
!pip install fpdf

Collecting fpdf
  Downloading fpdf-1.7.2.tar.gz (39 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: fpdf
  Building wheel for fpdf (setup.py) ... [?25l[?25hdone
  Created wheel for fpdf: filename=fpdf-1.7.2-py2.py3-none-any.whl size=40704 sha256=35522eb48eb28af888cbca1c605b6b95a4f2970619c38e00ea95b737dfad04b4
  Stored in directory: /root/.cache/pip/wheels/f9/95/ba/f418094659025eb9611f17cbcaf2334236bf39a0c3453ea455
Successfully built fpdf
Installing collected packages: fpdf
Successfully installed fpdf-1.7.2


In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from fpdf import FPDF
from transformers import pipeline

In [8]:
# Load the dataset
data = pd.read_csv('/content/cs_students.csv')

# Map skill levels to numerical values
skill_mapping = {'Strong': 2, 'Average': 1, 'Weak': 0}
data['Python'] = data['Python'].map(skill_mapping)
data['SQL'] = data['SQL'].map(skill_mapping)
data['Java'] = data['Java'].map(skill_mapping)

In [9]:
# One-hot encode the 'Interested Domain' column
data_encoded = pd.get_dummies(data, columns=['Interested Domain'], drop_first=True)

# Automatically encode 'Future Career'
label_encoder = LabelEncoder()
data_encoded['Career'] = label_encoder.fit_transform(data_encoded['Future Career'])

In [10]:
# Drop unnecessary columns
data_processed = data_encoded.drop(columns=['Student ID', 'Name', 'Gender', 'Future Career', 'Major', 'Projects'])

# Split features and target
X = data_processed.drop('Career', axis=1)
y = data_processed['Career']

# Train a KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

In [11]:
# Function to recommend careers using KNN
def recommend_careers(student_profile, knn_model, top_n=3):
    distances, indices = knn_model.kneighbors(student_profile)
    recommended_careers = y.iloc[indices[0]].values[:top_n]
    recommended_career_names = label_encoder.inverse_transform(recommended_careers)
    return recommended_career_names


Fetching Career Labels for Nearest Neighbors:


The indices[0] corresponds to the indices of the nearest neighbors.

indices[0] contains the positions of the top n closest students in the dataset.
Using these indices, the function retrieves the career labels from the target variable y (which holds the career labels of each student in the dataset).


The .values[:top_n] ensures that only the career labels of the closest students are selected and that the number of recommended careers is limited to the top_n (default is 3).

-------------------------------------------
Transforming Numeric Career Labels Back to Original:

The y values (careers) were encoded into numerical labels using a LabelEncoder earlier in the process (as seen in your code).

The inverse_transform method is used to convert these numeric career labels back to the original career names (e.g., from 0, 1, 2 back to "Software Developer", "Data Scientist", etc.).

This step ensures that the final recommendation is in the human-readable form (career names).

In [12]:
# Input function for a new student
def input_new_student():
    gpa = float(input("Enter GPA: "))
    python_skill = input("Python skill (Strong, Average, Weak): ")
    sql_skill = input("SQL skill (Strong, Average, Weak): ")
    java_skill = input("Java skill (Strong, Average, Weak): ")
    domain = input("Interested domain (from the available domains): ")

    python_skill_num = skill_mapping[python_skill]
    sql_skill_num = skill_mapping[sql_skill]
    java_skill_num = skill_mapping[java_skill]

    new_student_profile = pd.DataFrame({
        'GPA': [gpa],
        'Python': [python_skill_num],
        'SQL': [sql_skill_num],
        'Java': [java_skill_num]
    })

    domain_encoded = pd.get_dummies([domain], columns=['Interested Domain'], drop_first=True)
    new_student_profile = pd.concat([new_student_profile, domain_encoded], axis=1)

    missing_cols = set(data_processed.columns) - set(new_student_profile.columns)
    for col in missing_cols:
        new_student_profile[col] = 0

    new_student_profile = new_student_profile[X.columns]
    return new_student_profile

In [13]:
# Get input and recommend careers
new_student_profile = input_new_student()
recommended_careers_for_new_student = recommend_careers(new_student_profile, knn)

# Prepare the input for the generative model
careers_list = ", ".join(recommended_careers_for_new_student)

# Extract skills and input details for context
python_skill = new_student_profile.iloc[0]['Python']
sql_skill = new_student_profile.iloc[0]['SQL']
java_skill = new_student_profile.iloc[0]['Java']
gpa = new_student_profile.iloc[0]['GPA']

Enter GPA: 9.19
Python skill (Strong, Average, Weak): Average
SQL skill (Strong, Average, Weak): Strong
Java skill (Strong, Average, Weak): Strong
Interested domain (from the available domains): Data Scientist


In [14]:
skills_info = (
    f"The student has a GPA of {gpa}, with the following skill levels:\n"
    f"- Python: {'Strong' if python_skill == 2 else 'Average' if python_skill == 1 else 'Weak'}\n"
    f"- SQL: {'Strong' if sql_skill == 2 else 'Average' if sql_skill == 1 else 'Weak'}\n"
    f"- Java: {'Strong' if java_skill == 2 else 'Average' if java_skill == 1 else 'Weak'}\n"
)

prompt = (
    f"{skills_info}\n"
    f"Based on their profile, the system has recommended the following careers: {careers_list}. "
    f"Please provide a detailed, step-by-step guide on how the student can prepare and apply for these careers. "
    f"Include required skills, certifications, job search strategies, and networking tips."
)

In [15]:
# Use the GenerativeModel (flash) to generate content with temperature
flash = genai.GenerativeModel('gemini-1.5-flash')
response = flash.generate_content(prompt)
response_text = response.text

In [16]:
# Create a PDF report with FPDF
class PDFReport(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Career Recommendation Report', 0, 1, 'C')

    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, body):
        self.set_font('Arial', '', 11)
        # Encode the body text using utf-8
        self.multi_cell(0, 10, body.encode('utf-8').decode('latin-1'))
        self.ln()

# Create the PDF Report
pdf = PDFReport()
pdf.add_page()

# Add student's skills and profile information
pdf.chapter_title("Student Profile")
pdf.chapter_body(skills_info)

# Add recommended careers to PDF
pdf.chapter_title("Recommended Careers")
pdf.chapter_body(careers_list)

# Add LLM-generated response to PDF
pdf.chapter_title("Career Preparation Guide")
pdf.chapter_body(response_text)

# Save the PDF
output_pdf = "career_recommendation_report1.pdf"
pdf.output(output_pdf)
print(f"PDF Report saved as {output_pdf}")

PDF Report saved as career_recommendation_report1.pdf


In [17]:
import pickle

In [18]:
pickle.dump(knn,open('knn.pkl','wb'))

In [19]:
pickle_model = pickle.load(open('knn.pkl','rb'))

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
import pandas as pd
from fpdf import FPDF
from transformers import pipeline

# Load the dataset
data = pd.read_csv('/content/cs_students.csv')

# Map skill levels to numerical values
skill_mapping = {'Strong': 2, 'Average': 1, 'Weak': 0}
data['Python'] = data['Python'].map(skill_mapping)
data['SQL'] = data['SQL'].map(skill_mapping)
data['Java'] = data['Java'].map(skill_mapping)

# One-hot encode the 'Interested Domain' column
data_encoded = pd.get_dummies(data, columns=['Interested Domain'], drop_first=True)

# Automatically encode 'Future Career'
label_encoder = LabelEncoder()
data_encoded['Career'] = label_encoder.fit_transform(data_encoded['Future Career'])

# Drop unnecessary columns
data_processed = data_encoded.drop(columns=['Student ID', 'Name', 'Gender', 'Future Career', 'Major', 'Projects'])

# Split features and target
X = data_processed.drop('Career', axis=1)
y = data_processed['Career']

# Train a KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Function to recommend careers using KNN
def recommend_careers(student_profile, knn_model, top_n=3):
    distances, indices = knn_model.kneighbors(student_profile)
    recommended_careers = y.iloc[indices[0]].values[:top_n]
    recommended_career_names = label_encoder.inverse_transform(recommended_careers)
    return recommended_career_names

# Input function for a new student
def input_new_student():
    gpa = float(input("Enter GPA: "))
    python_skill = input("Python skill (Strong, Average, Weak): ")
    sql_skill = input("SQL skill (Strong, Average, Weak): ")
    java_skill = input("Java skill (Strong, Average, Weak): ")
    domain = input("Interested domain (from the available domains): ")

    python_skill_num = skill_mapping[python_skill]
    sql_skill_num = skill_mapping[sql_skill]
    java_skill_num = skill_mapping[java_skill]

    new_student_profile = pd.DataFrame({
        'GPA': [gpa],
        'Python': [python_skill_num],
        'SQL': [sql_skill_num],
        'Java': [java_skill_num]
    })

    domain_encoded = pd.get_dummies([domain], columns=['Interested Domain'], drop_first=True)
    new_student_profile = pd.concat([new_student_profile, domain_encoded], axis=1)

    missing_cols = set(data_processed.columns) - set(new_student_profile.columns)
    for col in missing_cols:
        new_student_profile[col] = 0

    new_student_profile = new_student_profile[X.columns]
    return new_student_profile

# Get input and recommend careers
new_student_profile = input_new_student()
recommended_careers_for_new_student = recommend_careers(new_student_profile, knn)

# Prepare the input for the generative model
careers_list = ", ".join(recommended_careers_for_new_student)

# Extract skills and input details for context
python_skill = new_student_profile.iloc[0]['Python']
sql_skill = new_student_profile.iloc[0]['SQL']
java_skill = new_student_profile.iloc[0]['Java']
gpa = new_student_profile.iloc[0]['GPA']

skills_info = (
    f"The student has a GPA of {gpa}, with the following skill levels:\n"
    f"- Python: {'Strong' if python_skill == 2 else 'Average' if python_skill == 1 else 'Weak'}\n"
    f"- SQL: {'Strong' if sql_skill == 2 else 'Average' if sql_skill == 1 else 'Weak'}\n"
    f"- Java: {'Strong' if java_skill == 2 else 'Average' if java_skill == 1 else 'Weak'}\n"
)

prompt = (
    f"{skills_info}\n"
    f"Based on their profile, the system has recommended the following careers: {careers_list}. "
    f"Please provide a detailed, step-by-step guide on how the student can prepare and apply for these careers. "
    f"Include required skills, certifications, job search strategies, and networking tips."
)

# Use the GenerativeModel (flash) to generate content with temperature
flash = genai.GenerativeModel('gemini-1.5-flash')
response = flash.generate_content(prompt)
response_text = response.text

# Create a PDF report with FPDF
class PDFReport(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Career Recommendation Report', 0, 1, 'C')

    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, body):
        self.set_font('Arial', '', 11)
        self.multi_cell(0, 10, body)
        self.ln()

# Create the PDF Report
pdf = PDFReport()
pdf.add_page()

# Add student's skills and profile information
pdf.chapter_title("Student Profile")
pdf.chapter_body(skills_info)

# Add recommended careers to PDF
pdf.chapter_title("Recommended Careers")
pdf.chapter_body(careers_list)

# Add LLM-generated response to PDF
pdf.chapter_title("Career Preparation Guide")
pdf.chapter_body(response_text)

# Save the PDF
output_pdf = "career_recommendation_report1.pdf"
pdf.output(output_pdf)
print(f"PDF Report saved as {output_pdf}")


Enter GPA: 9.19
Python skill (Strong, Average, Weak): Strong
SQL skill (Strong, Average, Weak): Strong
Java skill (Strong, Average, Weak): Weak
Interested domain (from the available domains): Machine Learning Engineer


UnicodeEncodeError: 'latin-1' codec can't encode character '\u2013' in position 1305: ordinal not in range(256)

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import LabelEncoder
from transformers import pipeline
import pandas as pd
from fpdf import FPDF

# Load the dataset
data = pd.read_csv('/content/cs_students.csv')

# Map skill levels to numerical values
skill_mapping = {'Strong': 2, 'Average': 1, 'Weak': 0}
data['Python'] = data['Python'].map(skill_mapping)
data['SQL'] = data['SQL'].map(skill_mapping)
data['Java'] = data['Java'].map(skill_mapping)

# One-hot encode the 'Interested Domain' column
data_encoded = pd.get_dummies(data, columns=['Interested Domain'], drop_first=True)

# Automatically encode 'Future Career'
label_encoder = LabelEncoder()
data_encoded['Career'] = label_encoder.fit_transform(data_encoded['Future Career'])

# Drop unnecessary columns
data_processed = data_encoded.drop(columns=['Student ID', 'Name', 'Gender', 'Future Career', 'Major', 'Projects'])

# Split features and target
X = data_processed.drop('Career', axis=1)
y = data_processed['Career']

# Train a KNN classifier
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(X, y)

# Function to recommend careers using KNN
def recommend_careers(student_profile, knn_model, top_n=3):
    distances, indices = knn_model.kneighbors(student_profile)
    recommended_careers = y.iloc[indices[0]].values[:top_n]
    recommended_career_names = label_encoder.inverse_transform(recommended_careers)
    return recommended_career_names

# Input function for a new student
def input_new_student():
    gpa = float(input("Enter GPA: "))
    python_skill = input("Python skill (Strong, Average, Weak): ")
    sql_skill = input("SQL skill (Strong, Average, Weak): ")
    java_skill = input("Java skill (Strong, Average, Weak): ")
    domain = input("Interested domain (from the available domains): ")

    python_skill_num = skill_mapping[python_skill]
    sql_skill_num = skill_mapping[sql_skill]
    java_skill_num = skill_mapping[java_skill]

    new_student_profile = pd.DataFrame({
        'GPA': [gpa],
        'Python': [python_skill_num],
        'SQL': [sql_skill_num],
        'Java': [java_skill_num]
    })

    domain_encoded = pd.get_dummies([domain], columns=['Interested Domain'], drop_first=True)
    new_student_profile = pd.concat([new_student_profile, domain_encoded], axis=1)

    missing_cols = set(data_processed.columns) - set(new_student_profile.columns)
    for col in missing_cols:
        new_student_profile[col] = 0

    new_student_profile = new_student_profile[X.columns]
    return new_student_profile

# Get input and recommend careers
new_student_profile = input_new_student()
recommended_careers_for_new_student = recommend_careers(new_student_profile, knn)

# Prepare a professional-looking PDF report with FPDF
class PDFReport(FPDF):
    def header(self):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, 'Career Recommendation Report', 0, 1, 'C')

    def footer(self):
        self.set_y(-15)
        self.set_font('Arial', 'I', 8)
        self.cell(0, 10, f'Page {self.page_no()}', 0, 0, 'C')

    def chapter_title(self, title):
        self.set_font('Arial', 'B', 12)
        self.cell(0, 10, title, 0, 1, 'L')
        self.ln(5)

    def chapter_body(self, body):
        self.set_font('Arial', '', 11)
        self.multi_cell(0, 10, body)
        self.ln()

# Create the PDF Report
pdf = PDFReport()
pdf.add_page()

# Add student's skills and profile information
skills_info = (
    f"GPA: {new_student_profile.iloc[0]['GPA']}\n"
    f"Python Skill: {'Strong' if new_student_profile.iloc[0]['Python'] == 2 else 'Average' if new_student_profile.iloc[0]['Python'] == 1 else 'Weak'}\n"
    f"SQL Skill: {'Strong' if new_student_profile.iloc[0]['SQL'] == 2 else 'Average' if new_student_profile.iloc[0]['SQL'] == 1 else 'Weak'}\n"
    f"Java Skill: {'Strong' if new_student_profile.iloc[0]['Java'] == 2 else 'Average' if new_student_profile.iloc[0]['Java'] == 1 else 'Weak'}\n"
)

pdf.chapter_title("Student Profile")
pdf.chapter_body(skills_info)

# Add recommended careers to PDF
pdf.chapter_title("Recommended Careers")
pdf.chapter_body(", ".join(recommended_careers_for_new_student))

# Generate and save the PDF report
output_pdf = "career_recommendation_report.pdf"
pdf.output(output_pdf)
print(f"PDF Report saved as {output_pdf}")


Enter GPA: 9.1
Python skill (Strong, Average, Weak): Strong
SQL skill (Strong, Average, Weak): Strong
Java skill (Strong, Average, Weak): Weak
Interested domain (from the available domains): Data Scientist
PDF Report saved as career_recommendation_report.pdf
