In [1]:
import sqlite3

# Create the resume database schema.
#
# Every statement uses CREATE TABLE IF NOT EXISTS, so re-running this cell
# against an existing resumes.db leaves already-created tables untouched.
# Education, Experience and Skills each reference Candidate(id) through their
# candidate_id column.
schema_statements = (
    """
CREATE TABLE IF NOT EXISTS Candidate (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT,
    email TEXT,
    phone TEXT,
    location TEXT
)
""",
    """
CREATE TABLE IF NOT EXISTS Education (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    candidate_id INTEGER,
    degree TEXT,
    institution TEXT,
    start_year TEXT,
    end_year TEXT,
    FOREIGN KEY(candidate_id) REFERENCES Candidate(id)
)
""",
    """
CREATE TABLE IF NOT EXISTS Experience (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    candidate_id INTEGER,
    company TEXT,
    position TEXT,
    start_date TEXT,
    end_date TEXT,
    duration TEXT,
    FOREIGN KEY(candidate_id) REFERENCES Candidate(id)
)
""",
    """
CREATE TABLE IF NOT EXISTS Skills (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    candidate_id INTEGER,
    skill TEXT,
    FOREIGN KEY(candidate_id) REFERENCES Candidate(id)
)
""",
)

conn = sqlite3.connect("resumes.db")
try:
    cursor = conn.cursor()
    for ddl in schema_statements:
        cursor.execute(ddl)
    conn.commit()
finally:
    # Always release the database file handle, even when a statement fails;
    # otherwise the half-open connection lingers in the kernel.
    conn.close()


In [2]:
# Sample parsed resume used by the insert cell: one candidate record plus the
# education, experience and skill entries that belong to that candidate.
candidate = dict(
    name="John Doe",
    email="john@email.com",
    phone="08012345678",
    location="Abuja",
)

# One dict per education entry.
education = [
    dict(
        degree="B.Sc in Computer Science",
        institution="UNILAG",
        start_year="2015",
        end_year="2019",
    ),
]

# One dict per job held.
experience = [
    dict(
        company="Initech",
        position="Developer",
        start_date="2020-01",
        end_date="2022-01",
        duration="2 years",
    ),
]

# Flat list of skill names.
skills = [
    "Python",
    "SQL",
    "Django",
]


In [3]:
# Persist the parsed resume: one Candidate row, then its Education, Experience
# and Skills rows linked through the new candidate's id.
#
# Note: each run of this cell inserts a fresh Candidate row; nothing here
# de-duplicates against rows already in resumes.db.
conn = sqlite3.connect("resumes.db")
try:
    # SQLite only enforces FOREIGN KEY clauses when this pragma is switched on
    # for the current connection; it must run before the transaction starts.
    conn.execute("PRAGMA foreign_keys = ON")

    # `with conn` commits when the block succeeds and rolls back if anything
    # raises, so a failure part-way through never leaves a candidate stored
    # with only some of its child rows. It does NOT close the connection.
    with conn:
        cursor = conn.cursor()

        cursor.execute(
            "INSERT INTO Candidate (name, email, phone, location) VALUES (?, ?, ?, ?)",
            (candidate["name"], candidate["email"], candidate["phone"], candidate["location"]),
        )
        # Primary key generated for the row just inserted.
        candidate_id = cursor.lastrowid

        cursor.executemany(
            """
    INSERT INTO Education (candidate_id, degree, institution, start_year, end_year) 
    VALUES (?, ?, ?, ?, ?)""",
            [
                (candidate_id, edu["degree"], edu["institution"], edu["start_year"], edu["end_year"])
                for edu in education
            ],
        )

        cursor.executemany(
            """
    INSERT INTO Experience (candidate_id, company, position, start_date, end_date, duration)
    VALUES (?, ?, ?, ?, ?, ?)""",
            [
                (candidate_id, exp["company"], exp["position"], exp["start_date"], exp["end_date"], exp["duration"])
                for exp in experience
            ],
        )

        cursor.executemany(
            "INSERT INTO Skills (candidate_id, skill) VALUES (?, ?)",
            [(candidate_id, skill) for skill in skills],
        )
finally:
    # Release the file handle on success and on failure alike, so an error
    # cannot leave the database locked for later cells.
    conn.close()


In [6]:
import pandas as pd
# List the name and email of every candidate that has a Skills row whose
# skill is exactly 'Python'. A candidate appears once per matching Skills row.
conn = sqlite3.connect("resumes.db")
try:
    df = pd.read_sql_query("""
SELECT Candidate.name, Candidate.email
FROM Candidate
JOIN Skills ON Candidate.id = Skills.candidate_id
WHERE Skills.skill = 'Python'
""", conn)
finally:
    # Close the connection even if the query fails, so the handle is not
    # left open in the kernel.
    conn.close()

print(df)


       name               email
0  Jane Doe  jane.doe@email.com
1  John Doe      john@email.com


In [7]:
# Count how many Candidate rows exist for each location value.
conn = sqlite3.connect("resumes.db")
try:
    df = pd.read_sql_query("""
SELECT location, COUNT(*) as num_candidates
FROM Candidate
GROUP BY location
""", conn)
finally:
    # Close the connection even if the query fails, so the handle is not
    # left open in the kernel.
    conn.close()

print(df)


  location  num_candidates
0    Abuja               1
1    Lagos               1


In [22]:
import ast

import pandas as pd
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report

# Predict a resume's job category from its list of skills.

# TODO: this is an absolute path on one machine; point it at a location
# relative to the notebook (or a configurable data directory) before sharing.
DATA_PATH = r"C:\Users\HP\Downloads\synthetic_resume_data.csv"

# 1. Load
# SECURITY: the "skills" column was previously parsed with the built-in `eval`,
# which executes whatever expression the CSV cell contains. It is now parsed
# with `ast.literal_eval`, which accepts only Python literals.
# NOTE(review): assumes every cell is a literal list such as "['Python', 'SQL']"
# -- confirm against the data file.
df = pd.read_csv(DATA_PATH, converters={"skills": ast.literal_eval})

# 2. Encode skills: one binary indicator column per distinct skill
mlb = MultiLabelBinarizer()
X   = mlb.fit_transform(df["skills"])
y   = df["category"]

# 3. Stratified split: 25% held out, class proportions preserved, fixed seed
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.25, stratify=y, random_state=42
)

# 4. Train with stronger regularisation (C smaller) to avoid overfit
model = LogisticRegression(max_iter=3000, C=0.5)
model.fit(X_train, y_train)

# 5. Evaluate on the held-out split
# NOTE(review): the recorded run scores 1.0 on every class. On synthetic data
# that may simply mean the skills determine the category exactly -- worth
# verifying before reading it as real-world performance.
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))


Accuracy: 1.0
                  precision    recall  f1-score   support

         Backend       1.00      1.00      1.00        11
Business Analyst       1.00      1.00      1.00         9
    Data Analyst       1.00      1.00      1.00        12
   Data Engineer       1.00      1.00      1.00         9
  Data Scientist       1.00      1.00      1.00         8
          DevOps       1.00      1.00      1.00        10
        Frontend       1.00      1.00      1.00         8
Mobile Developer       1.00      1.00      1.00         8

        accuracy                           1.00        75
       macro avg       1.00      1.00      1.00        75
    weighted avg       1.00      1.00      1.00        75

