In [1]:
import pandas as pd
import numpy as np
import ast
import re
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, InputLayer
import joblib

In [3]:
data = pd.read_csv('/content/sample_data/internship_data.csv')

In [4]:
def clean_location(loc_str):
    if pd.isna(loc_str):
        return ""
    return loc_str.strip("()").replace("'", "")

def parse_duration(dur):
    if pd.isna(dur):
        return 0
    match = re.search(r"(\d+)", str(dur))
    return int(match.group(1)) if match else 0

def parse_stipend(stipend):
    if pd.isna(stipend) or "Unpaid" in str(stipend):
        return 0
    nums = re.findall(r"\d+", stipend.replace(",", ""))
    if len(nums) == 1:
        return int(nums[0])
    elif len(nums) == 2:
        return (int(nums[0]) + int(nums[1])) // 2
    return 0

def parse_skills(sk):
    if pd.isna(sk):
        return []
    try:
        return ast.literal_eval(sk)
    except:
        return [sk]

In [5]:
data["Location"] = data["Location"].apply(clean_location)
data["Duration"] = data["Duration"].apply(parse_duration)
data["Stipend"] = data["Stipend"].apply(parse_stipend)
data["Skills"] = data["Skills"].apply(parse_skills)

In [6]:
def filter_internships(df, profile):
    df_filtered = df.copy()
    if profile["location"]:
        pattern = "|".join([re.escape(loc) for loc in profile["location"]])
        df_filtered = df_filtered[df_filtered["Location"].str.contains(pattern, case=False, na=False)]

    def skills_match(row_skills, candidate_skills):
        if not candidate_skills:
            return True
        row_skills_lower = [s.lower() for s in row_skills]
        return any(skill.lower() in row_skills_lower for skill in candidate_skills)

    df_filtered.loc[:, "SkillsMatch"] = df_filtered["Skills"].apply(
        lambda x: skills_match(x, profile["skills"])
    )
    return df_filtered[df_filtered["SkillsMatch"]]

In [8]:
candidate_profile = {
    "education": "Graduation",
    "skills": ["Python", "Data Analysis"],
    "location": ["Delhi"]
}

filtered_data = filter_internships(data, candidate_profile)

if "Website Link" not in filtered_data.columns:
    filtered_data["Website Link"] = ""

In [9]:
le_location = LabelEncoder()
le_company = LabelEncoder()

filtered_data["Location_enc"] = le_location.fit_transform(filtered_data["Location"])
filtered_data["Company_enc"] = le_company.fit_transform(filtered_data["Company Name"])

X = filtered_data[["Location_enc", "Stipend", "Duration"]]
y = np.ones(len(X))  # Dummy labels

scaler = StandardScaler()
X = scaler.fit_transform(X)

In [10]:
model = Sequential([
    InputLayer(shape=(X.shape[1],)),
    Dense(64, activation="relu"),
    Dense(32, activation="relu"),
    Dense(1, activation="sigmoid")
])

model.compile(optimizer="adam", loss="binary_crossentropy")
model.fit(X, y, epochs=5, verbose=0)

scores = model.predict(X).flatten()
filtered_data["Score"] = scores

top_internships = filtered_data.sort_values(by="Score", ascending=False).head(5)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 69ms/step


In [11]:
for idx, row in top_internships.iterrows():
    print(f"Internship: {row['Role']} at {row['Company Name']} in {row['Location']}")
    print(f"Stipend: ₹{row['Stipend']} - Duration: {row['Duration']} months")
    print(f"Skills Required: {', '.join(row['Skills'])}")
    print(f"Apply Here: {row['Website Link']}")
    print("-" * 50)

Internship: Machine Learning Internship (Part time) at Avaari in Amritsar, Chennai, Delhi, Ghaziabad, Gurgaon, Kolkata, Pune, Bangalore, Hyderabad, Mumbai, Kochi, Jaipur, Andhra Pradesh, Karnataka,
Stipend: ₹25000 - Duration: 2 months
Skills Required: Data Analytics, Data Science, Deep Learning, Machine Learning, Natural Language Processing (NLP), Python, R Programming
Apply Here: nan
--------------------------------------------------
Internship: Artificial Intelligence (AI) Internship (Part time) at Avaari in Ahmedabad, Chandigarh, Chennai, Coimbatore, Delhi, Guwahati, Gurgaon, Indore, Bangalore, Hyderabad, Bhopal, Kochi, Noida, Raipur, Ludhiana,
Stipend: ₹25000 - Duration: 1 months
Skills Required: Deep Learning, Machine Learning, Natural Language Processing (NLP), Python
Apply Here: nan
--------------------------------------------------
Internship: Data Entry Internship at Bhavya Luhadia in Gurgaon, Gurgaon, Chennai, Delhi, Ghaziabad, Gurgaon, Indore, Lucknow, Patna, Pune, Udaipur, 

In [12]:
model.save("internship_model.keras")
joblib.dump(le_location, "le_location.pkl")
joblib.dump(le_company, "le_company.pkl")
joblib.dump(scaler, "scaler.pkl")

['scaler.pkl']