In [2]:
import pandas as pd
import sqlite3
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation

def load_and_clean_data():
    df = pd.read_csv("survey_results_public.csv")  # Update with correct filename
    
    # Selecting relevant columns
    columns_needed = [
        "DevType", "YearsCodePro", "WorkExp", "LanguageHaveWorkedWith", 
        "DatabaseHaveWorkedWith", "ToolsTechHaveWorkedWith", "AIToolCurrently Using", 
        "AIToolInterested in Using", "CompTotal", "JobSat", "Industry"
    ]
    df = df[columns_needed].dropna()
    
    return df

def extract_trending_skills(df):
    vectorizer = TfidfVectorizer(stop_words='english')
    X = vectorizer.fit_transform(df['LanguageHaveWorkedWith'].astype(str))
    lda = LatentDirichletAllocation(n_components=5, random_state=42)
    lda.fit(X)
    feature_names = vectorizer.get_feature_names_out()
    trending_skills = [feature_names[i] for i in lda.components_[0].argsort()[-10:]]
    return trending_skills

def save_to_database(df):
    conn = sqlite3.connect("career_trends.db")
    df.to_sql("job_data", conn, if_exists="replace", index=False)
    conn.close()

df = load_and_clean_data()
skills = extract_trending_skills(df)
save_to_database(df)
print("Data processing complete. Trending skills:", skills)



Data processing complete. Trending skills: ['javascript', 'java', 'clojure', 'basic', 'visual', 'net', 'vba', 'elixir', 'sql', 'python']
