In [2]:
import pandas as pd
import pickle
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.decomposition import LatentDirichletAllocation

def load_and_clean_data():
    df = pd.read_csv("survey_results_public.csv")  # Update with correct filename
    
    # Selecting relevant columns
    columns_needed = [
        "DevType", "YearsCodePro", "WorkExp", "LanguageHaveWorkedWith", 
        "DatabaseHaveWorkedWith", "ToolsTechHaveWorkedWith", "AIToolCurrently Using", 
        "AIToolInterested in Using", "CompTotal", "JobSat", "Industry"
    ]
    df = df[columns_needed].dropna()
    
    return df

def extract_trending_skills(df):
    vectorizer = TfidfVectorizer(stop_words='english')
    X = vectorizer.fit_transform(df['LanguageHaveWorkedWith'].astype(str))
    lda = LatentDirichletAllocation(n_components=5, random_state=42)
    lda.fit(X)
    feature_names = vectorizer.get_feature_names_out()
    trending_skills = [feature_names[i] for i in lda.components_[0].argsort()[-10:]]
    return trending_skills

def save_to_pickle(df, filename="career_trends.pkl"):
    """ Saves the dataframe to a pickle file instead of a database """
    with open(filename, "wb") as f:
        pickle.dump(df, f)

def load_from_pickle(filename="career_trends.pkl"):
    """ Loads the dataframe from the pickle file """
    with open(filename, "rb") as f:
        return pickle.load(f)

# Process and save data
df = load_and_clean_data()
skills = extract_trending_skills(df)
save_to_pickle(df)

print("Data processing complete. Trending skills:", skills)




Data processing complete. Trending skills: ['javascript', 'java', 'clojure', 'basic', 'visual', 'net', 'vba', 'elixir', 'sql', 'python']
