In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from wordcloud import WordCloud
import torch
from diffusers import StableDiffusionPipeline


In [None]:
# Dataset Preparation
# Load the job postings. Each row supplies a free-text 'Job Description'
# column and a 'Category' column that is used as the classification label.
job_data = pd.read_csv('job_market_data.csv')
# Empty CSV cells are read as NaN, and `.str.lower()` leaves them as NaN.
# Fill them with '' and coerce every cell to str so that the vectorizer and
# the word-cloud `' '.join(...)` downstream only ever receive strings.
job_data['clean_text'] = (
    job_data['Job Description'].fillna('').astype(str).str.lower()
)
labels = job_data['Category']

In [None]:
# Text Vectorization
# Bag-of-words counts over the lower-cased descriptions, with English stop
# words removed.
# NOTE(review): the vocabulary is learned from the full corpus here, before
# the train/test split is made in a later cell.
corpus = job_data['clean_text']
vectorizer = CountVectorizer(stop_words='english')
X, y = vectorizer.fit_transform(corpus), labels

In [None]:
# Train-Test Split
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# partition reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    test_size=0.2,
)

In [None]:
# Model Training
# `history` collects one (epoch, accuracy, loss) tuple per training pass.
history = list()
model = MultinomialNB()

In [None]:
# Simulated "epochs" for demonstration: every pass feeds the same training
# data to partial_fit again, and the per-pass metrics are stored in `history`.
all_classes = np.unique(y)  # hoisted: the label set is the same on every pass

for epoch in range(1, 6):
    model.partial_fit(X_train, y_train, classes=all_classes)
    predictions = model.predict(X_train)
    accuracy = accuracy_score(y_train, predictions)

    # Loss = mean negative log-likelihood of the true class on the training
    # set. The previous value, -mean(feature_log_prob_), averaged the model's
    # per-feature parameters and did not measure how well labels are predicted.
    # predict_log_proba columns follow model.classes_, so map each true label
    # to its column before indexing.
    class_to_col = {cls: col for col, cls in enumerate(model.classes_)}
    true_cols = np.array([class_to_col[lbl] for lbl in y_train], dtype=int)
    log_proba = model.predict_log_proba(X_train)
    loss = -np.mean(log_proba[np.arange(len(true_cols)), true_cols])

    history.append((epoch, accuracy, loss))
    print(f"Epoch {epoch}: Accuracy = {accuracy:.4f}, Loss = {loss:.4f}")

In [None]:
# Final Predictions and Accuracy
# Score the fitted model on the held-out split.
test_predictions = model.predict(X_test)
test_accuracy = accuracy_score(y_true=y_test, y_pred=test_predictions)
print("Final Test Accuracy:", test_accuracy)

In [None]:
# Confusion Matrix
# Pin the row/column order to the model's classes and reuse those names as
# tick labels, so each cell of the heatmap can be read as
# (actual category, predicted category) instead of bare integer positions.
class_names = model.classes_
cm = confusion_matrix(y_test, test_predictions, labels=class_names)
sns.heatmap(
    cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title('Confusion Matrix')
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.show()


In [None]:
# Classification Report
# Text summary of per-class metrics on the held-out split.
report_text = classification_report(y_test, test_predictions)
print(report_text)

In [None]:
# Variable Importance Visualization
# For every class, list the ten vocabulary terms with the highest
# feature log-probability (printed from lowest to highest of those ten).
feature_names = vectorizer.get_feature_names_out()
feature_probs = model.feature_log_prob_
for label, class_log_probs in zip(model.classes_, feature_probs):
    top_features = np.argsort(class_log_probs)[-10:]
    top_terms = [feature_names[idx] for idx in top_features]
    print(f"{label}: {top_terms}")


In [None]:
# Word Cloud
# Build one cloud (at most 100 words, white background) from every cleaned
# description joined into a single string. The empty stop-word set is passed
# explicitly, as in the original call.
corpus_text = ' '.join(job_data['clean_text'])
cloud_builder = WordCloud(stopwords=set(), background_color='white', max_words=100)
wordcloud = cloud_builder.generate(corpus_text)
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
plt.show()


In [None]:
# Plot Accuracy and Loss over Epochs
# The whole figure is built and shown in ONE cell. With the inline notebook
# backend a figure is rendered and closed when its cell finishes, so drawing
# the two panels and calling show() in separate cells never yields the
# combined side-by-side figure.
epochs, accuracies, losses = zip(*history)
fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(10, 5))

# Left panel: training accuracy per epoch.
ax_acc.plot(epochs, accuracies, marker='o', label='Accuracy')
ax_acc.set_title('Training Accuracy Over Epochs')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.legend()

# Right panel: training loss per epoch.
ax_loss.plot(epochs, losses, marker='o', label='Loss', color='orange')
ax_loss.set_title('Training Loss Over Epochs')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.legend()

fig.tight_layout()
plt.show()


In [None]:
# Stable Diffusion Visualization Based on Model Output
# Load the pretrained text-to-image pipeline and place it on the CPU.
SD_MODEL_ID = "CompVis/stable-diffusion-v1-4"
pipeline = StableDiffusionPipeline.from_pretrained(SD_MODEL_ID).to("cpu")


In [None]:
# Generate prompt based on the model output
# Report a separate figure for each category: the share of that category's
# test rows the model predicted correctly. The earlier version repeated the
# single overall test accuracy next to every category name.
y_test_values = np.asarray(y_test)
summary_lines = []
for label in model.classes_:
    in_class = y_test_values == label
    if not in_class.any():
        # No test rows for this category, so there is nothing to report.
        continue
    class_accuracy = np.mean(test_predictions[in_class] == label)
    summary_lines.append(
        f"Category: {label}, Test Accuracy: {round(class_accuracy * 100, 2)}%"
    )
category_summary = "\n".join(summary_lines)
# NOTE(review): the text encoder used by Stable Diffusion v1 presumably
# truncates long prompts (77-token limit), so with many categories the tail
# of this summary may be ignored. Confirm against the pipeline's warnings.
job_prompt = f"A futuristic depiction of the job market trends based on: {category_summary}"
image = pipeline(job_prompt).images[0]


In [None]:
# Display the generated image without axis ticks or frame.
ax = plt.gca()
ax.imshow(image)
ax.set_title("Stable Diffusion Output Based on Model Analysis")
ax.axis('off')
plt.show()