In [None]:

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, RidgeCV, LassoCV 
from sklearn.metrics import mean_squared_error, r2_score


In [7]:
# Load the AI job-market dataset from the project-relative data folder.
csv_path = "data/ai_job_market.csv"
df = pd.read_csv(csv_path)

# Quick sanity check on the size of the frame, then preview the first rows.
print("Rows and Columns:", df.shape)
df.head()

Rows and Columns: (2000, 12)


Unnamed: 0,job_id,company_name,industry,job_title,skills_required,experience_level,employment_type,location,salary_range_usd,posted_date,company_size,tools_preferred
0,1,Foster and Sons,Healthcare,Data Analyst,"NumPy, Reinforcement Learning, PyTorch, Scikit...",Mid,Full-time,"Tracybury, AR",92860-109598,2025-08-20,Large,"KDB+, LangChain"
1,2,"Boyd, Myers and Ramirez",Tech,Computer Vision Engineer,"Scikit-learn, CUDA, SQL, Pandas",Senior,Full-time,"Lake Scott, CU",78523-144875,2024-03-22,Large,"FastAPI, KDB+, TensorFlow"
2,3,King Inc,Tech,Quant Researcher,"MLflow, FastAPI, Azure, PyTorch, SQL, GCP",Entry,Full-time,"East Paige, CM",124496-217204,2025-09-18,Large,"BigQuery, PyTorch, Scikit-learn"
3,4,"Cooper, Archer and Lynch",Tech,AI Product Manager,"Scikit-learn, C++, Pandas, LangChain, AWS, R",Mid,Full-time,"Perezview, FI",50908-123743,2024-05-08,Large,"TensorFlow, BigQuery, MLflow"
4,5,Hall LLC,Finance,Data Scientist,"Excel, Keras, SQL, Hugging Face",Senior,Contract,"North Desireeland, NE",98694-135413,2025-02-24,Large,"PyTorch, LangChain"


In [9]:
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# partition reproducible between runs.
# NOTE(review): X_full and y must already exist in the kernel when this cell
# runs -- their defining cell is not visible here; confirm it sits above.
split_parts = train_test_split(X_full, y, test_size=0.2, random_state=67)
X_train, X_test, y_train, y_test = split_parts

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)

Train shape: (1600, 551) Test shape: (400, 551)


In [None]:
# Baseline: ordinary least-squares regression with no regularisation.
vanilla_lr = LinearRegression()
vanilla_lr.fit(X_train, y_train)

# Predict on both splits so train and test error can be compared.
y_train_pred_vanilla = vanilla_lr.predict(X_train)
y_test_pred_vanilla = vanilla_lr.predict(X_test)

# Report MSE followed by R^2 for each split (train first, then test).
for vanilla_header, vanilla_actual, vanilla_predicted in (
    ("R^2 and MSE for y_train", y_train, y_train_pred_vanilla),
    ("R^2 and MSE for y_test", y_test, y_test_pred_vanilla),
):
    print(vanilla_header)
    print("MSE:")
    print(mean_squared_error(vanilla_actual, vanilla_predicted))
    print("R^2:")
    print(r2_score(vanilla_actual, vanilla_predicted))

R^2 and MSE for y_train
MSE:
816613719.9649711
R^2:
0.3267613445237222
R^2 and MSE for y_test
MSE:
1845560864.4123168
R^2:
-0.5485202913458804


In [18]:
#Lasso Regression with Cross-Validation
# No explicit alpha grid is supplied: with `alphas` left unset, LassoCV derives
# its own regularisation path from the training data. The hand-built
# np.logspace grid that used to be assigned here was never passed to the
# estimator, so it has been removed to avoid implying it influenced the fit.
# 5-fold CV selects the alpha; random_state is pinned for reproducibility.
lasso_cv = LassoCV(cv=5, random_state=67).fit(X_train, y_train)

#predictions on both splits, used by the metrics cell that follows
y_train_pred_lasso = lasso_cv.predict(X_train)
y_test_pred_lasso = lasso_cv.predict(X_test)

In [19]:
# Report MSE followed by R^2 for the Lasso model on each split.
# NOTE(review): an R^2 very close to 0 on the training split would suggest the
# selected alpha shrank every coefficient to zero -- inspect lasso_cv.alpha_
# and lasso_cv.coef_ to confirm.
for lasso_header, lasso_actual, lasso_predicted in (
    ("R^2 and MSE for y_train with Lasso", y_train, y_train_pred_lasso),
    ("R^2 and MSE for y_test with Lasso", y_test, y_test_pred_lasso),
):
    print(lasso_header)
    print("MSE:")
    print(mean_squared_error(lasso_actual, lasso_predicted))
    print("R^2:")
    print(r2_score(lasso_actual, lasso_predicted))

R^2 and MSE for y_train with Lasso
MSE:
1212963208.9934874
R^2:
5.329070518200751e-15
R^2 and MSE for y_test with Lasso
MSE:
1192100046.570008
R^2:
-0.00023312534634101212


In [23]:
#Ridge Regression with Cross-Validation
# Candidate regularisation strengths: 10 values log-spaced from 1e-4 to 1e2.
alphas = np.logspace(-4, 2, 10)

# The number of CV folds is stated explicitly rather than derived from
# len(alphas): the two quantities are unrelated, and tying them together would
# silently change the fold count whenever the alpha grid is resized.
# 10 folds reproduces what cv=len(alphas) evaluated to for the grid above.
RIDGE_CV_FOLDS = 10
ridge_cv = RidgeCV(alphas=alphas, cv=RIDGE_CV_FOLDS).fit(X_train, y_train)

#predictions on both splits
y_train_pred_ridge = ridge_cv.predict(X_train)
y_test_pred_ridge = ridge_cv.predict(X_test)

#printing out metrics: MSE followed by R^2, train split first, then test
print("R^2 and MSE for y_train with Ridge")
print("MSE:")
print(mean_squared_error(y_train, y_train_pred_ridge))
print("R^2:")
print(r2_score(y_train, y_train_pred_ridge))
print("R^2 and MSE for y_test with Ridge")
print("MSE:")
print(mean_squared_error(y_test, y_test_pred_ridge))
print("R^2:")
print(r2_score(y_test, y_test_pred_ridge))


R^2 and MSE for y_train with Ridge
MSE:
1180515053.1680284
R^2:
0.026751145941508403
R^2 and MSE for y_test with Ridge
MSE:
1204709418.5203276
R^2:
-0.010813035607071031


Plan
1. Vanilla Linear Regression 
2. Lasso and Ridge Regression 
3. Pure Decision Tree
4. Random Forest