In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the data
df = pd.read_csv('job-data.csv')

# Exclude "joining_date" column
df_exclude_date = df.drop(columns=["joining_date"])

# Perform one-hot encoding for categorical variables
df_encoded = pd.get_dummies(df_exclude_date, columns=["job_title", "required_experience", "Industary_Sector", "Demanding_Of_the_Job", "Overall_Satisfication", "Working_model", "Considering_Job_Switching_in_Future", "Main_Reasons"])

# Separate features (X) and target variable (y)
X = df_encoded.drop(columns=["Salary"])
y = df_encoded["Salary"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the linear regression model
model = LinearRegression()
model.fit(X_train, y_train)
# print(model.score(X_test, y_test))

# Make predictions
y_pred = model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)


Mean Squared Error: 1.5875725223695224e+34
Mean Absolute Error: 2.7091105105020868e+16
R-squared: -8.936530634038134e+24


In [17]:
# Define tolerance level
tolerance = 0.05  # 5% tolerance

# Calculate absolute percentage error
abs_percentage_error = abs((y_pred - y_test) / y_test)

# Calculate accuracy within tolerance level
accuracy = (abs_percentage_error <= tolerance).mean() * 100

print("Accuracy within ±{}%: {:.2f}%".format(tolerance * 100, accuracy))


Accuracy within ±5.0%: 12.00%


In [18]:
model.score(X_test, y_test)

-8.936530634038134e+24

In [19]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Load the data
df = pd.read_csv('job-data.csv')

# Exclude "joining_date" column
df_exclude_date = df.drop(columns=["joining_date"])

# Perform one-hot encoding for categorical variables
df_encoded = pd.get_dummies(df_exclude_date, columns=["job_title", "required_experience", "Industary_Sector", "Demanding_Of_the_Job", "Overall_Satisfication", "Working_model", "Considering_Job_Switching_in_Future", "Main_Reasons"])

# Separate features (X) and target variable (y)
X = df_encoded.drop(columns=["Salary"])
y = df_encoded["Salary"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the KNN regression model
k = 5  # Number of neighbors
knn_model = KNeighborsRegressor(n_neighbors=k)
knn_model.fit(X_train, y_train)

# Make predictions
y_pred = knn_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2)


Mean Squared Error: 1294447300.0
Mean Absolute Error: 27350.0
R-squared: 0.2713486919430682


In [24]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load the data
df = pd.read_csv('job-data.csv')

# Exclude "joining_date" column
df_exclude_date = df.drop(columns=["joining_date"])

# Perform label encoding for categorical variables
label_encoder = LabelEncoder()
df_encoded = df_exclude_date.apply(label_encoder.fit_transform)

# Separate features (X) and target variable (y)
X = df_encoded.drop(columns=["Salary"])
y = df_encoded["Salary"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the KNN regression model
k = 5  # Number of neighbors
knn_model = KNeighborsRegressor(n_neighbors=k)
knn_model.fit(X_train, y_train)

# Make predictions
y_pred = knn_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2*100)


Mean Squared Error: 197.72560000000004
Mean Absolute Error: 10.588
R-squared: 33.353557252567064


# Fiaz Nawaz 

In [25]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import LabelEncoder

# Load the data
df = pd.read_csv('job-data-2.csv')

# Exclude "joining_date" column
df_exclude_date = df.drop(columns=["joining_date"])

# Perform label encoding for categorical variables
label_encoder = LabelEncoder()
df_encoded = df_exclude_date.apply(label_encoder.fit_transform)

# Separate features (X) and target variable (y)
X = df_encoded.drop(columns=["Salary"])
y = df_encoded["Salary"]

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build the KNN regression model
k = 5  # Number of neighbors
knn_model = KNeighborsRegressor(n_neighbors=k)
knn_model.fit(X_train, y_train)

# Make predictions
y_pred = knn_model.predict(X_test)

# Evaluate the model
mse = mean_squared_error(y_test, y_pred)
mae = mean_absolute_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print("Mean Squared Error:", mse)
print("Mean Absolute Error:", mae)
print("R-squared:", r2*100)

Mean Squared Error: 26.302500000000002
Mean Absolute Error: 4.0625
R-squared: -0.09573361082206855
