In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import cross_val_score


# ---------------------------------------------------------------------------
# Salary regression: compare a plain linear fit against a degree-2 polynomial
# fit, first on a single hold-out split and then with 5-fold cross-validation.
# ---------------------------------------------------------------------------

# Read the table from the CSV file in the current working directory.
data = pd.read_csv("Salary_dataset.csv")

# Features are every column except the last one; the target is the last column.
# NOTE(review): `iloc[:, :-1]` keeps *all* leading columns, not only the first.
# If the CSV carries an extra index/ID column it will be fed to the models as a
# feature -- confirm the file layout.
X, y = data.iloc[:, :-1].values, data.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Linear regression: fit on the training rows, score on the held-out rows.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f"Linear Regression Test MSE: {mse}")

# --- Polynomial regression (degree 2): expand the features, then fit a linear
# model on the expanded training matrix.
poly = PolynomialFeatures(degree=2)
X_poly = poly.fit_transform(X_train)
X_test_poly = poly.transform(X_test)
model_poly = LinearRegression().fit(X_poly, y_train)

y_pred_poly = model_poly.predict(X_test_poly)
mse_poly = mean_squared_error(y_test, y_pred_poly)
print(f"Polynomial Regression (Degree 2) Test MSE: {mse_poly}")

# --- 5-fold cross-validation over the full data set. The scorer reports the
# *negated* MSE, so the fold mean is negated again before printing.
# NOTE(review): an integer `cv` presumably yields unshuffled folds for a
# regressor; if the rows are ordered, consider an explicitly shuffled splitter.
cv_settings = {"cv": 5, "scoring": "neg_mean_squared_error"}

cross_val_scores = cross_val_score(model, X, y, **cv_settings)
linear_cv_mse = -cross_val_scores.mean()
print(f"Cross-validated MSE for Linear Regression: {linear_cv_mse}")

# The polynomial expansion is applied to the whole of X up front, reusing the
# transformer that was fitted on the training split above.
# NOTE(review): wrapping the expansion and the regressor in a pipeline would
# keep the preprocessing inside each fold -- worth considering.
X_all_poly = poly.transform(X)
cross_val_scores_poly = cross_val_score(model_poly, X_all_poly, y, **cv_settings)
poly_cv_mse = -cross_val_scores_poly.mean()
print(f"Cross-validated MSE for Polynomial Regression: {poly_cv_mse}")

Saving Salary_dataset.csv to Salary_dataset (3).csv
Linear Regression Test MSE: 55494098.13142202
Polynomial Regression (Degree 2) Test MSE: 64168200.124699205
Cross-validated MSE for Linear Regression: 78625892.88185735
Cross-validated MSE for Polynomial Regression: 70662205.86009356
