In [None]:
# Task 1

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder


# Load the dataset from the working directory.
data = pd.read_csv('CarPrice_Assignment.csv')

# Quick overview: dimensions, per-column dtypes, and summary statistics.
# sep="" matters here: with print's default separator (a single space) the
# first line of each multi-line repr is shifted one column to the right, so
# the header row no longer lines up with the rows beneath it.
print("\n", data.shape, sep="")
print("\n", data.dtypes, sep="")
print("\n", data.describe(), sep="")


# Data Wrangling
# Dropping unnecessary columns (presumably a row identifier and a free-text
# model name -- neither is used as a predictor).
data = data.drop(columns=['car_ID', 'CarName'])

# Encode categorical columns.
# One-hot encoding is used instead of LabelEncoder: LabelEncoder is intended
# for target labels and maps categories to arbitrary (alphabetically ordered)
# integers, which a linear model would wrongly treat as an ordered numeric
# scale. drop_first=True drops one level per column so the dummy columns are
# not perfectly collinear with the model's intercept.
categorical_columns = data.select_dtypes(include=['object']).columns
if len(categorical_columns) > 0:
    data = pd.get_dummies(
        data,
        columns=list(categorical_columns),
        drop_first=True,
        dtype=int,  # plain 0/1 integers rather than booleans
    )



# Separate the target column from the predictors.
y = data['price']
X = data.drop('price', axis=1)

# Hold out 20% of the rows for testing; the fixed random_state makes the
# shuffle (and therefore the split) reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Linear Regression Model
# fit() returns the fitted estimator itself, so construction and training
# can be chained into a single expression.
model = LinearRegression().fit(X_train, y_train)

# Predict on both partitions so training and testing error can be compared.
y_test_pred = model.predict(X_test)
y_train_pred = model.predict(X_train)

# Evaluation Metrics
def _print_evaluation(heading, actual, predicted):
    """Print MAE, MSE and R-squared for one set of actual/predicted values.

    The last line ends with an extra newline so consecutive reports are
    separated by a blank line.
    """
    print(heading)
    print(f"Mean Absolute Error: {mean_absolute_error(actual, predicted)}")
    print(f"Mean Squared Error: {mean_squared_error(actual, predicted)}")
    print(f"R-squared: {r2_score(actual, predicted)}\n")


print("\n")
_print_evaluation("Training Evaluation:", y_train, y_train_pred)
_print_evaluation("Testing Evaluation:", y_test, y_test_pred)


# Plotting
def _plot_actual_vs_predicted(position, actual, predicted, color, title):
    """Draw one actual-vs-predicted scatter panel in a 1x2 subplot grid.

    position is the 1-based subplot index. A diagonal spanning the range of
    the actual values is drawn as a reference: points on it are predictions
    that equal the actual price.
    """
    plt.subplot(1, 2, position)
    plt.scatter(actual, predicted, alpha=0.7, color=color)
    diagonal = [actual.min(), actual.max()]
    plt.plot(diagonal, diagonal, lw=2)
    plt.xlabel("Actual Price")
    plt.ylabel("Predicted Price")
    plt.title(title)


plt.figure(figsize=(14, 6))

# Left panel: training data; right panel: held-out test data.
_plot_actual_vs_predicted(
    1, y_train, y_train_pred, 'blue', "(Training) Actual vs Predicted Price"
)
_plot_actual_vs_predicted(
    2, y_test, y_test_pred, 'green', "(Testing) Actual vs Predicted Price"
)

plt.tight_layout()
plt.show()
