# Practical Example: Regression Analysis

This notebook contains the solutions to the practical examples for Regression Analysis.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import statsmodels.api as sm

# Apply seaborn's white-grid look to every figure drawn below
sns.set(style='whitegrid')

# Workbook holding the practical-example data (path is relative to the working directory)
file_path = '5.21.+Regression_Analysis_practical_example.xlsx'

# header=3 takes the column labels from the row at 0-based index 3 of the 'Dataset' sheet
df = pd.read_excel(io=file_path, header=3, sheet_name='Dataset')

# Preview the top of the frame to confirm the header row was picked up as intended
df.head()

## Task 1: Scatter Plots
**Create scatter plots to visualize relationships between Price and other variables.**

In [None]:
def plot_price_against(column, title):
    """Draw a 10x6 scatter plot of ``df['Price']`` against ``df[column]``.

    Parameters
    ----------
    column : str
        Name of the column in ``df`` placed on the x-axis.
    title : str
        Text shown above the figure.
    """
    # Explicit Figure/Axes keeps each plot independent of pyplot's "current axes" state
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.scatterplot(x=column, y='Price', data=df, ax=ax)
    ax.set_title(title)
    plt.show()


# One figure per predictor; the plots differ only in the x column and the title
for x_column, plot_title in [
    ('Mileage', 'Price vs Mileage'),
    ('EngineV', 'Price vs Engine Volume'),
    ('Year', 'Price vs Year'),
]:
    plot_price_against(x_column, plot_title)

## Task 2: Correlations
**Calculate the correlation matrix.**

In [None]:
# Correlation is only computed on numeric data, so keep just the numeric columns
numeric_df = df.select_dtypes(include=np.number)

# Pairwise correlation between every pair of numeric columns (Pearson by default)
correlation_matrix = numeric_df.corr()
print(correlation_matrix)

# The same matrix as an annotated heatmap, each cell labelled to two decimals
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(correlation_matrix, annot=True, fmt='.2f', cmap='coolwarm', ax=ax)
ax.set_title('Correlation Matrix')
plt.show()

## Task 3: Simple Linear Regression
**Perform simple linear regression: Price vs Year (or another single variable).**

In [None]:
# Response (Price) and a single predictor (Year)
y = df['Price']
x1 = df['Year']

# OLS does not include an intercept unless the design matrix carries a constant column
x = sm.add_constant(x1)

# statsmodels defaults to missing='none' (no NaN handling); 'drop' excludes
# incomplete observations so blank spreadsheet cells cannot break or distort the fit
results = sm.OLS(y, x, missing='drop').fit()
results.summary()

## Task 4: Multiple Linear Regression
**Perform multiple linear regression: Price vs Year, EngineV, Mileage.**

In [None]:
# Response (Price) and the three predictors used in the multiple regression
y = df['Price']
x1 = df[['Year', 'EngineV', 'Mileage']]

# OLS does not include an intercept unless the design matrix carries a constant column
x = sm.add_constant(x1)

# statsmodels defaults to missing='none' (no NaN handling); 'drop' excludes any
# observation with a missing response or predictor so the fit uses complete rows only
results_mult = sm.OLS(y, x, missing='drop').fit()
results_mult.summary()

## Task 5: Model Comparison
**Compare R-squared and Adjusted R-squared.**

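Adjusted R-squared penalizes the addition of variables that do not improve the model, so it can only be equal to or lower than R-squared. For the multiple regression model, check whether the Adjusted R-squared is noticeably lower than the R-squared: a large gap suggests that some predictors add little explanatory power. When comparing the simple and multiple regression models, use the Adjusted R-squared, because the plain R-squared never decreases when variables are added.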