# In [None]:
# Review each command below and add a detailed comment. When structured in a small block, you can comment the block, but be sure to not forgo detail.

import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt
import seaborn as sns

# ---------------------------------------------------------------------------
# 1. Build the raw dataset.
# Sixteen dealership records: four regions, repeated four times in the same
# order, each with a dealership size, a marketing spend, a customer
# interaction count and the resulting sales figure.
# ---------------------------------------------------------------------------
data = {
    'Region': ['North', 'South', 'East', 'West'] * 4,
    'Dealership_Size': [
        2500, 3000, 1800, 2200,
        2700, 3200, 1900, 2300,
        2600, 3100, 2000, 2400,
        2800, 3300, 2100, 2500,
    ],
    'Marketing_Spend': [
        50000, 60000, 45000, 48000,
        52000, 62000, 47000, 49000,
        51000, 61000, 46000, 50000,
        53000, 63000, 48000, 52000,
    ],
    'Customer_Interactions': [
        300, 400, 280, 310,
        330, 420, 290, 320,
        350, 430, 300, 340,
        370, 440, 310, 360,
    ],
    'Sales': [
        120, 150, 100, 110,
        130, 160, 105, 115,
        125, 155, 110, 120,
        140, 170, 115, 130,
    ],
}

# Turn the dict of equal-length lists into a DataFrame (one column per key)
# and echo it so the raw input can be inspected.
df = pd.DataFrame(data)
print(df)

# ---------------------------------------------------------------------------
# 2. Inspect and clean.
# ---------------------------------------------------------------------------
# Report how many missing entries each column holds.
print("Missing values in each column:")
print(df.isna().sum())

# Replace any missing Customer_Interactions value with the column mean.
# The literal data above is fully populated, so here this acts as a safeguard.
interactions_mean = df['Customer_Interactions'].mean()
df['Customer_Interactions'] = df['Customer_Interactions'].fillna(interactions_mean)

# Remove rows that are exact duplicates of an earlier row, then store
# Marketing_Spend as a float column instead of an integer one.
df = df.drop_duplicates().astype({'Marketing_Spend': float})

# Outlier trimming: compute the 95th percentile of Sales and keep only the
# rows whose Sales value is strictly below it.
threshold = df['Sales'].quantile(0.95)
df = df.loc[df['Sales'].lt(threshold)]

# ---------------------------------------------------------------------------
# 3. Encode the categorical column.
# ---------------------------------------------------------------------------
# One-hot encode Region. drop_first=True omits the first category level so the
# remaining indicator columns are not perfectly collinear with the intercept;
# the omitted level serves as the baseline region.
df = pd.get_dummies(df, columns=['Region'], drop_first=True)

# ---------------------------------------------------------------------------
# 4. Assemble features/target and split.
# ---------------------------------------------------------------------------
# Predictors: the three numeric columns plus the region indicator columns.
feature_columns = [
    'Dealership_Size',
    'Marketing_Spend',
    'Customer_Interactions',
    'Region_North',
    'Region_West',
    'Region_South',
]
X = df[feature_columns]
# Target: the sales figure the model should predict.
y = df['Sales']

# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# shuffle (and therefore the split) reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# ---------------------------------------------------------------------------
# 5. Fit and evaluate an ordinary least squares model.
# ---------------------------------------------------------------------------
# fit() returns the estimator itself, so construction and training can be
# chained; predictions are then made for the held-out rows only.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)

# Score the held-out predictions: mean squared error (average squared
# residual) and R-squared (share of variance in y_test explained).
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

# Report the learned parameters (one coefficient per entry of
# feature_columns, in that order) followed by the two evaluation metrics.
print(f'Coefficients: {model.coef_}')
print(f'Intercept: {model.intercept_}')
print(f'Mean squared error: {mse}')
print(f'R-squared value: {r2}')
