# House Sales Analysis in King County

## Step 1: Load and Preview Dataset

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler, PolynomialFeatures

# Read the King County house-sales CSV into a DataFrame, then preview the
# first five rows (the default window of `head`, spelled out here).
df = pd.read_csv('kc_house_data.csv')
df.head(n=5)

## Step 2: Check Data Types

In [None]:
# Show the dtype pandas assigned to each column of `df`.
df.dtypes

## Step 3: Clean and Describe Dataset

In [None]:
# Remove the 'id' and 'Unnamed: 0' columns in place. errors='ignore' skips
# whichever of them is not present instead of raising.
df.drop(columns=['id', 'Unnamed: 0'], inplace=True, errors='ignore')

# Summary statistics for the remaining columns.
df.describe()

## Step 4: Count Unique Floor Values

In [None]:
# Tally how many rows carry each distinct 'floors' value, then present the
# tally as a one-column DataFrame.
floor_counts = df['floors'].value_counts()
floor_counts.to_frame()

## Step 5: Boxplot of Price by Waterfront

In [None]:
# Distribution of price for each 'waterfront' value. seaborn draws on the
# current Axes and returns it, so the title is set on that object directly.
boxplot_ax = sns.boxplot(data=df, x='waterfront', y='price')
boxplot_ax.set_title('Boxplot of Price by Waterfront')
plt.tight_layout()
plt.show()

## Step 6: Regplot of sqft_above vs Price

In [None]:
# Scatter of 'sqft_above' against 'price' with a fitted regression line.
# The title is set on the Axes that seaborn returns.
regplot_ax = sns.regplot(data=df, x='sqft_above', y='price')
regplot_ax.set_title('Regplot: sqft_above vs Price')
plt.tight_layout()
plt.show()

## Step 7: Linear Regression on sqft_living

In [None]:
# Simple linear regression: predict 'price' from 'sqft_living' alone.
# X keeps its DataFrame (2-D) shape via the double brackets.
X, y = df[['sqft_living']], df['price']
lm = LinearRegression().fit(X, y)
lm.score(X, y)  # R² on the same rows the model was fit to

## Step 8: Multiple Linear Regression

In [None]:
# Predictor columns for the multi-feature models; the order is kept as-is.
features = [
    'floors',
    'waterfront',
    'lat',
    'bedrooms',
    'sqft_basement',
    'view',
    'bathrooms',
    'sqft_living15',
    'sqft_above',
    'grade',
    'sqft_living',
]
X_mult, y_mult = df[features], df['price']

# Refit the existing `lm` estimator on all eleven predictors; this replaces
# the single-feature fit it held before.
lm.fit(X_mult, y_mult)
lm.score(X_mult, y_mult)  # R² on the same rows the model was fit to

## Step 9: Polynomial Regression Pipeline

In [None]:
# Three-stage pipeline: standardize the predictors, expand them into
# degree-2 polynomial terms, then fit a linear regression on the expansion.
poly_steps = [
    ('scale', StandardScaler()),
    ('poly', PolynomialFeatures(degree=2)),
    ('model', LinearRegression()),
]
pipe = Pipeline(steps=poly_steps).fit(X_mult, y_mult)
pipe.score(X_mult, y_mult)  # R² on the same rows the pipeline was fit to

## Step 10: Ridge Regression

In [None]:
# Hold out 20% of the rows for evaluation; random_state pins the shuffle so
# the split is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_mult,
    y_mult,
    test_size=0.2,
    random_state=1,
)

# Ridge regression (alpha=0.1) fit on the training split only.
ridge = Ridge(alpha=0.1).fit(X_train, y_train)
ridge.score(X_test, y_test)  # R² on the held-out split

## Step 11: Polynomial + Ridge Regression

In [None]:
# Degree-2 polynomial expansion, fit on the training split and then applied
# to both splits so they share the same expanded columns.
poly2 = PolynomialFeatures(degree=2)
poly2.fit(X_train)
X_train_poly = poly2.transform(X_train)
X_test_poly = poly2.transform(X_test)

# Refit the existing `ridge` estimator on the expanded training features;
# this replaces the coefficients from its earlier fit.
ridge.fit(X_train_poly, y_train)
ridge.score(X_test_poly, y_test)  # R² on the held-out split