<a href="https://colab.research.google.com/github/SoneyBun/Wyatt/blob/main/Wyatt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import matplotlib.pyplot as plt

In [None]:
# Read both raw listing tables from CSV files located in the working directory.
virginia, ames = (
    pd.read_csv(csv_name)
    for csv_name in ("Virginia_Housing.csv", "AmesHousing.csv")
)

In [None]:
# Show the raw header names of each table before any cleaning is applied.
for heading, frame in (
    ("Virginia Housing Columns:\n", virginia),
    ("Ames Housing Columns:\n", ames),
):
    print(heading, frame.columns.tolist(), "\n")

In [None]:
# Trim leading/trailing whitespace from the header names of both tables (in place).
for frame in (virginia, ames):
    frame.columns = frame.columns.str.strip()

In [None]:
# Keep only the first occurrence of every column label, then renumber the rows
# of each table from zero.
first_virginia = ~virginia.columns.duplicated()
first_ames = ~ames.columns.duplicated()
virginia = virginia.loc[:, first_virginia].copy().reset_index(drop=True)
ames = ames.loc[:, first_ames].copy().reset_index(drop=True)

In [None]:
# Map each canonical feature to exactly ONE Ames column.
#
# Substring matching ("area", "bath", "sale", ...) sends several different
# source columns to the same canonical name; rename() then creates duplicate
# labels, and the later "keep first duplicate" step retains whichever match
# happens to come first in the file rather than the intended measurement
# (text columns picked that way coerce to NaN and the rows are dropped).
#
# Keys are Ames data-dictionary names, lower-cased with every non-alphanumeric
# character removed, so "Gr Liv Area", "GrLivArea" and "gr_liv_area" all
# resolve to the same key.
# NOTE(review): assumes the CSV uses the standard Ames data-dictionary column
# names - confirm against the column list of the loaded file.
# NOTE(review): "Lot Area" is documented in square feet; confirm the Virginia
# Lot_Size column uses the same unit before pooling the two tables.
AMES_COLUMN_ALIASES = {
    "grlivarea": "Square_Feet",     # above-grade living area
    "bedroomabvgr": "Bedrooms",     # bedrooms above grade
    "fullbath": "Bathrooms",        # full bathrooms above grade
    "yearbuilt": "Year_Built",      # original construction year
    "lotarea": "Lot_Size",          # lot size
    "garagecars": "Garage",         # garage capacity in cars
    "saleprice": "Price",           # sale price (model target)
}

rename_map = {}
for col in ames.columns:
    key = "".join(ch for ch in str(col).lower() if ch.isalnum())
    canonical = AMES_COLUMN_ALIASES.get(key)
    # A canonical name is assigned at most once, so the renamed frame can
    # never carry two columns with the same label.
    if canonical is not None and canonical not in rename_map.values():
        rename_map[col] = canonical

ames_renamed = ames.rename(columns=rename_map)

In [None]:
# Canonical schema shared by both datasets: six predictor columns followed by
# the prediction target ("Price").
target_cols = [
    "Square_Feet",
    "Bedrooms",
    "Bathrooms",
    "Year_Built",
    "Lot_Size",
    "Garage",
    "Price",
]

In [None]:
# Restrict each table to the schema columns it actually contains, preserving
# the order given by target_cols.
ames_keep = [name for name in target_cols if name in ames_renamed.columns]
virginia_keep = [name for name in target_cols if name in virginia.columns]

ames_filtered = ames_renamed[ames_keep].copy()
virginia_filtered = virginia[virginia_keep].copy()

In [None]:
# For each filtered table: drop repeated column labels (first occurrence wins),
# then renumber the rows from zero.
ames_filtered = (
    ames_filtered
    .loc[:, ~ames_filtered.columns.duplicated()]
    .reset_index(drop=True)
)
virginia_filtered = (
    virginia_filtered
    .loc[:, ~virginia_filtered.columns.duplicated()]
    .reset_index(drop=True)
)

In [None]:
# Give both tables the same, alphabetically sorted column set so they can be
# stacked; reindex adds any column a table lacks, filled with NaN.
all_cols = sorted(set(ames_filtered.columns) | set(virginia_filtered.columns))
ames_filtered, virginia_filtered = (
    frame.reindex(columns=all_cols)
    for frame in (ames_filtered, virginia_filtered)
)

In [None]:
# Stack the Virginia rows on top of the Ames rows with a fresh 0..n-1 index.
# (This step previously appeared in two consecutive cells; the second simply
# overwrote the first, so a single cell is kept.)
combined_df = pd.concat([virginia_filtered, ames_filtered], ignore_index=True)

# Defensive: keep the first occurrence of any repeated column label and take an
# independent copy so later column assignments act on this frame only.
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()].copy()

In [None]:
# Coerce every schema column that is present to a numeric dtype; values that
# cannot be parsed become NaN (errors="coerce").
numeric_fields = (
    "Square_Feet",
    "Bedrooms",
    "Bathrooms",
    "Year_Built",
    "Lot_Size",
    "Garage",
    "Price",
)
for field in numeric_fields:
    if field not in combined_df.columns:
        continue
    combined_df[field] = pd.to_numeric(combined_df[field], errors="coerce")

In [None]:
# Discard every row that is missing a value in any predictor or in the target.
required_fields = [
    "Square_Feet",
    "Bedrooms",
    "Bathrooms",
    "Year_Built",
    "Lot_Size",
    "Garage",
    "Price",
]
combined_df = combined_df.dropna(axis=0, how="any", subset=required_fields)

In [None]:
# Split the cleaned table into the predictor matrix X and the target vector y.
feature_names = [
    "Square_Feet",
    "Bedrooms",
    "Bathrooms",
    "Year_Built",
    "Lot_Size",
    "Garage",
]
y = combined_df["Price"]
X = combined_df[feature_names]

In [None]:
# Hold out 20% of the rows for evaluation; the fixed random_state makes the
# split repeatable across runs.
split_options = {"test_size": 0.2, "random_state": 42}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

In [None]:
# Fit a linear regression on the training split. The bare fit() call is left as
# the last expression so the notebook still displays the fitted estimator.
model = LinearRegression()
model.fit(X=X_train, y=y_train)

In [None]:
# Predict prices for the held-out rows.
y_pred = model.predict(X=X_test)

In [None]:
# Evaluate on the held-out split: RMSE is the square root of the mean squared
# error; r2 is the coefficient of determination.
mse = mean_squared_error(y_true=y_test, y_pred=y_pred)
rmse = np.sqrt(mse)
r2 = r2_score(y_true=y_test, y_pred=y_pred)

In [None]:
# Report the hold-out metrics. The label previously read "RÂ²" - the UTF-8
# bytes of "R²" decoded as Latin-1 - and is restored to the intended text.
print("\nModel Performance:")
print(f"R² Score: {r2:.3f}")
print(f"RMSE: {rmse:,.2f}")

In [None]:
# Side-by-side table of true and predicted prices for the held-out rows
# (indexed like y_test), followed by a plain-text preview of the first ten.
paired_prices = {"Actual": y_test, "Predicted": y_pred}
comparison = pd.DataFrame(paired_prices)
print("\nSample Predictions:", comparison.head(10), sep="\n")

In [None]:
# Actual-vs-predicted scatter for the held-out rows.
fig, ax = plt.subplots()
ax.scatter(y_test, y_pred, color="blue", alpha=0.6)
ax.set_xlabel("Actual Prices")
ax.set_ylabel("Predicted Prices")
ax.set_title("Combined (Virginia + Ames) House Price Prediction")
ax.grid(True)
plt.show()

In [None]:
# Score one hand-written listing with the fitted model.
# NOTE(review): Lot_Size=0.49 looks like acres - confirm it matches the unit of
# the Lot_Size values the model was trained on.
sample_listing = {
    "Square_Feet": 2700,
    "Bedrooms": 5,
    "Bathrooms": 4,
    "Year_Built": 2018,
    "Lot_Size": 0.49,
    "Garage": 2,
}
sample = pd.DataFrame([sample_listing])

predicted_price = model.predict(sample)
print(f"\nPredicted Price for Sample House: ${predicted_price[0]:,.2f}")