In [1]:
!pip install kaggle==1.5.12


Collecting kaggle==1.5.12
  Downloading kaggle-1.5.12.tar.gz (58 kB)
[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/59.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m59.0/59.0 kB[0m [31m2.3 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: kaggle
  Building wheel for kaggle (setup.py) ... [?25l[?25hdone
  Created wheel for kaggle: filename=kaggle-1.5.12-py3-none-any.whl size=73026 sha256=6fe5074184d9060b591c880415061249e3d4f9278f44524b698241088a6ec553
  Stored in directory: /root/.cache/pip/wheels/2e/27/39/f44e52756a6407b444143f233abe9fda0e18a23e8b20e0cd1c
Successfully built kaggle
Installing collected packages: kaggle
  Attempting uninstall: kaggle
    Found existing installation: kaggle 1.6.17
    Uninstalling kaggle-1.6.17:
      Successfully uninstalled kaggle-1.6.17
Successfully installed kaggle-1.5.12


In [2]:
!mkdir ~/.kaggle
!cp kaggle.json ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json

In [3]:
# Download the yasserh/housing-prices-dataset archive from Kaggle into the
# current working directory (requires the API token installed in the cell above).
!kaggle datasets download -d yasserh/housing-prices-dataset


Downloading housing-prices-dataset.zip to /content
  0% 0.00/4.63k [00:00<?, ?B/s]
100% 4.63k/4.63k [00:00<00:00, 1.51MB/s]


In [4]:
!unzip housing-prices-dataset.zip

Archive:  housing-prices-dataset.zip
  inflating: Housing.csv             


In [8]:
# prompt: HousePricePredictingWithRegression

import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import OneHotEncoder  # Import OneHotEncoder

# Load the housing dataset extracted by the unzip cell.
try:
    df = pd.read_csv('Housing.csv')
except FileNotFoundError as err:
    # Raise instead of calling exit(): inside a notebook exit() shuts the kernel
    # down, whereas an exception stops this cell with a readable message and
    # leaves the session usable.
    raise FileNotFoundError(
        "Error: 'Housing.csv' not found. Please make sure the file exists and the path is correct."
    ) from err

# Data preprocessing (example - adapt to your specific data)
# 1. Handle missing values.
#    - numeric columns: fill with the column mean
#    - all other columns: fill with the most frequent value, because a mean is
#      undefined for them and .mean() would raise TypeError
#    Results are assigned back to df[col]; chained `df[col].fillna(..., inplace=True)`
#    is deprecated in pandas and does not update df under Copy-on-Write.
for col in df.columns:
    if not df[col].isnull().any():
        continue
    if pd.api.types.is_numeric_dtype(df[col]):
        fill_value = df[col].mean()
    else:
        most_frequent = df[col].mode(dropna=True)
        if most_frequent.empty:
            # Column is entirely missing: there is nothing sensible to impute.
            continue
        fill_value = most_frequent.iloc[0]
    df[col] = df[col].fillna(fill_value)


# 2. Feature selection: predictor columns fed to the model; 'price' is the target.
features = [
    'area', 'bedrooms', 'bathrooms', 'stories', 'mainroad', 'guestroom',
    'basement', 'hotwaterheating', 'airconditioning', 'parking', 'prefarea',
    'furnishingstatus',
]
X = df[features]
y = df['price']

# 3. One-hot encode the categorical predictors.
categorical_features = [
    'mainroad', 'guestroom', 'basement', 'hotwaterheating', 'airconditioning',
    'prefarea', 'furnishingstatus',
]
# sparse_output=False -> dense array that can be wrapped in a DataFrame.
# handle_unknown='ignore' -> a category not seen at fit time is encoded as all
# zeros at transform time instead of raising.
# NOTE: every category level gets its own column, so the dummy columns are
# collinear with the model intercept; individual coefficients should not be
# interpreted on their own.
encoder = OneHotEncoder(sparse_output=False, handle_unknown='ignore')
encoded_data = encoder.fit_transform(X[categorical_features])
encoded_df = pd.DataFrame(
    encoded_data,
    columns=encoder.get_feature_names_out(categorical_features),
    # Share X's index: concat(axis=1) aligns on index labels, so a fresh
    # RangeIndex would create NaN rows whenever df's index is not 0..n-1.
    index=X.index,
)
# Replace the raw categorical columns with their encoded counterparts.
X = pd.concat([X.drop(columns=categorical_features), encoded_df], axis=1)


# 4. Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit an ordinary least-squares model on the training portion.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the held-out rows and report the error (MSE) and fit quality (R^2).
y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)

print(f"Mean Squared Error: {mse}")
print(f"R-squared: {r2}")

# Example prediction for a single new house.
# The raw frame must supply every column listed in `features`; the values below
# are illustrative placeholders. Because the encoder was created with
# handle_unknown='ignore', a categorical value it was not fitted on is encoded
# as all zeros rather than raising.
new_house_raw = pd.DataFrame({
    'area': [1500],
    'bedrooms': [3],
    'bathrooms': [2],
    'stories': [1],
    'mainroad': ['yes'],
    'guestroom': ['no'],
    'basement': ['no'],
    'hotwaterheating': ['no'],
    'airconditioning': ['no'],
    'parking': [1],
    'prefarea': ['yes'],
    'furnishingstatus': ['furnished'],
})

# Reuse the already-fitted encoder (transform only, no refit) so the dummy
# columns are the same ones the model was trained on.
new_house_encoded = pd.DataFrame(
    encoder.transform(new_house_raw[categorical_features]),
    columns=encoder.get_feature_names_out(categorical_features),
    index=new_house_raw.index,  # keep rows aligned for the concat below
)
new_house_data = pd.concat(
    [new_house_raw.drop(columns=categorical_features), new_house_encoded],
    axis=1,
)
# Select columns in the exact training order; a missing column raises KeyError
# here instead of the prediction depending on the dict's key order.
new_house_data = new_house_data[X_train.columns]

predicted_price = model.predict(new_house_data)
print(f"Predicted price for new house: {predicted_price[0]}")

Mean Squared Error: 1754318687330.6616
R-squared: 0.6529242642153188
Predicted price for new house: 4663340.262778299
