In [9]:
#%pip install Pandas
#%pip install keras
#%pip install tensorflow
#%pip install scikit-learn

Collecting scikit-learn
  Downloading scikit_learn-1.4.2-cp311-cp311-win_amd64.whl.metadata (11 kB)
Collecting joblib>=1.2.0 (from scikit-learn)
  Downloading joblib-1.4.0-py3-none-any.whl.metadata (5.4 kB)
Collecting threadpoolctl>=2.0.0 (from scikit-learn)
  Downloading threadpoolctl-3.4.0-py3-none-any.whl.metadata (13 kB)
Downloading scikit_learn-1.4.2-cp311-cp311-win_amd64.whl (10.6 MB)
   ---------------------------------------- 0.0/10.6 MB ? eta -:--:--
   ---------------------------------------- 0.1/10.6 MB 3.6 MB/s eta 0:00:03
   - -------------------------------------- 0.5/10.6 MB 6.6 MB/s eta 0:00:02
   --- ------------------------------------ 1.0/10.6 MB 7.8 MB/s eta 0:00:02
   ----- ---------------------------------- 1.5/10.6 MB 8.8 MB/s eta 0:00:02
   ------- -------------------------------- 2.1/10.6 MB 9.5 MB/s eta 0:00:01
   ---------- ----------------------------- 2.7/10.6 MB 10.1 MB/s eta 0:00:01
   ------------ --------------------------- 3.3/10.6 MB 10.5 MB/s eta 0:0

DEPRECATION: teroshdl 2.1.2 has a non-standard dependency specifier yowasp-yosys>=0.8.*. pip 24.1 will enforce this behaviour change. A possible replacement is to upgrade to a newer version of teroshdl or contact the author to suggest that they release a version with a conforming dependency specifiers. Discussion can be found at https://github.com/pypa/pip/issues/12063


In [10]:
import os
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import scipy as sp
import keras
import sklearn.decomposition
import sklearn.model_selection
import sklearn.preprocessing
import sklearn.linear_model
import sklearn.metrics


- [1. Introduction](#1-introduction)



# 1. Introduction

Predicting for how much houses will sell will help current and prospective homeowners navigate the cluttered landscape of real estate. 

# 2. Dataset

The dataset provided houses a lot of different parameters on which the price of a house can depend. Just to give a couple examples with their usual impact on the house price from general knowledge and experience:

- LotArea: the area of the land on which a house stands. In general the higher this number the higher the price.
- Bedroom: number of bedrooms above ground. In general the higher the number of bedrooms the higher the price.

There are a lot more prameters that are contained in the dataset. For more detail on this consult the data_description.txt file. 

# 3. Analysing the dataset

For this we first have to open the csv files that contain the data. In here we see that there are some columns that contain numeric data and some that do not. There are also some empty values. Before any models can be trained on the dataset we have to format it such that there are solely numeric values. In the data_description.txt file all the different kinds of non-numeric values can be found. 

In [11]:
# accessing the csv files

train_csv = pd.read_csv("./house-prices-advanced-regression-techniques/train.csv")
test_csv = pd.read_csv("./house-prices-advanced-regression-techniques/test.csv")

data_fields = train_csv.columns.values

print(train_csv.head())
print(data_fields)

   Id  MSSubClass MSZoning  LotFrontage  LotArea Street Alley LotShape  \
0   1          60       RL         65.0     8450   Pave   NaN      Reg   
1   2          20       RL         80.0     9600   Pave   NaN      Reg   
2   3          60       RL         68.0    11250   Pave   NaN      IR1   
3   4          70       RL         60.0     9550   Pave   NaN      IR1   
4   5          60       RL         84.0    14260   Pave   NaN      IR1   

  LandContour Utilities  ... PoolArea PoolQC Fence MiscFeature MiscVal MoSold  \
0         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
1         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      5   
2         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      9   
3         Lvl    AllPub  ...        0    NaN   NaN         NaN       0      2   
4         Lvl    AllPub  ...        0    NaN   NaN         NaN       0     12   

  YrSold  SaleType  SaleCondition  SalePrice  
0   2008        WD   

In [1]:
# formatting the data

# change the NaN values to zero

train_csv = train_csv.mask(pd.isna(train_csv), 0)
test_csv = test_csv.mask(pd.isna(test_csv), 0)

classes = { 
    "mszoning_classes"           :     ["A","C","FV","I","RH","RL","RP","RM"],
    "street_classes"            :      ["Grvl","Pave"],
    "alley_classes"             :      ["Grvl","Pave","NA"],
    "lotshape_classes"          :      ["Reg","IR1","IR2","IR3"],
    "landcontour_classes"       :      ["Lvl","Bnk","HLS","Low"],
    "utilities_classes"         :      ["AllPub","NoSewr","NoSeWa","ELO"],
    "lotconfig_classes"         :      ["Inside","Corner","CulDSac","FR2","FR3"],
    "landslope_classes"         :      ["Gtl","Mod","Sev"],
    "neighborhood_classes"      :      ["Blmngtn","Blueste","BrDale","BrkSide","ClearCr","CollgCr","Crawfor","Edwards","Gilbert","IDOTRR","MeadowV","Mitchel","Names","NoRidge","NPkVill","NridgHt","NWAmes","OldTown","SWISU","Sawyer","SawyerW","Somerst","StoneBr","Timber","Veenker"],
    "condition1_classes"        :      ["Artery","Feedr","Norm","RRNn","RRAn","PosN","PosA","RRNe","RRAe"],
    "condition2_classes"        :      ["Artery","Feedr","Norm","RRNn","RRAn","PosN","PosA","RRNe","RRAe"],
    "bldgtype_classes"          :      ["1Fam","2FmCon","Duplx","TwnhsE","TwnhsI"],
    "housestyle_classes"        :      ["1Story","1.5Fin","1.5Unf","2Story","2.5Fin","2.5Unf","SFoyer","SLvl"],
    "roofstyle_classes"         :      ["Flat","Gable","Gambrel","Hip","Mansard","Shed"],
    "roofmatl_classes"          :      ["ClyTile","CompShg","Membran","Metal","Roll","Tar","WdShake","WdShngl"],
    "exterior1st_classes"       :      ["AsbShng","AsphShn","BrkComm","BrkFace","CBlock","CemntBd","HdBoard","ImStucc","MetalSd","Other","Plywood","PreCast","Stone","Stucco","VinylSd","Wd Sdng","WdShing"],
    "exterior2nd_classes"       :      ["AsbShng","AsphShn","BrkComm","BrkFace","CBlock","CemntBd","HdBoard","ImStucc","MetalSd","Other","Plywood","PreCast","Stone","Stucco","VinylSd","Wd Sdng","WdShing"],
    "masvnrtype_classes"        :      ["BrkCmn","BrkFace","CBlock","None","Stone"],
    "exterqual_classes"         :      ["Ex","Gd","TA","Fa","Po"],
    "extercond_classes"         :      ["Ex","Gd","TA","Fa","Po"],
    "foundation_classes"        :      ["BrkTil","CBlock","PConc","Slab","Stone","Wood"],
    "bsmtqual_classes"          :      ["Ex","Gd","TA","Fa","Po","NA"],
    "bsmtcond_classes"          :      ["Ex","Gd","TA","Fa","Po","NA"],
    "bsmtexposure_classes"      :      ["Gd","Av","Mn","No","NA"],
    "bsmtfintype1_classes"      :      ["GLQ","ALQ","BLQ","Rec","LwQ","Unf","NA"],
    "bsmtfintype2_classes"      :      ["GLQ","ALQ","BLQ","Rec","LwQ","Unf","NA"],
    "heating_classes"           :      ["Floor","GasA","GasW","Grav","OthW","Wall"],
    "heatingqc_classes"         :      ["Ex","Gd","TA","Fa","Po"],
    "centralair_classes"        :      ["N", "Y"],
    "electrical_classes"        :      ["SBrkr","FuseA","FuseF","FuseP","Mix"],
    "kitchenqual_classes"       :      ["Ex","Gd","TA","Fa","Po"],
    "functional_classes"        :      ["Typ","Min1","Min2","Mod","Maj1","Maj2","Sev","Sal"],
    "fireplacequ_classes"       :      ["Ex","Gd","TA","Fa","Po","NA"],
    "garagetype_classes"        :      ["2Types","Attchd","Basment","BuiltIn","CarPort","Detchd","NA"],
    "garagefinish_classes"      :      ["Fin","RFn","Unf","NA"],
    "garagequal_classes"        :      ["Ex","Gd","TA","Fa","Po","NA"],
    "garagecond_classes"        :      ["Ex","Gd","TA","Fa","Po","NA"],
    "paveddrive_classes"        :      ["Y","P","N"],
    "poolqc_classes"            :      ["Ex","Gd","TA","Fa","NA"],
    "fence_classes"             :      ["GdPrv","MnPrv","GdWo","MnWw","NA"],
    "miscfeature_classes"       :      ["Elev","Gar2","Othr","Shed","TenC","NA"],
    "saletype_classes"          :      ["WD","CWD","VWD","New","COD","Con","ConLw","ConLI","ConLD","Oth"],
    "salecondition_classes"     :      ["Normal","Abnorml","AdjLand","Alloca","Family","Partial"]
}

enumerate = np.linspace(0,100,100)
for key in classes:
    for i,j in zip(classes[key], enumerate):
        train_csv = train_csv.mask(train_csv == i, j)


    
print(train_csv)
print(test_csv)

SyntaxError: '{' was never closed (2543416734.py, line 8)

# N. References

- Predicting House Prices (Keras - ANN), Tomas Mantero, https://www.kaggle.com/code/tomasmantero/predicting-house-prices-keras-ann/notebook
- 