In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 

sns.set()

In [2]:
df = pd.read_csv('train.csv')
df.head()

Unnamed: 0,Id,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,1,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,...,0,,,,0,2,2008,WD,Normal,208500
1,2,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,...,0,,,,0,5,2007,WD,Normal,181500
2,3,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,...,0,,,,0,9,2008,WD,Normal,223500
3,4,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,...,0,,,,0,2,2006,WD,Abnorml,140000
4,5,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,...,0,,,,0,12,2008,WD,Normal,250000


In [3]:
df.isna().sum().sort_values(ascending=False)[:20]

PoolQC          1453
MiscFeature     1406
Alley           1369
Fence           1179
FireplaceQu      690
LotFrontage      259
GarageYrBlt       81
GarageCond        81
GarageType        81
GarageFinish      81
GarageQual        81
BsmtFinType2      38
BsmtExposure      38
BsmtQual          37
BsmtCond          37
BsmtFinType1      37
MasVnrArea         8
MasVnrType         8
Electrical         1
Id                 0
dtype: int64

## Unicode

In [4]:
df.dtypes

Id                 int64
MSSubClass         int64
MSZoning          object
LotFrontage      float64
LotArea            int64
                  ...   
MoSold             int64
YrSold             int64
SaleType          object
SaleCondition     object
SalePrice          int64
Length: 81, dtype: object

In [5]:
uncod = {
     'PoolQC' : {'Ex':5, 'Gd':4, 'TA':3, 'Fa':2, 'NA': 0},
     'MSZoning' : {'A': 1, 'C': 2, 'FV': 3, 'I': 4, 'RH': 5, 'RL': 6, 'RP': 7, 'RM': 8, 'C (all)': 2},
     'Street' : {'Grvl': 1,	'Pave': 2},
     'Alley' : {'Grvl': 1,	'Pave': 2, 'NA': 0},
     'LotShape' : {'Reg': 1, 'IR1': 2, 'IR2': 3, 'IR3': 4},
     'LandContour': {'Lvl': 1, 'Bnk': 2, 'HLS':3, 'Low': 4},
     'Utilities' : {'AllPub': 1, 'NoSewr': 2, 'NoSeWa': 3, 'ELO': 4},
     'LotConfig' : {'Inside': 1, 'Corner': 2, 'CulDSac': 3, 'FR2': 4, 'FR3': 5},
     'LandSlope' : {'Gtl': 1, 'Mod': 2, 'Sev': 3},
     'Neighborhood' : {'Blmngtn': 1, 'Blueste': 2, 'BrDale': 3, 'BrkSide': 4, 'ClearCr': 5, 
                        'CollgCr': 6, 'Crawfor': 7, 'Edwards': 8, 'Gilbert': 9, 'IDOTRR': 10,
                        'MeadowV': 11, 'Mitchel': 12, 'NAmes': 13, 'NoRidge': 14, 'NPkVill': 15,
                        'NridgHt': 16, 'NWAmes': 17, 'OldTown': 18, 'SWISU': 19, 'Sawyer': 20,
                        'SawyerW': 21, 'Somerst': 22, 'StoneBr': 23, 'Timber': 24, 'Veenker': 25},
     'Condition1' : {'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5, 'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9},
     'Condition2' : {'Artery': 1, 'Feedr': 2, 'Norm': 3, 'RRNn': 4, 'RRAn': 5, 'PosN': 6, 'PosA': 7, 'RRNe': 8, 'RRAe': 9},
     'BldgType' : {'1Fam': 1, '2fmCon': 2, 'Duplex': 3, 'TwnhsE': 4, 'Twnhs': 5},
     'HouseStyle' : {'1Story': 1, '1.5Fin': 2, '1.5Unf': 3, '2Story': 4, '2.5Fin': 5, '2.5Unf': 6, 'SFoyer': 7, 'SLvl': 8},
     'RoofStyle' : {'Flat': 1, 'Gable': 2, 'Gambrel': 3, 'Hip': 4, 'Mansard': 5, 'Shed': 6},
     'RoofMatl' : {'ClyTile': 1, 'CompShg': 2, 'Membran': 3, 'Metal': 4, 'Roll': 5, 'Tar&Grv': 6, 'WdShake': 7, 'WdShngl': 8},
     'Exterior1st' : {'AsbShng': 1, 'AsphShn': 2, 'BrkComm': 3, 'BrkFace': 4, 'CBlock': 5, 'CemntBd': 6, 'HdBoard': 7,
                      'ImStucc': 8, 'MetalSd': 9, 'Other': 10, 'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14, 'VinylSd': 15, 'Wd Sdng': 16, 'WdSdng': 16, 'WdShing': 17 },
     'Exterior2nd' : {'AsbShng': 1, 'AsphShn': 2, 'Brk Cmn': 3, 'BrkFace': 4, 'CBlock': 5, 'CmentBd': 6, 'HdBoard': 7,
                      'ImStucc': 8, 'MetalSd': 9, 'Other': 10, 'Plywood': 11, 'PreCast': 12, 'Stone': 13, 'Stucco': 14, 'VinylSd': 15, 'Wd Sdng': 16, 'WdSdng': 16, 'WdShing': 17, 'Wd Shng': 17 },
     'MasVnrType' : {'BrkCmn': 1, 'BrkFace': 2, 'CBlock': 3, 'None': 4, 'Stone': 5, 'NA': 0},
     'ExterQual' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
     'ExterCond' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
     'Foundation' : {'BrkTil': 1, 'CBlock': 2, 'PConc': 3, 'Slab': 4, 'Stone': 5, 'Wood': 6},
     'BsmtQual' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0},
     'BsmtCond' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0},
     'BsmtExposure' : {'Gd': 4, 'Av': 3, 'Mn': 2, 'No': 1, 'NA': 0},
     'BsmtFinType1' : {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1, 'NA': 0},
     'BsmtFinType2' : {'GLQ': 6, 'ALQ': 5, 'BLQ': 4, 'Rec': 3, 'LwQ': 2, 'Unf': 1, 'NA': 0},
     'Heating' : {'Floor': 1, 'GasA': 2, 'GasW': 3, 'Grav': 4, 'OthW': 5, 'Wall': 6},
     'HeatingQC' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
     'CentralAir' : {'N': 0, 'Y': 1},
     'Electrical' : {'SBrkr': 1, 'FuseA': 2, 'FuseF': 3, 'FuseP': 4, 'Mix': 5, 'NA': 0},
     'KitchenQual' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1},
     'Functional' : {'Typ': 1, 'Min1': 2, 'Min2': 3, 'Mod': 4, 'Maj1': 5, 'Maj2': 6, 'Sev': 7, 'Sal': 8},
     'FireplaceQu' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0},
     'GarageType' : {'2Types': 1, 'Attchd': 2, 'Basment': 3, 'BuiltIn': 4, 'CarPort': 5, 'Detchd': 6, 'NA': 0},
     'GarageFinish' : {'Fin': 1, 'RFn': 2, 'Unf': 3, 'NA': 0},
     'GarageQual' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0},
     'GarageCond' : {'Ex': 5, 'Gd': 4, 'TA': 3, 'Fa': 2, 'Po': 1, 'NA': 0},
     'PavedDrive' : {'Y': 1, 'P': 2, 'N': 3},
     'Fence' : {'GdPrv': 1, 'MnPrv': 2, 'GdWo': 3, 'MnWw': 4, 'NA': 0},
     'MiscFeature' : {'Elev': 1, 'Gar2': 2, 'Othr': 3, 'Shed': 4, 'TenC': 5, 'NA': 0},
     'SaleType' : {'WD': 1, 'CWD': 2, 'VWD': 3, 'New': 4, 'COD': 5, 'Con': 6, 'ConLw': 7, 'ConLI': 8, 'ConLD': 9, 'Oth': 10},
     'SaleCondition' : {'Normal': 1, 'Abnorml': 2, 'AdjLand': 3, 'Alloca': 4, 'Family': 5, 'Partial': 6},
}

In [6]:
#df.replace(np.nan, 'NA', inplace=True)

In [7]:
df.isna().sum()

Id                 0
MSSubClass         0
MSZoning           0
LotFrontage      259
LotArea            0
                ... 
MoSold             0
YrSold             0
SaleType           0
SaleCondition      0
SalePrice          0
Length: 81, dtype: int64

In [8]:
df.MasVnrType.unique()

array(['BrkFace', 'None', 'Stone', 'BrkCmn', nan], dtype=object)

In [9]:
'''coloms = df.columns
for i in coloms:
    try:
        df[i] = df[i].apply(lambda x: uncod[i][x])
    except Exception as e:
        print(e)'''

'coloms = df.columns\nfor i in coloms:\n    try:\n        df[i] = df[i].apply(lambda x: uncod[i][x])\n    except Exception as e:\n        print(e)'

In [10]:
df = df.drop('Id', axis=1)

In [11]:
df.head()

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000


In [12]:
#df.replace('NA', 0, inplace=True)
# use one hot encoder


In [13]:
df.dtypes.values.__len__()

80

In [14]:
df.dtypes.values

array([dtype('int64'), dtype('O'), dtype('float64'), dtype('int64'),
       dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'),
       dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'),
       dtype('O'), dtype('O'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('O'), dtype('O'), dtype('O'),
       dtype('O'), dtype('O'), dtype('float64'), dtype('O'), dtype('O'),
       dtype('O'), dtype('O'), dtype('O'), dtype('O'), dtype('O'),
       dtype('int64'), dtype('O'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('O'), dtype('O'), dtype('O'), dtype('O'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('int64'), dtype('int64'),
       dtype('int64'), dtype('int64'), dtype('O'), dtype('int64'),
       dtype('O'), dtype('int64'), dtype('O'), dtype('O'),
       dtype('float64'), dtype('O'), dtype('int64'), dtype('int64'),
       dtype('O'), dtype('O'), dtype('O'), d

## Normalizing

In [15]:
train, validate = df[:int(.8 * len(df))], df[int(.8 * len(df)):]

In [16]:
train

Unnamed: 0,MSSubClass,MSZoning,LotFrontage,LotArea,Street,Alley,LotShape,LandContour,Utilities,LotConfig,...,PoolArea,PoolQC,Fence,MiscFeature,MiscVal,MoSold,YrSold,SaleType,SaleCondition,SalePrice
0,60,RL,65.0,8450,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,2,2008,WD,Normal,208500
1,20,RL,80.0,9600,Pave,,Reg,Lvl,AllPub,FR2,...,0,,,,0,5,2007,WD,Normal,181500
2,60,RL,68.0,11250,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2008,WD,Normal,223500
3,70,RL,60.0,9550,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2006,WD,Abnorml,140000
4,60,RL,84.0,14260,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,12,2008,WD,Normal,250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1163,90,RL,60.0,12900,Pave,,Reg,Lvl,AllPub,Inside,...,0,,,,0,1,2008,WD,Alloca,108959
1164,80,RL,,16157,Pave,,IR1,Lvl,AllPub,FR2,...,0,,,,0,6,2007,WD,Normal,194000
1165,20,RL,79.0,9541,Pave,,IR1,Lvl,AllPub,Inside,...,0,,,,0,9,2009,New,Partial,233170
1166,20,RL,64.0,10475,Pave,,IR1,Lvl,AllPub,Corner,...,0,,,,0,2,2010,WD,Normal,245350


In [17]:
train_data, train_target = train.iloc[:, :-1], train.iloc[:, -1]
validate_data, validate_target = validate.iloc[:, :-1], validate.iloc[:, -1]

from sklearn.preprocessing import OneHotEncoder

ohe = OneHotEncoder(handle_unknown='ignore')
ohe.fit(train_data)
train_data = ohe.transform(train_data)

ohe.fit(validate_data)
validate_data = ohe.transform(validate_data)

In [18]:
train_data[0]

<1x6663 sparse matrix of type '<class 'numpy.float64'>'
	with 79 stored elements in Compressed Sparse Row format>

In [19]:
for i, val in enumerate(train_data):
    
    for j, val2 in enumerate(val):
        if i == 0 and val2 == :
            print(val2)
            #train_data[i][j] = 0

SyntaxError: invalid syntax (Temp/ipykernel_17744/3756942371.py, line 4)

In [None]:
from sklearn import preprocessing

train_data    = preprocessing.scale(train_data, with_mean=False).toarray()
validate_data = preprocessing.scale(validate_data, with_mean=False).toarray()

In [None]:
print(train_data[0])

[0.         0.         0.         ... 0.         2.57112219 0.        ]


In [None]:
np.savez('house_train', inputs=train_data, targets=train_target)
np.savez('house_validation', inputs=validate_data, targets=validate_target)

In [None]:
import tensorflow as tf

In [None]:
npz = np.load('house_train.npz', allow_pickle=True)
train_inputs = npz['inputs']
train_targets = npz['targets']

npz = np.load('house_validation.npz',  allow_pickle=True)
validate_inputs = npz['inputs']
validate_targets = npz['targets']

In [None]:
train_inputs

array(<1168x6663 sparse matrix of type '<class 'numpy.float64'>'
	with 92272 stored elements in Compressed Sparse Column format>,
      dtype=object)

In [None]:
train_inputs.replace(np.nan, 0, inplace=True)

AttributeError: 'numpy.ndarray' object has no attribute 'replace'

In [None]:
input_size = len(train_data[0])
output_size = 1
hidden_layer_size = 100

model = tf.keras.Sequential([
    tf.keras.l
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dropout(.2),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(hidden_layer_size, activation='relu'),
    tf.keras.layers.Dense(output_size)
])

opt = tf.keras.optimizers.Adam(
   learning_rate = 0.0005
)

model.compile(optimizer='Adam', loss='mean_absolute_error', metrics=['accuracy'])

max_epochs = 50
early_stopping = tf.keras.callbacks.EarlyStopping(patience=2)

model.fit(train_data,
          train_target,
          epochs=max_epochs,
          callbacks=[early_stopping],
          validation_data=(validate_data, validate_target),
          verbose = 2 
          )  

Epoch 1/50


ValueError: in user code:

    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1366, in test_function  *
        return step_function(self, iterator)
    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1356, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1349, in run_step  **
        outputs = model.test_step(data)
    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\training.py", line 1303, in test_step
        y_pred = self(x, training=False)
    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "C:\Users\Repoo\AppData\Local\Programs\Python\Python39\lib\site-packages\keras\engine\input_spec.py", line 247, in assert_input_compatibility
        raise ValueError(

    ValueError: Exception encountered when calling layer "sequential_3" (type Sequential).
    
    Input 0 of layer "dense_12" is incompatible with the layer: expected axis -1of input shape to have value 6663, but received input with shape (None, 2705)
    
    Call arguments received:
      • inputs=tf.Tensor(shape=(None, 2705), dtype=float32)
      • training=False
      • mask=None


In [None]:
validate_data[0]

array([ 0.35973635, -0.07982944,  1.88357176,  1.24674768,  0.05862104,
               nan,  0.72557738,  0.34116757,  0.        , -1.91711067,
       -0.23833939, -0.84694832, -0.02106669,  0.        , -0.40877432,
        0.97063096, -0.06592134,  1.29607976, -1.1652957 ,  0.06352426,
        1.65422345, -0.07576627,  0.34118096,  0.40069065,  0.30520654,
       -0.65884268,  0.66631217,  0.33480546, -0.51142187,  0.81196225,
        0.25776089,  0.64805096,  0.59638874,  0.5394281 ,  0.30551018,
       -0.31477085, -0.06028675,  0.42749562, -0.1393889 , -0.870947  ,
        0.24863262,  0.26164686,  0.44878905,  1.22730597, -0.10745857,
        1.30422663, -0.85985474, -0.19785325,  0.73202014, -0.76343316,
        1.35964926, -0.23270502, -0.42932708,  0.20964296,  0.24558237,
        2.11428512,  1.14745618, -0.17040257, -1.60595731,  1.03028843,
        0.26888428,  0.43681733,  0.25506845,  0.23363466,  0.32806805,
       -0.70258158, -0.84546568, -0.34864985, -0.09425763,  0.64

In [None]:
validate_target

1168    235000
1169    625000
1170    171000
1171    163000
1172    171900
         ...  
1455    175000
1456    210000
1457    266500
1458    142125
1459    147500
Name: SalePrice, Length: 292, dtype: int64

In [None]:
model.predict(validate_data[2].reshape(1,79))

array([[nan]], dtype=float32)