## Reading the Dataset

In [None]:
import os
def mkdir_if_not_exist(path):
    """Create the directory ``path`` (and any missing parents) if it is absent.

    Args:
        path: Either a single path (``str``, ``bytes`` or ``os.PathLike``,
            e.g. ``pathlib.Path``) or an iterable of path components, which
            are combined with ``os.path.join``.

    A path that already exists is left untouched.
    """
    # A single path is used as-is; anything else is treated as a sequence of
    # components. os.PathLike objects must not be unpacked: pathlib.Path is
    # not iterable, so unpacking it raises TypeError.
    if not isinstance(path, (str, bytes, os.PathLike)):
        path = os.path.join(*path)
    if not os.path.exists(path):
        # exist_ok=True tolerates the directory being created by another
        # process between the exists() check and this call.
        os.makedirs(path, exist_ok=True)

In [None]:
datafile = './data/house_tiny.csv'
mkdir_if_not_exist('./data')

# Contents of the toy dataset, one CSV line per entry.
csv_lines = [
    'NumRooms,Alley,Price\n',  # Column names
    'NA,Pave,127500\n',  # Each row represents a data point
    '2,NA,106000\n',
    '4,NA,178100\n',
    'NA,NA,140000\n',
]
with open(datafile, 'w') as f:
    f.writelines(csv_lines)

In [29]:
import pandas as pd
# Load the CSV file into a DataFrame; the literal 'NA' entries in the file
# are parsed as missing values (NaN).
data = pd.read_csv(datafile)
data

Unnamed: 0,NumRooms,Alley,Price
0,,Pave,127500
1,2.0,,106000
2,4.0,,178100
3,,,140000


## Handling Missing Data

In [30]:
# Split by position: the first two columns (NumRooms, Alley) are the features,
# the third column (Price) is the target. The intermediate frame is not called
# `input`, so the builtin of that name is not shadowed.
inputs_raw, output = data.iloc[:, 0:2], data.iloc[:, 2]
# Fill missing numeric entries with their column mean. numeric_only=True skips
# the string-valued Alley column, which cannot be averaged (pandas >= 2.0
# raises TypeError otherwise); its missing values stay NaN at this stage.
inputs = inputs_raw.fillna(inputs_raw.mean(numeric_only=True))
inputs

Unnamed: 0,NumRooms,Alley
0,3.0,Pave
1,2.0,
2,4.0,
3,3.0,


In [31]:
# One-hot encode the categorical column; dummy_na=True adds an extra indicator
# column for missing values. dtype=int keeps the indicators as 0/1 integers:
# pandas >= 2.0 defaults to bool, which turns inputs.values into an
# object-dtype array instead of a numeric one.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
inputs

Unnamed: 0,NumRooms,Alley_Pave,Alley_nan
0,3.0,1,0
1,2.0,0,1
2,4.0,0,1
3,3.0,0,1


## Conversion to the Tensor Format

In [32]:
## Numpy
import numpy as np
# Convert the feature frame and the target series to NumPy arrays.
X = np.array(inputs.values)
y = np.array(output.values)
print(X, y)

[[3. 1. 0.]
 [2. 0. 1.]
 [4. 0. 1.]
 [3. 0. 1.]] [127500 106000 178100 140000]


In [33]:
## Pytorch
import torch
# Build PyTorch tensors from the underlying NumPy arrays of the features and
# the target.
X = torch.tensor(inputs.values)
y = torch.tensor(output.values)
print(X, y)

tensor([[3., 1., 0.],
        [2., 0., 1.],
        [4., 0., 1.],
        [3., 0., 1.]], dtype=torch.float64) tensor([127500, 106000, 178100, 140000])


In [34]:
## Tensorflow
import tensorflow as tf
# Build TensorFlow constants, explicitly cast to float32, from the features
# and the target.
X = tf.constant(inputs.values, dtype=tf.float32)
y = tf.constant(output.values, dtype=tf.float32)
print(X, y)

tf.Tensor(
[[3. 1. 0.]
 [2. 0. 1.]
 [4. 0. 1.]
 [3. 0. 1.]], shape=(4, 3), dtype=float32) tf.Tensor([127500. 106000. 178100. 140000.], shape=(4,), dtype=float32)
