### os.path.join(path, *paths):
- Joins one or more path components using the operating system's path separator and returns the resulting path as a string.

In [2]:
import os
# Make sure the "data" directory next to the parent folder exists.
# exist_ok (optional, default False): when False, os.makedirs raises an
# OSError if the target directory already exists; when True, an existing
# directory is left unaltered.
data_dir = os.path.join('..', 'data')
os.makedirs(data_dir, exist_ok=True)
data_file = os.path.join(data_dir, 'house_tiny.csv')

# Contents of the toy dataset, one CSV line per entry.
rows = [
    'name,sex,weight(kg),height(cm),whatEverGrading\n',  # column names
    'Dave,Dude,50,150,5\n',  # each following line is one data sample
    'Fave,not Dude,60,160,10\n',
    'Cave,Dude,70,170,5\n',
    'Nave,NA,NA,180,6\n',
]
with open(data_file, 'w') as f:
    f.writelines(rows)

In [3]:
import pandas as pd

# Load the CSV written above into a DataFrame. With read_csv's default
# settings the literal "NA" fields are parsed as missing values (NaN).
data = pd.read_csv(filepath_or_buffer=data_file)
print(data)

   name       sex  weight(kg)  height(cm)  whatEverGrading
0  Dave      Dude        50.0         150                5
1  Fave  not Dude        60.0         160               10
2  Cave      Dude        70.0         170                5
3  Nave       NaN         NaN         180                6


In [4]:
# Features: the columns at positions 1..3 (position 0, the name, is left out).
inputs = data.iloc[:, 1:4]
# Target: the last column.
outputs = data.iloc[:, -1]
inputs, outputs

(        sex  weight(kg)  height(cm)
 0      Dude        50.0         150
 1  not Dude        60.0         160
 2      Dude        70.0         170
 3       NaN         NaN         180,
 0     5
 1    10
 2     5
 3     6
 Name: whatEverGrading, dtype: int64)

- Fill the missing values (NaN) in each numeric column with that column's mean.

In [43]:
# Replace missing numeric values with the mean of their column.
# numeric_only=True restricts the mean to numeric columns: `inputs` still
# holds the string column `sex`, and on pandas >= 2.0 a plain
# DataFrame.mean() raises a TypeError for such frames. Non-numeric columns
# get no fill value here, so their NaN entries are left as they are.
numeric_means = inputs.mean(numeric_only=True)
inputs = inputs.fillna(numeric_means)
print(inputs)

        sex  weight(kg)  height(cm)
0      Dude        50.0         150
1  not Dude        60.0         160
2      Dude        70.0         170
3       NaN        60.0         180


- One-hot encode the categorical column with `pd.get_dummies`; `dummy_na=True` adds a separate indicator column for missing values (NaN).

In [44]:
# One-hot encode the string-valued columns; numeric columns pass through.
# dummy_na=True adds an extra indicator column for missing values.
# dtype=int keeps the indicators as 0/1 integers: pandas >= 2.0 would
# otherwise emit booleans, which makes `inputs.values` an object array that
# torch.tensor() cannot convert.
inputs = pd.get_dummies(inputs, dummy_na=True, dtype=int)
print(inputs)

   weight(kg)  height(cm)  sex_Dude  sex_not Dude  sex_nan
0        50.0         150         1             0        0
1        60.0         160         0             1        0
2        70.0         170         1             0        0
3        60.0         180         0             0        1


In [45]:
import torch

# Convert the preprocessed features and the target column to tensors.
X = torch.tensor(inputs.to_numpy())
y = torch.tensor(outputs.to_numpy())
X, y

(tensor([[ 50., 150.,   1.,   0.,   0.],
         [ 60., 160.,   0.,   1.,   0.],
         [ 70., 170.,   1.,   0.,   0.],
         [ 60., 180.,   0.,   0.,   1.]], dtype=torch.float64),
 tensor([ 5, 10,  5,  6]))